/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceNotFoundException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.RestoreJob;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * A collection of methods used by multiple classes to restore HBase tables.
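 * <p>
 * A rough usage sketch (purely illustrative; the backup root, backup id and table names below
 * are hypothetical):
 *
 * <pre>
 * Path backupRoot = new Path("hdfs://host/backup/root"); // hypothetical location
 * String backupId = "backup_1396650096738";
 * try (Connection conn = ConnectionFactory.createConnection(conf)) {
 *   RestoreTool restore = new RestoreTool(conf, backupRoot, backupId);
 *   Path tableBackupPath =
 *     HBackupFileSystem.getTableBackupPath(TableName.valueOf("t1"), backupRoot, backupId);
 *   restore.fullRestoreTable(conn, tableBackupPath, TableName.valueOf("t1"),
 *     TableName.valueOf("t1_restore"), false, null);
 * }
 * </pre>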
 */
@InterfaceAudience.Private
public class RestoreTool {
  public static final Logger LOG = LoggerFactory.getLogger(RestoreTool.class);
  private static final long TABLE_AVAILABILITY_WAIT_TIME = 180000;

  private final String[] ignoreDirs = { HConstants.RECOVERED_EDITS_DIR };
  protected Configuration conf;
  protected Path backupRootPath;
  protected String backupId;
  protected FileSystem fs;

  // store table name and snapshot dir mapping
  private final HashMap<TableName, Path> snapshotMap = new HashMap<>();

  public RestoreTool(Configuration conf, final Path backupRootPath, final String backupId)
    throws IOException {
    this.conf = conf;
    this.backupRootPath = backupRootPath;
    this.backupId = backupId;
    this.fs = backupRootPath.getFileSystem(conf);
  }

  /**
   * Returns the path to the table archive, e.g.
   * ".../user/biadmin/backup1/default/t1_dn/backup_1396650096738/archive/data/default/t1_dn"
   * @param tableName table name
   * @return path to the table archive, or null if the table has none (empty table)
   * @throws IOException exception
   */
  Path getTableArchivePath(TableName tableName) throws IOException {
    Path baseDir =
      new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
        HConstants.HFILE_ARCHIVE_DIRECTORY);
    Path dataDir = new Path(baseDir, HConstants.BASE_NAMESPACE_DIR);
    Path archivePath = new Path(dataDir, tableName.getNamespaceAsString());
    Path tableArchivePath = new Path(archivePath, tableName.getQualifierAsString());
    if (!fs.exists(tableArchivePath) || !fs.getFileStatus(tableArchivePath).isDirectory()) {
      LOG.debug("Folder tableArchivePath: " + tableArchivePath.toString() + " does not exist");
      tableArchivePath = null; // empty table has no archive
    }
    return tableArchivePath;
  }

  /**
   * Gets the list of region directories under the table archive
   * @param tableName table name
   * @return list of region directory paths
   * @throws IOException exception
   */
  ArrayList<Path> getRegionList(TableName tableName) throws IOException {
    Path tableArchivePath = getTableArchivePath(tableName);
    ArrayList<Path> regionDirList = new ArrayList<>();
    FileStatus[] children = fs.listStatus(tableArchivePath);
    for (FileStatus childStatus : children) {
      // each child here refers to a region (name)
      Path child = childStatus.getPath();
      regionDirList.add(child);
    }
    return regionDirList;
  }

  void modifyTableSync(Connection conn, TableDescriptor desc) throws IOException {
    try (Admin admin = conn.getAdmin()) {
      admin.modifyTable(desc);
      int attempt = 0;
      int maxAttempts = 600;
      // poll every 100 ms, for a total timeout of maxAttempts * 100 ms = 60 s
      while (!admin.isTableAvailable(desc.getTableName())) {
        Thread.sleep(100);
        attempt++;
        if (attempt > maxAttempts) {
          throw new IOException("Timeout expired " + (maxAttempts * 100) + "ms");
        }
      }
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  /**
   * Run during an incremental backup restore operation: calls WalPlayer to replay the WALs in the
   * backup image. Currently tableNames and newTableNames only contain a single table; this will
   * be expanded to multiple tables in the future.
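   * <p>
   * A hypothetical single-table call (names are illustrative only):
   *
   * <pre>
   * incrementalRestoreTable(conn, tableBackupPath, new Path[] { walBackupDir },
   *   new TableName[] { TableName.valueOf("t1") },
   *   new TableName[] { TableName.valueOf("t1") }, incrBackupId);
   * </pre>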
   * @param conn HBase connection
   * @param tableBackupPath backup path
   * @param logDirs incremental backup folders, which contain WALs
   * @param tableNames source table names (the tables that were backed up)
   * @param newTableNames target table names (the tables to restore to)
   * @param incrBackupId incremental backup Id
   * @throws IOException exception
   */
  public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] logDirs,
    TableName[] tableNames, TableName[] newTableNames, String incrBackupId) throws IOException {
    try (Admin admin = conn.getAdmin()) {
      if (tableNames.length != newTableNames.length) {
        throw new IOException("Number of source tables and target tables does not match!");
      }
      FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);

      // For an incremental backup image, we expect the table to have been created already, either
      // by the user or by a previous full backup. Here, check that all new tables exist.
      for (TableName tableName : newTableNames) {
        if (!admin.tableExists(tableName)) {
          throw new IOException("HBase table " + tableName
            + " does not exist. Create the table first, e.g. by restoring a full backup.");
        }
      }
      // adjust table schema
      for (int i = 0; i < tableNames.length; i++) {
        TableName tableName = tableNames[i];
        TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, incrBackupId);
        if (tableDescriptor == null) {
          throw new IOException("Can't find " + tableName + "'s descriptor.");
        }
        LOG.debug("Found descriptor " + tableDescriptor + " through " + incrBackupId);

        TableName newTableName = newTableNames[i];
        TableDescriptor newTableDescriptor = admin.getDescriptor(newTableName);
        List<ColumnFamilyDescriptor> families = Arrays.asList(tableDescriptor.getColumnFamilies());
        List<ColumnFamilyDescriptor> existingFamilies =
          Arrays.asList(newTableDescriptor.getColumnFamilies());
        TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(newTableDescriptor);
        boolean schemaChangeNeeded = false;
        for (ColumnFamilyDescriptor family : families) {
          if (!existingFamilies.contains(family)) {
            builder.setColumnFamily(family);
            schemaChangeNeeded = true;
          }
        }
        for (ColumnFamilyDescriptor family : existingFamilies) {
          if (!families.contains(family)) {
            builder.removeColumnFamily(family.getName());
            schemaChangeNeeded = true;
          }
        }
        if (schemaChangeNeeded) {
          TableDescriptor updated = builder.build();
          modifyTableSync(conn, updated);
          LOG.info("Changed " + updated.getTableName() + " to: " + updated);
        }
      }
      RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);

      restoreService.run(logDirs, tableNames, newTableNames, false);
    }
  }

  public void fullRestoreTable(Connection conn, Path tableBackupPath, TableName tableName,
    TableName newTableName, boolean truncateIfExists, String lastIncrBackupId) throws IOException {
    createAndRestoreTable(conn, tableName, newTableName, tableBackupPath, truncateIfExists,
      lastIncrBackupId);
  }
  /**
   * Returns the path to the backup table snapshot directory:
   * "/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot"
   * @param backupRootPath backup root path
   * @param tableName table name
   * @param backupId backup Id
   * @return path for snapshot
   */
  Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backupId) {
    return new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
      HConstants.SNAPSHOT_DIR_NAME);
  }

  /**
   * Returns the path to the snapshot directory for the table, e.g.
   * "/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot/
   * snapshot_1396650097621_namespace_table". On 0.96 and 0.98 this path contains .snapshotinfo
   * and .tabledesc; on trunk it contains .snapshotinfo and .data.manifest.
   * @param tableName table name
   * @return path to table info
   * @throws IOException exception
   */
  Path getTableInfoPath(TableName tableName) throws IOException {
    Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
    Path tableInfoPath = null;

    // can't build the path directly as the timestamp values are different
    FileStatus[] snapshots = fs.listStatus(tableSnapShotPath,
      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
    for (FileStatus snapshot : snapshots) {
      tableInfoPath = snapshot.getPath();
      // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest"
      if (tableInfoPath.getName().endsWith("data.manifest")) {
        break;
      }
    }
    return tableInfoPath;
  }

  /**
   * Get the table descriptor saved in the backup image of the table
   * @param tableName the table that was backed up
   * @return {@link TableDescriptor} saved in the backup image of the table
   */
  TableDescriptor getTableDesc(TableName tableName) throws IOException {
    Path tableInfoPath = this.getTableInfoPath(tableName);
    SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, tableInfoPath);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, tableInfoPath, desc);
    TableDescriptor tableDescriptor = manifest.getTableDescriptor();
    if (!tableDescriptor.getTableName().equals(tableName)) {
      LOG.error("couldn't find Table Desc for table: " + tableName + " under tableInfoPath: "
        + tableInfoPath.toString());
      LOG.error(
        "tableDescriptor.getNameAsString() = " + tableDescriptor.getTableName().getNameAsString());
      throw new FileNotFoundException("couldn't find Table Desc for table: " + tableName
        + " under tableInfoPath: " + tableInfoPath.toString());
    }
    return tableDescriptor;
  }

  private TableDescriptor getTableDescriptor(FileSystem fileSys, TableName tableName,
    String lastIncrBackupId) throws IOException {
    if (lastIncrBackupId != null) {
      String target =
        BackupUtils.getTableBackupDir(backupRootPath.toString(), lastIncrBackupId, tableName);
      return FSTableDescriptors.getTableDescriptorFromFs(fileSys, new Path(target));
    }
    return null;
  }
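  // Note on descriptor resolution in createAndRestoreTable below: the descriptor is taken first
  // from the last incremental backup image (when lastIncrBackupId is non-null), then from the
  // snapshot manifest in the backup image, and finally falls back to an empty descriptor built
  // from the new table name.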
  private void createAndRestoreTable(Connection conn, TableName tableName, TableName newTableName,
    Path tableBackupPath, boolean truncateIfExists, String lastIncrBackupId) throws IOException {
    if (newTableName == null) {
      newTableName = tableName;
    }
    FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);

    // get table descriptor first
    TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, lastIncrBackupId);
    if (tableDescriptor != null) {
      LOG.debug("Retrieved descriptor: " + tableDescriptor + " thru " + lastIncrBackupId);
    }

    if (tableDescriptor == null) {
      Path tableSnapshotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
      if (fileSys.exists(tableSnapshotPath)) {
        // the snapshot path existing means the backup path is in HDFS
        // check whether a snapshot dir was already recorded for the target table
        if (snapshotMap.get(tableName) != null) {
          SnapshotDescription desc =
            SnapshotDescriptionUtils.readSnapshotInfo(fileSys, tableSnapshotPath);
          SnapshotManifest manifest = SnapshotManifest.open(conf, fileSys, tableSnapshotPath, desc);
          tableDescriptor = manifest.getTableDescriptor();
        } else {
          tableDescriptor = getTableDesc(tableName);
          snapshotMap.put(tableName, getTableInfoPath(tableName));
        }
        if (tableDescriptor == null) {
          LOG.debug("Found no table descriptor in the snapshot dir, previous schema would be lost");
        }
      } else {
        throw new IOException(
          "Table snapshot directory: " + tableSnapshotPath + " does not exist.");
      }
    }

    Path tableArchivePath = getTableArchivePath(tableName);
    if (tableArchivePath == null) {
      if (tableDescriptor != null) {
        // a table descriptor without an archive dir means the table is empty; create the table
        // and exit
        if (LOG.isDebugEnabled()) {
          LOG.debug("Found a table descriptor but no archive dir for table " + tableName
            + ", will only create the table");
        }
        tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor);
        checkAndCreateTable(conn, tableBackupPath, tableName, newTableName, null, tableDescriptor,
          truncateIfExists);
        return;
      } else {
        throw new IllegalStateException(
          "Cannot restore hbase table because tableArchivePath is null.");
      }
    }

    if (tableDescriptor == null) {
      tableDescriptor = TableDescriptorBuilder.newBuilder(newTableName).build();
    } else {
      tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor);
    }

    // record all region dirs:
    // load all files in dir
    try {
      ArrayList<Path> regionPathList = getRegionList(tableName);

      // we should only try to create the table with all region information, so we can pre-split
      // the regions at a fine grain
      checkAndCreateTable(conn, tableBackupPath, tableName, newTableName, regionPathList,
        tableDescriptor, truncateIfExists);
      RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
      Path[] paths = new Path[regionPathList.size()];
      regionPathList.toArray(paths);
      restoreService.run(paths, new TableName[] { tableName }, new TableName[] { newTableName },
        true);

    } catch (Exception e) {
      LOG.error(e.toString(), e);
      throw new IllegalStateException("Cannot restore hbase table", e);
    }
  }

  /**
   * Gets the list of region directories under the given table archive path
   * @param tableArchivePath table archive path
   * @return list of region directory paths
   * @throws IOException exception
   */
  ArrayList<Path> getRegionList(Path tableArchivePath) throws IOException {
    ArrayList<Path> regionDirList = new ArrayList<>();
    FileStatus[] children = fs.listStatus(tableArchivePath);
    for (FileStatus childStatus : children) {
      // each child here refers to a region (name)
      Path child = childStatus.getPath();
      regionDirList.add(child);
    }
    return regionDirList;
  }

  /**
   * Calculate region boundaries and add all the column families to the table descriptor
   * @param regionDirList region dir list
   * @return a set of keys to store the boundaries
   */
  byte[][] generateBoundaryKeys(ArrayList<Path> regionDirList) throws IOException {
    TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
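    // The inference below counts, for each HFile, +1 at its first row key and -1 at its last row
    // key; BulkLoadHFilesTool.inferBoundaries() then walks the sorted keys and emits a split
    // point wherever the running sum returns to zero. Illustrative example (hypothetical keys):
    // HFiles spanning [a, c] and [d, f] yield {a=+1, c=-1, d=+1, f=-1}; the sum drops to zero
    // after 'c', so 'd' starts a new region and becomes the single split key.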
    // Build a set of keys to store the boundaries
    // calculate region boundaries and add all the column families to the table descriptor
    for (Path regionDir : regionDirList) {
      LOG.debug("Parsing region dir: " + regionDir);
      Path hfofDir = regionDir;

      if (!fs.exists(hfofDir)) {
        LOG.warn("HFileOutputFormat dir " + hfofDir + " not found");
      }

      FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
      if (familyDirStatuses == null) {
        throw new IOException("No families found in " + hfofDir);
      }

      for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDirectory()) {
          LOG.warn("Skipping non-directory " + stat.getPath());
          continue;
        }
        boolean isIgnore = false;
        String pathName = stat.getPath().getName();
        for (String ignore : ignoreDirs) {
          if (pathName.contains(ignore)) {
            LOG.warn("Skipping non-family directory " + pathName);
            isIgnore = true;
            break;
          }
        }
        if (isIgnore) {
          continue;
        }
        Path familyDir = stat.getPath();
        LOG.debug(
          "Parsing family dir [" + familyDir.toString() + "] in region [" + regionDir + "]");
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_") || familyDir.getName().startsWith(".")) {
          continue;
        }

        // start to parse the hfiles inside one family dir
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
          if (
            hfile.getName().startsWith("_") || hfile.getName().startsWith(".")
              || StoreFileInfo.isReference(hfile.getName())
              || HFileLink.isHFileLink(hfile.getName())
          ) {
            continue;
          }
          HFile.Reader reader = HFile.createReader(fs, hfile, conf);
          final byte[] first, last;
          try {
            first = reader.getFirstRowKey().get();
            last = reader.getLastRowKey().get();
            LOG.debug("Trying to figure out region boundaries hfile=" + hfile + " first="
              + Bytes.toStringBinary(first) + " last=" + Bytes.toStringBinary(last));

            // To eventually infer start key-end key boundaries
            Integer value = map.containsKey(first) ? (Integer) map.get(first) : 0;
            map.put(first, value + 1);
            value = map.containsKey(last) ? (Integer) map.get(last) : 0;
            map.put(last, value - 1);
          } finally {
            reader.close();
          }
        }
      }
    }
    return BulkLoadHFilesTool.inferBoundaries(map);
  }

  /**
   * Prepare the table for bulkload; most of this code is copied from the {@code createTable}
   * method in {@code BulkLoadHFilesTool}.
   * @param conn connection
   * @param tableBackupPath path
   * @param tableName table name
   * @param targetTableName target table name
   * @param regionDirList region directory list
   * @param htd table descriptor
   * @param truncateIfExists truncates the table if it exists
   * @throws IOException exception
   */
  private void checkAndCreateTable(Connection conn, Path tableBackupPath, TableName tableName,
    TableName targetTableName, ArrayList<Path> regionDirList, TableDescriptor htd,
    boolean truncateIfExists) throws IOException {
    try (Admin admin = conn.getAdmin()) {
      boolean createNew = false;
      if (admin.tableExists(targetTableName)) {
        if (truncateIfExists) {
          LOG.info("Truncating existing target table '" + targetTableName
            + "', preserving region splits");
          admin.disableTable(targetTableName);
          admin.truncateTable(targetTableName, true);
        } else {
          LOG.info("Using existing target table '" + targetTableName + "'");
        }
      } else {
        createNew = true;
      }
      if (createNew) {
        LOG.info("Creating target table '" + targetTableName + "'");
        byte[][] keys = null;
        try {
          if (regionDirList == null || regionDirList.size() == 0) {
            admin.createTable(htd);
          } else {
            keys = generateBoundaryKeys(regionDirList);
            // create table using table descriptor and region boundaries
            admin.createTable(htd, keys);
          }
        } catch (NamespaceNotFoundException e) {
          LOG.warn("Namespace of target table does not exist and will be created");
          String namespaceAsString = targetTableName.getNamespaceAsString();
          LOG.info("Creating target namespace '" + namespaceAsString + "'");
          admin.createNamespace(NamespaceDescriptor.create(namespaceAsString).build());
          if (null == keys) {
            admin.createTable(htd);
          } else {
            admin.createTable(htd, keys);
          }
        }

      }
      long startTime = EnvironmentEdgeManager.currentTime();
      while (!admin.isTableAvailable(targetTableName)) {
        try {
          Thread.sleep(100);
        } catch (InterruptedException ie) {
          Thread.currentThread().interrupt();
        }
        if (EnvironmentEdgeManager.currentTime() - startTime > TABLE_AVAILABILITY_WAIT_TIME) {
          throw new IOException("Timeout " + TABLE_AVAILABILITY_WAIT_TIME + "ms expired, table "
            + targetTableName + " is still not available");
        }
      }
    }
  }
}