/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceNotFoundException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.RestoreJob;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.snapshot.SnapshotTTLExpiredException;
import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * A collection of methods used by multiple classes to restore HBase tables.
 */
@InterfaceAudience.Private
public class RestoreTool {
  public static final Logger LOG = LoggerFactory.getLogger(RestoreTool.class);
  private final static long TABLE_AVAILABILITY_WAIT_TIME = 180000;

  private final String[] ignoreDirs = { HConstants.RECOVERED_EDITS_DIR };
  protected Configuration conf;
  protected Path backupRootPath;
  protected Path restoreRootDir;
  protected String backupId;
  protected FileSystem fs;

  // store table name and snapshot dir mapping
  private final HashMap<TableName, Path> snapshotMap = new HashMap<>();

  public RestoreTool(Configuration conf, final Path backupRootPath, final Path restoreRootDir,
    final String backupId) throws IOException {
    this.conf = conf;
    this.backupRootPath = backupRootPath;
    this.backupId = backupId;
    this.fs = backupRootPath.getFileSystem(conf);
    this.restoreRootDir = restoreRootDir;
  }

  /**
   * Returns the path to the table archive, e.g.
   * ".../user/biadmin/backup1/default/t1_dn/backup_1396650096738/archive/data/default/t1_dn"
   * @param tableName table name
   * @return path to the table archive, or null if the table is empty and has no archive
   * @throws IOException exception
   */
  Path getTableArchivePath(TableName tableName) throws IOException {
    Path baseDir =
      new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
        HConstants.HFILE_ARCHIVE_DIRECTORY);
    Path dataDir = new Path(baseDir, HConstants.BASE_NAMESPACE_DIR);
    Path archivePath = new Path(dataDir, tableName.getNamespaceAsString());
    Path tableArchivePath = new Path(archivePath, tableName.getQualifierAsString());
    if (!fs.exists(tableArchivePath) || !fs.getFileStatus(tableArchivePath).isDirectory()) {
      LOG.debug("Folder tableArchivePath: " + tableArchivePath.toString() + " does not exist");
      tableArchivePath = null; // empty table has no archive
    }
    return tableArchivePath;
  }

  /**
   * Gets the list of region directories under the table archive.
   * @param tableName table name
   * @return list of region directory paths
   * @throws IOException exception
   */
  ArrayList<Path> getRegionList(TableName tableName) throws IOException {
    Path tableArchivePath = getTableArchivePath(tableName);
    ArrayList<Path> regionDirList = new ArrayList<>();
    FileStatus[] children = fs.listStatus(tableArchivePath);
    for (FileStatus childStatus : children) {
      // each child is a region directory
      Path child = childStatus.getPath();
      regionDirList.add(child);
    }
    return regionDirList;
  }
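
  /**
   * Applies the given table descriptor via {@link Admin#modifyTable} and blocks until the table
   * becomes available again.
   * @param conn HBase connection
   * @param desc modified table descriptor
   * @throws IOException if the modification fails or the table does not become available in time
   */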
  void modifyTableSync(Connection conn, TableDescriptor desc) throws IOException {
    try (Admin admin = conn.getAdmin()) {
      admin.modifyTable(desc);
      int attempt = 0;
      int maxAttempts = 600;
      while (!admin.isTableAvailable(desc.getTableName())) {
        Thread.sleep(100);
        attempt++;
        if (attempt > maxAttempts) {
          throw new IOException("Timeout of " + (maxAttempts * 100) + "ms expired while waiting"
            + " for table " + desc.getTableName() + " to become available");
        }
      }
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  /**
   * During an incremental backup operation, calls WalPlayer to replay the WALs in the backup
   * image. Currently tableNames and newTableNames only contain a single table; this will be
   * expanded to multiple tables in the future.
   * @param conn               HBase connection
   * @param tableBackupPath    backup path
   * @param logDirs            incremental backup folders, which contain the WALs
   * @param tableNames         source table names (tables that were backed up)
   * @param newTableNames      target table names (tables to be restored to)
   * @param incrBackupId       incremental backup Id
   * @param keepOriginalSplits whether the original region splits from the full backup should be
   *                           kept
   * @throws IOException exception
   */
  public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] logDirs,
    TableName[] tableNames, TableName[] newTableNames, String incrBackupId,
    boolean keepOriginalSplits) throws IOException {
    try (Admin admin = conn.getAdmin()) {
      if (tableNames.length != newTableNames.length) {
        throw new IOException("Number of source tables and target tables does not match!");
      }
      FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);

      // For an incremental backup image, expect the tables to have been created either by the
      // user or by a previous full backup. Here, check that all new tables exist.
      for (TableName tableName : newTableNames) {
        if (!admin.tableExists(tableName)) {
          throw new IOException("HBase table " + tableName
            + " does not exist. Create the table first, e.g. by restoring a full backup.");
        }
      }
      // adjust table schema
      for (int i = 0; i < tableNames.length; i++) {
        TableName tableName = tableNames[i];
        TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, incrBackupId);
        if (tableDescriptor == null) {
          throw new IOException("Can't find " + tableName + "'s descriptor.");
        }
        LOG.debug("Found descriptor " + tableDescriptor + " through " + incrBackupId);

        TableName newTableName = newTableNames[i];
        TableDescriptor newTableDescriptor = admin.getDescriptor(newTableName);
        List<ColumnFamilyDescriptor> families = Arrays.asList(tableDescriptor.getColumnFamilies());
        List<ColumnFamilyDescriptor> existingFamilies =
          Arrays.asList(newTableDescriptor.getColumnFamilies());
        TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(newTableDescriptor);
        boolean schemaChangeNeeded = false;
        for (ColumnFamilyDescriptor family : families) {
          if (!existingFamilies.contains(family)) {
            builder.setColumnFamily(family);
            schemaChangeNeeded = true;
          }
        }
        for (ColumnFamilyDescriptor family : existingFamilies) {
          if (!families.contains(family)) {
            builder.removeColumnFamily(family.getName());
            schemaChangeNeeded = true;
          }
        }
        if (schemaChangeNeeded) {
          TableDescriptor modifiedDescriptor = builder.build();
          modifyTableSync(conn, modifiedDescriptor);
          LOG.info("Changed " + modifiedDescriptor.getTableName() + " to: " + modifiedDescriptor);
        }
      }
      configureForRestoreJob(keepOriginalSplits);
      RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);

      restoreService.run(logDirs, tableNames, restoreRootDir, newTableNames, false);
    }
  }
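
  /**
   * Performs a full restore of a single table: creates (or truncates) the target table, then
   * restores the table content from the full backup image.
   * @param conn                 HBase connection
   * @param tableBackupPath      backup path
   * @param tableName            source table name
   * @param newTableName         target table name
   * @param truncateIfExists     truncate the target table if it already exists
   * @param isKeepOriginalSplits whether the original region splits from the full backup should be
   *                             kept
   * @param lastIncrBackupId     last incremental backup Id
   * @throws IOException exception
   */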
  public void fullRestoreTable(Connection conn, Path tableBackupPath, TableName tableName,
    TableName newTableName, boolean truncateIfExists, boolean isKeepOriginalSplits,
    String lastIncrBackupId) throws IOException {
    createAndRestoreTable(conn, tableName, newTableName, tableBackupPath, truncateIfExists,
      isKeepOriginalSplits, lastIncrBackupId);
  }

  /**
   * Returns the path to the backup table snapshot directory:
   * "/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot"
   * @param backupRootPath backup root path
   * @param tableName      table name
   * @param backupId       backup Id
   * @return path for snapshot
   */
  Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backupId) {
    return new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
      HConstants.SNAPSHOT_DIR_NAME);
  }

  /**
   * Returns the path to the table info directory:
   * "/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot/snapshot_1396650097621_namespace_table".
   * This path contains .snapshotinfo and .tabledesc (0.96 and 0.98), or .snapshotinfo and
   * data.manifest (trunk).
   * @param tableName table name
   * @return path to table info
   * @throws IOException exception
   */
  Path getTableInfoPath(TableName tableName) throws IOException {
    Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
    Path tableInfoPath = null;

    // can't build the path directly as the timestamp values are different
    FileStatus[] snapshots = fs.listStatus(tableSnapShotPath,
      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
    for (FileStatus snapshot : snapshots) {
      tableInfoPath = snapshot.getPath();
      // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest";
      if (tableInfoPath.getName().endsWith("data.manifest")) {
        break;
      }
    }
    return tableInfoPath;
  }

  /**
   * Gets the table descriptor saved in the backup image of the table.
   * @param tableName the table that was backed up
   * @return {@link TableDescriptor} saved in the backup image of the table
   * @throws IOException if the snapshot has expired or the descriptor cannot be found
   */
  TableDescriptor getTableDesc(TableName tableName) throws IOException {
    Path tableInfoPath = this.getTableInfoPath(tableName);
    SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, tableInfoPath);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, tableInfoPath, desc);
    if (
      SnapshotDescriptionUtils.isExpiredSnapshot(desc.getTtl(), desc.getCreationTime(),
        EnvironmentEdgeManager.currentTime())
    ) {
      throw new SnapshotTTLExpiredException(ProtobufUtil.createSnapshotDesc(desc));
    }
    TableDescriptor tableDescriptor = manifest.getTableDescriptor();
    if (!tableDescriptor.getTableName().equals(tableName)) {
      LOG.error("Couldn't find table descriptor for table: " + tableName + " under tableInfoPath: "
        + tableInfoPath.toString());
      LOG.error(
        "tableDescriptor.getNameAsString() = " + tableDescriptor.getTableName().getNameAsString());
      throw new FileNotFoundException("Couldn't find table descriptor for table: " + tableName
        + " under tableInfoPath: " + tableInfoPath.toString());
    }
    return tableDescriptor;
  }

  private TableDescriptor getTableDescriptor(FileSystem fileSys, TableName tableName,
    String lastIncrBackupId) throws IOException {
    if (lastIncrBackupId != null) {
      String target =
        BackupUtils.getTableBackupDir(backupRootPath.toString(), lastIncrBackupId, tableName);
      return FSTableDescriptors.getTableDescriptorFromFs(fileSys, new Path(target));
    }
    return null;
  }
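
  /**
   * Creates (or truncates) the target table and restores its content from the backup image. The
   * table descriptor is taken from the last incremental backup image if available, otherwise from
   * the snapshot metadata in the full backup image.
   * @param conn                 HBase connection
   * @param tableName            source table name
   * @param newTableName         target table name (defaults to the source name if null)
   * @param tableBackupPath      backup path
   * @param truncateIfExists     truncate the target table if it already exists
   * @param isKeepOriginalSplits whether the original region splits from the full backup should be
   *                             kept
   * @param lastIncrBackupId     last incremental backup Id
   * @throws IOException exception
   */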
  private void createAndRestoreTable(Connection conn, TableName tableName, TableName newTableName,
    Path tableBackupPath, boolean truncateIfExists, boolean isKeepOriginalSplits,
    String lastIncrBackupId) throws IOException {
    if (newTableName == null) {
      newTableName = tableName;
    }
    FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);

    // get table descriptor first
    TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, lastIncrBackupId);
    if (tableDescriptor != null) {
      LOG.debug("Retrieved descriptor: " + tableDescriptor + " thru " + lastIncrBackupId);
    }

    if (tableDescriptor == null) {
      Path tableSnapshotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
      if (fileSys.exists(tableSnapshotPath)) {
        // An existing snapshot path means the backup path is in HDFS.
        // Check whether a snapshot dir was already recorded for the target table.
        if (snapshotMap.get(tableName) != null) {
          SnapshotDescription desc =
            SnapshotDescriptionUtils.readSnapshotInfo(fileSys, tableSnapshotPath);
          SnapshotManifest manifest = SnapshotManifest.open(conf, fileSys, tableSnapshotPath, desc);
          if (
            SnapshotDescriptionUtils.isExpiredSnapshot(desc.getTtl(), desc.getCreationTime(),
              EnvironmentEdgeManager.currentTime())
          ) {
            throw new SnapshotTTLExpiredException(ProtobufUtil.createSnapshotDesc(desc));
          }
          tableDescriptor = manifest.getTableDescriptor();
        } else {
          tableDescriptor = getTableDesc(tableName);
          snapshotMap.put(tableName, getTableInfoPath(tableName));
        }
        if (tableDescriptor == null) {
          LOG.debug("Found no table descriptor in the snapshot dir, previous schema would be lost");
        }
      } else {
        throw new IOException(
          "Table snapshot directory: " + tableSnapshotPath + " does not exist.");
      }
    }

    Path tableArchivePath = getTableArchivePath(tableName);
    if (tableArchivePath == null) {
      if (tableDescriptor != null) {
        // A table descriptor without an archive dir means the table is empty: just create the
        // table and return.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Found table descriptor but no archive dir for table " + tableName
            + ", will only create table");
        }
        tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor);
        checkAndCreateTable(conn, newTableName, null, tableDescriptor, truncateIfExists);
        return;
      } else {
        throw new IllegalStateException(
          "Cannot restore hbase table because tableArchivePath is null.");
      }
    }

    if (tableDescriptor == null) {
      tableDescriptor = TableDescriptorBuilder.newBuilder(newTableName).build();
    } else {
      tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor);
    }

    // record all region dirs, then load all files in those dirs
    try {
      ArrayList<Path> regionPathList = getRegionList(tableName);

      // Create the table with all region information so the regions can be pre-split at a fine
      // grain.
      checkAndCreateTable(conn, newTableName, regionPathList, tableDescriptor, truncateIfExists);
      configureForRestoreJob(isKeepOriginalSplits);
      RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
      Path[] paths = new Path[regionPathList.size()];
      regionPathList.toArray(paths);
      restoreService.run(paths, new TableName[] { tableName }, restoreRootDir,
        new TableName[] { newTableName }, true);

    } catch (Exception e) {
      LOG.error(e.toString(), e);
      throw new IllegalStateException("Cannot restore hbase table", e);
    }
  }

  /**
   * Gets the list of region directories under the given table archive path.
   * @param tableArchivePath table archive path
   * @return list of region directory paths
   * @throws IOException exception
   */
  ArrayList<Path> getRegionList(Path tableArchivePath) throws IOException {
    ArrayList<Path> regionDirList = new ArrayList<>();
    FileStatus[] children = fs.listStatus(tableArchivePath);
    for (FileStatus childStatus : children) {
      // each child is a region directory
      Path child = childStatus.getPath();
      regionDirList.add(child);
    }
    return regionDirList;
  }

  /**
   * Calculates the region boundary keys from the HFiles in the given region directories.
   * @param regionDirList region dir list
   * @return the split keys inferred from the first/last row keys of the HFiles
   * @throws IOException exception
   */
  byte[][] generateBoundaryKeys(ArrayList<Path> regionDirList) throws IOException {
    TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    // Build a map of candidate boundary keys: the first row key of each HFile counts +1, the
    // last row key counts -1; inferBoundaries() later derives the split points from this map.
    for (Path regionDir : regionDirList) {
      LOG.debug("Parsing region dir: " + regionDir);
      Path hfofDir = regionDir;

      if (!fs.exists(hfofDir)) {
        LOG.warn("HFileOutputFormat dir " + hfofDir + " not found");
      }

      FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
      if (familyDirStatuses == null) {
        throw new IOException("No families found in " + hfofDir);
      }

      for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDirectory()) {
          LOG.warn("Skipping non-directory " + stat.getPath());
          continue;
        }
        boolean isIgnore = false;
        String pathName = stat.getPath().getName();
        for (String ignore : ignoreDirs) {
          if (pathName.contains(ignore)) {
            LOG.warn("Skipping non-family directory " + pathName);
            isIgnore = true;
            break;
          }
        }
        if (isIgnore) {
          continue;
        }
        Path familyDir = stat.getPath();
        LOG.debug("Parsing family dir [" + familyDir.toString() + " in region [" + regionDir + "]");
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_") || familyDir.getName().startsWith(".")) {
          continue;
        }

        // start to parse the hfiles inside one family dir
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
          if (
            hfile.getName().startsWith("_") || hfile.getName().startsWith(".")
              || StoreFileInfo.isReference(hfile.getName())
              || HFileLink.isHFileLink(hfile.getName())
          ) {
            continue;
          }
          HFile.Reader reader = HFile.createReader(fs, hfile, conf);
          final byte[] first, last;
          try {
            if (reader.getEntries() == 0) {
              LOG.debug("Skipping hfile with 0 entries: " + hfile);
              continue;
            }
            first = reader.getFirstRowKey().get();
            last = reader.getLastRowKey().get();
            LOG.debug("Trying to figure out region boundaries hfile=" + hfile + " first="
              + Bytes.toStringBinary(first) + " last=" + Bytes.toStringBinary(last));

            // to eventually infer start key-end key boundaries
            Integer value = map.containsKey(first) ? (Integer) map.get(first) : 0;
            map.put(first, value + 1);
            value = map.containsKey(last) ? (Integer) map.get(last) : 0;
            map.put(last, value - 1);
          } finally {
            reader.close();
          }
        }
      }
    }
    return BulkLoadHFilesTool.inferBoundaries(map);
  }

  /**
   * Prepares the table for bulkload; most code copied from the {@code createTable} method in
   * {@code BulkLoadHFilesTool}.
   * @param conn             connection
   * @param targetTableName  target table name
   * @param regionDirList    region directory list
   * @param htd              table descriptor
   * @param truncateIfExists truncate the table if it already exists
   * @throws IOException exception
   */
  private void checkAndCreateTable(Connection conn, TableName targetTableName,
    ArrayList<Path> regionDirList, TableDescriptor htd, boolean truncateIfExists)
    throws IOException {
    try (Admin admin = conn.getAdmin()) {
      boolean createNew = false;
      if (admin.tableExists(targetTableName)) {
        if (truncateIfExists) {
          LOG.info(
            "Truncating existing target table '" + targetTableName + "', preserving region splits");
          admin.disableTable(targetTableName);
          admin.truncateTable(targetTableName, true);
        } else {
          LOG.info("Using existing target table '" + targetTableName + "'");
        }
      } else {
        createNew = true;
      }
      if (createNew) {
        LOG.info("Creating target table '" + targetTableName + "'");
        byte[][] keys = null;
        try {
          if (regionDirList == null || regionDirList.size() == 0) {
            admin.createTable(htd);
          } else {
            keys = generateBoundaryKeys(regionDirList);
            if (keys.length > 0) {
              // create table using table descriptor and region boundaries
              admin.createTable(htd, keys);
            } else {
              admin.createTable(htd);
            }
          }
        } catch (NamespaceNotFoundException e) {
          LOG.warn("Namespace of table " + targetTableName + " does not exist; it will be created");
          String namespaceAsString = targetTableName.getNamespaceAsString();
          LOG.info("Creating target namespace '" + namespaceAsString + "'");
          admin.createNamespace(NamespaceDescriptor.create(namespaceAsString).build());
          if (keys == null || keys.length == 0) {
            admin.createTable(htd);
          } else {
            admin.createTable(htd, keys);
          }
        }

      }
      long startTime = EnvironmentEdgeManager.currentTime();
      while (!admin.isTableAvailable(targetTableName)) {
        try {
          Thread.sleep(100);
        } catch (InterruptedException ie) {
          Thread.currentThread().interrupt();
        }
        if (EnvironmentEdgeManager.currentTime() - startTime > TABLE_AVAILABILITY_WAIT_TIME) {
          throw new IOException("Time out " + TABLE_AVAILABILITY_WAIT_TIME + "ms expired, table "
            + targetTableName + " is still not available");
        }
      }
    }
  }

  /**
   * Sets the configuration flags consumed by the restore job: whether the original region splits
   * should be kept, and the backup root path.
   */
  private void configureForRestoreJob(boolean keepOriginalSplits) {
    conf.setBoolean(RestoreJob.KEEP_ORIGINAL_SPLITS_KEY, keepOriginalSplits);
    conf.set(RestoreJob.BACKUP_ROOT_PATH_KEY, backupRootPath.toString());
  }
}