/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.impl;

import static org.apache.hadoop.hbase.backup.BackupInfo.withState;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
import org.apache.hadoop.hbase.backup.BackupRequest;
import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for backup operations. Concrete implementations for full and incremental backups are
 * delegated to the corresponding sub-classes: {@link FullTableBackupClient} and
 * {@link IncrementalTableBackupClient}.
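 * <p>
 * A minimal usage sketch (hypothetical values; the builder methods shown are assumptions based on
 * the request fields consumed by {@link #init(Connection, String, BackupRequest)}):
 *
 * <pre>{@code
 * // conn: an open Connection; backupId: a previously generated backup id (both assumed in scope)
 * BackupRequest request = new BackupRequest.Builder().withBackupType(BackupType.FULL)
 *   .withTableList(tables).withTargetRootDir("hdfs://nn/backups").build();
 * // A concrete subclass performs the actual work.
 * TableBackupClient client = new FullTableBackupClient(conn, backupId, request);
 * client.execute();
 * }</pre>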
 */
@InterfaceAudience.Private
public abstract class TableBackupClient {

  public static final String BACKUP_CLIENT_IMPL_CLASS = "backup.client.impl.class";

  public static final String BACKUP_TEST_MODE_STAGE = "backup.test.mode.stage";
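
  /*
   * Sketch of how the class-substitution hook above might be used (assumption: the factory that
   * instantiates backup clients honours this key; "MyBackupClient" is a hypothetical subclass):
   *
   *   conf.set(TableBackupClient.BACKUP_CLIENT_IMPL_CLASS, MyBackupClient.class.getName());
   *
   * BACKUP_TEST_MODE_STAGE is read by getTestStage() below to inject failures at a chosen stage
   * during tests.
   */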

  private static final Logger LOG = LoggerFactory.getLogger(TableBackupClient.class);

  protected Configuration conf;
  protected Connection conn;
  protected String backupId;
  protected List<TableName> tableList;
  protected Map<String, Long> newTimestamps = null;

  protected BackupManager backupManager;
  protected BackupInfo backupInfo;
  protected FileSystem fs;

  public TableBackupClient() {
  }

  public TableBackupClient(final Connection conn, final String backupId, BackupRequest request)
    throws IOException {
    init(conn, backupId, request);
  }

  public void init(final Connection conn, final String backupId, BackupRequest request)
    throws IOException {
    if (request.getBackupType() == BackupType.FULL) {
      backupManager = new BackupManager(conn, conn.getConfiguration());
    } else {
      backupManager = new IncrementalBackupManager(conn, conn.getConfiguration());
    }
    this.backupId = backupId;
    this.tableList = request.getTableList();
    this.conn = conn;
    this.conf = conn.getConfiguration();
    this.fs = CommonFSUtils.getCurrentFileSystem(conf);
    backupInfo = backupManager.createBackupInfo(backupId, request.getBackupType(), tableList,
      request.getTargetRootDir(), request.getTotalTasks(), request.getBandwidth(),
      request.getNoChecksumVerify());
    if (tableList == null || tableList.isEmpty()) {
      this.tableList = new ArrayList<>(backupInfo.getTables());
    }
    // Start new session
    backupManager.startBackupSession();
  }

  /**
   * Begin the overall backup.
   * @param backupManager backup manager instance
   * @param backupInfo    backup info
   * @throws IOException exception
   */
  protected void beginBackup(BackupManager backupManager, BackupInfo backupInfo)
    throws IOException {

    BackupSystemTable.snapshot(conn);
    backupManager.setBackupInfo(backupInfo);
    // set the start timestamp of the overall backup
    long startTs = EnvironmentEdgeManager.currentTime();
    backupInfo.setStartTs(startTs);
    // set overall backup status: ongoing
    backupInfo.setState(BackupState.RUNNING);
    backupInfo.setPhase(BackupPhase.REQUEST);
    LOG.info("Backup " + backupInfo.getBackupId() + " started at " + startTs + ".");

    backupManager.updateBackupInfo(backupInfo);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Backup session " + backupInfo.getBackupId() + " has been started.");
    }
  }

  protected String getMessage(Exception e) {
    String msg = e.getMessage();
    if (msg == null || msg.isEmpty()) {
      msg = e.getClass().getName();
    }
    return msg;
  }

  /**
   * Delete HBase snapshots created for the backup.
   * @param conn       connection
   * @param backupInfo backup info
   * @param conf       configuration
   * @throws IOException exception
   */
  protected static void deleteSnapshots(final Connection conn, BackupInfo backupInfo,
    Configuration conf) throws IOException {
    LOG.debug("Trying to delete snapshot for full backup.");
    for (String snapshotName : backupInfo.getSnapshotNames()) {
      if (snapshotName == null) {
        continue;
      }
      LOG.debug("Trying to delete snapshot: " + snapshotName);

      try (Admin admin = conn.getAdmin()) {
        admin.deleteSnapshot(snapshotName);
      }
      LOG.debug("Deleting the snapshot " + snapshotName + " for backup " + backupInfo.getBackupId()
        + " succeeded.");
    }
  }

  /**
   * Clean up directories with prefix "exportSnapshot-", which are generated when exporting
   * snapshots.
   * @param conf configuration
   * @throws IOException exception
   */
  protected static void cleanupExportSnapshotLog(Configuration conf) throws IOException {
    FileSystem fs = CommonFSUtils.getCurrentFileSystem(conf);
    Path stagingDir = new Path(
      conf.get(BackupRestoreConstants.CONF_STAGING_ROOT, fs.getWorkingDirectory().toString()));
    FileStatus[] files = CommonFSUtils.listStatus(fs, stagingDir);
    if (files == null) {
      return;
    }
    for (FileStatus file : files) {
      if (file.getPath().getName().startsWith("exportSnapshot-")) {
        LOG.debug("Deleting log files of snapshot export: " + file.getPath().getName());
        if (!CommonFSUtils.delete(fs, file.getPath(), true)) {
          LOG.warn("Cannot delete " + file.getPath());
        }
      }
    }
  }
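
  /*
   * For orientation, a sketch of the assumed per-table layout that the cleanup below targets,
   * based on HBackupFileSystem.getTableBackupDir(rootDir, backupId, table) (paths hypothetical):
   *
   *   <backupRootDir>/<backupId>/<namespace>/<table>/
   *   e.g. hdfs://nn/backups/backup_1700000000000/default/usertable/
   *
   * If the backup fails after the copy phase has started, each such per-table directory is
   * deleted, and its parent directory is also removed once it becomes empty.
   */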

  /**
   * Clean up the uncompleted data at target directory if the ongoing backup has already entered
   * the copy phase.
   */
  protected static void cleanupTargetDir(BackupInfo backupInfo, Configuration conf) {
    try {
      // clean up the uncompleted data at target directory if the ongoing backup has already
      // entered the copy phase
      LOG.debug("Trying to clean up target dir. Current backup phase: " + backupInfo.getPhase());
      if (
        backupInfo.getPhase().equals(BackupPhase.SNAPSHOTCOPY)
          || backupInfo.getPhase().equals(BackupPhase.INCREMENTAL_COPY)
          || backupInfo.getPhase().equals(BackupPhase.STORE_MANIFEST)
      ) {
        FileSystem outputFs = FileSystem.get(new Path(backupInfo.getBackupRootDir()).toUri(), conf);

        // now treat one backup as a transaction, clean up data that has been partially copied at
        // table level
        for (TableName table : backupInfo.getTables()) {
          Path targetDirPath = new Path(HBackupFileSystem
            .getTableBackupDir(backupInfo.getBackupRootDir(), backupInfo.getBackupId(), table));
          if (outputFs.delete(targetDirPath, true)) {
            LOG.debug(
              "Cleaning up uncompleted backup data at " + targetDirPath.toString() + " done.");
          } else {
            LOG.debug("No data has been copied to " + targetDirPath.toString() + ".");
          }

          Path tableDir = targetDirPath.getParent();
          FileStatus[] backups = CommonFSUtils.listStatus(outputFs, tableDir);
          if (backups == null || backups.length == 0) {
            outputFs.delete(tableDir, true);
            LOG.debug(tableDir.toString() + " is empty, remove it.");
          }
        }
      }

    } catch (IOException e1) {
      LOG.error("Cleaning up uncompleted backup data of " + backupInfo.getBackupId() + " at "
        + backupInfo.getBackupRootDir() + " failed due to " + e1.getMessage() + ".");
    }
  }

  /**
   * Fail the overall backup.
   * @param conn          connection
   * @param backupInfo    backup info
   * @param backupManager backup manager instance
   * @param e             exception
   * @param msg           error message prefix
   * @param type          backup type
   * @param conf          configuration
   * @throws IOException exception
   */
  protected void failBackup(Connection conn, BackupInfo backupInfo, BackupManager backupManager,
    Exception e, String msg, BackupType type, Configuration conf) throws IOException {
    try {
      LOG.error(msg + getMessage(e), e);
      // If this is a cancel exception, then we've already cleaned.
      // set the failure timestamp of the overall backup
      backupInfo.setCompleteTs(EnvironmentEdgeManager.currentTime());
      // set failure message
      backupInfo.setFailedMsg(e.getMessage());
      // set overall backup status: failed
      backupInfo.setState(BackupState.FAILED);
      // compose the backup failed data
      String backupFailedData = "BackupId=" + backupInfo.getBackupId() + ",startts="
        + backupInfo.getStartTs() + ",failedts=" + backupInfo.getCompleteTs() + ",failedphase="
        + backupInfo.getPhase() + ",failedmessage=" + backupInfo.getFailedMsg();
      LOG.error(backupFailedData);
      cleanupAndRestoreBackupSystem(conn, backupInfo, conf);
      // If backup session is updated to FAILED state - means we
      // processed recovery already.
      backupManager.updateBackupInfo(backupInfo);
      backupManager.finishBackupSession();
      LOG.error("Backup " + backupInfo.getBackupId() + " failed.");
    } catch (IOException ee) {
      LOG.error("Please run backup repair tool manually to restore backup system integrity");
      throw ee;
    }
  }

  public static void cleanupAndRestoreBackupSystem(Connection conn, BackupInfo backupInfo,
    Configuration conf) throws IOException {
    BackupType type = backupInfo.getType();
    // if full backup, then delete HBase snapshots if there already are snapshots taken
    // and also clean up export snapshot log files if exist
    if (type == BackupType.FULL) {
      deleteSnapshots(conn, backupInfo, conf);
      cleanupExportSnapshotLog(conf);
    }
    BackupSystemTable.restoreFromSnapshot(conn);
    BackupSystemTable.deleteSnapshot(conn);
    // clean up the uncompleted data at target directory if the ongoing backup has already entered
    // the copy phase
    // For incremental backup, DistCp logs will be cleaned with the targetDir.
    cleanupTargetDir(backupInfo, conf);
  }

  /**
   * Creates a manifest based on the provided info, and stores it in the backup-specific directory.
   * @param backupInfo The current backup info
   * @param type       backup type
   * @param conf       configuration
   * @throws IOException exception
   */
  protected void addManifest(BackupInfo backupInfo, BackupType type, Configuration conf)
    throws IOException {
    // set the overall backup phase : store manifest
    backupInfo.setPhase(BackupPhase.STORE_MANIFEST);

    BackupManifest manifest = new BackupManifest(backupInfo);
    if (type == BackupType.INCREMENTAL) {
      // set the table region server start and end timestamps for incremental backup
      manifest.setIncrTimestampMap(backupInfo.getIncrTimestampMap());
    }
    List<BackupImage> ancestors = getAncestors(backupInfo);
    for (BackupImage image : ancestors) {
      manifest.addDependentImage(image);
    }
    manifest.store(conf);
  }

  /**
   * Gets the direct ancestors of the backup that is currently being created.
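   * <p>
   * Illustrative example (hypothetical backup ids) of the selection rule implemented below: with a
   * history, newest first, of [ INCR_4, FULL_3 covering {t3}, INCR_2, FULL_1 covering {t1, t2} ]
   * and a new incremental backup over {t1, t3}, the ancestors are INCR_4, FULL_3, INCR_2 and
   * FULL_1: every incremental image under the same root directory is kept, and full images are
   * kept until all requested tables are covered.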
   * @param backupInfo The backup info for the backup being created
   */
  protected List<BackupImage> getAncestors(BackupInfo backupInfo) throws IOException {
    LOG.debug("Getting the direct ancestors of the current backup {}", backupInfo.getBackupId());

    // Full backups do not have ancestors
    if (backupInfo.getType() == BackupType.FULL) {
      LOG.debug("Current backup is a full backup, no direct ancestor for it.");
      return Collections.emptyList();
    }

    List<BackupImage> ancestors = new ArrayList<>();
    Set<TableName> tablesToCover = new HashSet<>(backupInfo.getTables());

    // Go over the backup history list from newest to oldest
    List<BackupInfo> allHistoryList =
      backupManager.getBackupHistory(withState(BackupState.COMPLETE));
    for (BackupInfo backup : allHistoryList) {
      // If the image has a different rootDir, it cannot be an ancestor.
      if (!Objects.equals(backup.getBackupRootDir(), backupInfo.getBackupRootDir())) {
        continue;
      }

      BackupImage.Builder builder = BackupImage.newBuilder();
      BackupImage image = builder.withBackupId(backup.getBackupId()).withType(backup.getType())
        .withRootDir(backup.getBackupRootDir()).withTableList(backup.getTableNames())
        .withStartTime(backup.getStartTs()).withCompleteTime(backup.getCompleteTs()).build();

      // The ancestors consist of the most recent FULL backups that cover the list of tables
      // required in the new backup and all INCREMENTAL backups that came after one of those FULL
      // backups.
      if (backup.getType().equals(BackupType.INCREMENTAL)) {
        ancestors.add(image);
        LOG.debug("Dependent incremental backup image: {BackupID={}}", image.getBackupId());
      } else {
        if (tablesToCover.removeAll(new HashSet<>(image.getTableNames()))) {
          ancestors.add(image);
          LOG.debug("Dependent full backup image: {BackupID={}}", image.getBackupId());

          if (tablesToCover.isEmpty()) {
            LOG.debug("Got {} ancestors for the current backup.", ancestors.size());
            return Collections.unmodifiableList(ancestors);
          }
        }
      }
    }

    throw new IllegalStateException(
      "Unable to find full backup that contains tables: " + tablesToCover);
  }

  /**
   * Get backup request metadata as a string.
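   * <p>
   * The resulting string has the form (values hypothetical):
   * {@code type=FULL,tablelist=ns1:t1;ns1:t2,targetRootDir=hdfs://nn/backups}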
   * @param backupInfo backup info
   * @return backup request metadata string
   */
  protected String obtainBackupMetaDataStr(BackupInfo backupInfo) {
    StringBuilder sb = new StringBuilder();
    sb.append("type=" + backupInfo.getType() + ",tablelist=");
    for (TableName table : backupInfo.getTables()) {
      sb.append(table + ";");
    }
    if (sb.lastIndexOf(";") > 0) {
      sb.delete(sb.lastIndexOf(";"), sb.lastIndexOf(";") + 1);
    }
    sb.append(",targetRootDir=" + backupInfo.getBackupRootDir());

    return sb.toString();
  }

  /**
   * Complete the overall backup.
   * @param conn       connection
   * @param backupInfo backup info
   * @param type       backup type
   * @param conf       configuration
   * @throws IOException exception
   */
  protected void completeBackup(final Connection conn, BackupInfo backupInfo, BackupType type,
    Configuration conf) throws IOException {
    // set the complete timestamp of the overall backup
    backupInfo.setCompleteTs(EnvironmentEdgeManager.currentTime());
    // set overall backup status: complete
    backupInfo.setState(BackupState.COMPLETE);
    backupInfo.setProgress(100);
    // add and store the manifest for the backup
    addManifest(backupInfo, type, conf);

    // compose the backup complete data
    String backupCompleteData =
      obtainBackupMetaDataStr(backupInfo) + ",startts=" + backupInfo.getStartTs() + ",completets="
        + backupInfo.getCompleteTs() + ",bytescopied=" + backupInfo.getTotalBytesCopied();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Backup " + backupInfo.getBackupId() + " finished: " + backupCompleteData);
    }

    // when full backup is done:
    // - delete HBase snapshot
    // - clean up directories with prefix "exportSnapshot-", which are generated when exporting
    // snapshots
    // incremental backups use distcp, which handles cleaning up its own directories
    if (type == BackupType.FULL) {
      deleteSnapshots(conn, backupInfo, conf);
      cleanupExportSnapshotLog(conf);
    }
    BackupSystemTable.deleteSnapshot(conn);
    backupManager.updateBackupInfo(backupInfo);

    // Finish active session
    backupManager.finishBackupSession();

    LOG.info("Backup " + backupInfo.getBackupId() + " completed.");
  }

  /**
   * Backup request execution.
   * @throws IOException if the execution of the backup fails
   */
  public abstract void execute() throws IOException;

  protected Stage getTestStage() {
    return Stage.valueOf("stage_" + conf.getInt(BACKUP_TEST_MODE_STAGE, 0));
  }

  protected void failStageIf(Stage stage) throws IOException {
    Stage current = getTestStage();
    if (current == stage) {
      throw new IOException("Failed stage " + stage + " in testing");
    }
  }

  public enum Stage {
    stage_0,
    stage_1,
    stage_2,
    stage_3,
    stage_4
  }
}