001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.backup.impl; 019 020import java.io.Closeable; 021import java.io.IOException; 022import java.util.ArrayList; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Map; 026import java.util.Set; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.Path; 029import org.apache.hadoop.hbase.HConstants; 030import org.apache.hadoop.hbase.TableName; 031import org.apache.hadoop.hbase.backup.BackupHFileCleaner; 032import org.apache.hadoop.hbase.backup.BackupInfo; 033import org.apache.hadoop.hbase.backup.BackupInfo.BackupState; 034import org.apache.hadoop.hbase.backup.BackupObserver; 035import org.apache.hadoop.hbase.backup.BackupRestoreConstants; 036import org.apache.hadoop.hbase.backup.BackupType; 037import org.apache.hadoop.hbase.backup.HBackupFileSystem; 038import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage; 039import org.apache.hadoop.hbase.backup.master.BackupLogCleaner; 040import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager; 041import org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager; 042import org.apache.hadoop.hbase.client.Admin; 043import org.apache.hadoop.hbase.client.Connection; 044import org.apache.hadoop.hbase.client.TableDescriptor; 045import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; 046import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; 047import org.apache.hadoop.hbase.procedure.ProcedureManagerHost; 048import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 049import org.apache.hadoop.hbase.util.Pair; 050import org.apache.yetus.audience.InterfaceAudience; 051import org.slf4j.Logger; 052import org.slf4j.LoggerFactory; 053 054/** 055 * Handles backup requests, creates backup info records in backup system table to keep track of 056 * backup sessions, dispatches backup request. 057 */ 058@InterfaceAudience.Private 059public class BackupManager implements Closeable { 060 // in seconds 061 public final static String BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY = 062 "hbase.backup.exclusive.op.timeout.seconds"; 063 // In seconds 064 private final static int DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT = 3600; 065 private static final Logger LOG = LoggerFactory.getLogger(BackupManager.class); 066 067 protected Configuration conf = null; 068 protected BackupInfo backupInfo = null; 069 protected BackupSystemTable systemTable; 070 protected final Connection conn; 071 072 /** 073 * Backup manager constructor. 074 * @param conn connection 075 * @param conf configuration 076 * @throws IOException exception 077 */ 078 public BackupManager(Connection conn, Configuration conf) throws IOException { 079 if ( 080 !conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY, 081 BackupRestoreConstants.BACKUP_ENABLE_DEFAULT) 082 ) { 083 throw new BackupException("HBase backup is not enabled. Check your " 084 + BackupRestoreConstants.BACKUP_ENABLE_KEY + " setting."); 085 } 086 this.conf = conf; 087 this.conn = conn; 088 this.systemTable = new BackupSystemTable(conn); 089 } 090 091 /** 092 * Returns backup info 093 */ 094 protected BackupInfo getBackupInfo() { 095 return backupInfo; 096 } 097 098 /** 099 * This method modifies the master's configuration in order to inject backup-related features 100 * (TESTs only) 101 * @param conf configuration 102 */ 103 public static void decorateMasterConfiguration(Configuration conf) { 104 if (!isBackupEnabled(conf)) { 105 return; 106 } 107 // Add WAL archive cleaner plug-in 108 String plugins = conf.get(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS); 109 String cleanerClass = BackupLogCleaner.class.getCanonicalName(); 110 if (!plugins.contains(cleanerClass)) { 111 conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, plugins + "," + cleanerClass); 112 } 113 114 String classes = conf.get(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY); 115 String masterProcedureClass = LogRollMasterProcedureManager.class.getName(); 116 if (classes == null) { 117 conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY, masterProcedureClass); 118 } else if (!classes.contains(masterProcedureClass)) { 119 conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY, 120 classes + "," + masterProcedureClass); 121 } 122 123 plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS); 124 conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, 125 (plugins == null ? "" : plugins + ",") + BackupHFileCleaner.class.getName()); 126 if (LOG.isDebugEnabled()) { 127 LOG.debug( 128 "Added log cleaner: {}. Added master procedure manager: {}." 129 + "Added master procedure manager: {}", 130 cleanerClass, masterProcedureClass, BackupHFileCleaner.class.getName()); 131 } 132 } 133 134 /** 135 * This method modifies the Region Server configuration in order to inject backup-related features 136 * TESTs only. 137 * @param conf configuration 138 */ 139 public static void decorateRegionServerConfiguration(Configuration conf) { 140 if (!isBackupEnabled(conf)) { 141 return; 142 } 143 144 String classes = conf.get(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY); 145 String regionProcedureClass = LogRollRegionServerProcedureManager.class.getName(); 146 if (classes == null) { 147 conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY, regionProcedureClass); 148 } else if (!classes.contains(regionProcedureClass)) { 149 conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY, 150 classes + "," + regionProcedureClass); 151 } 152 String coproc = conf.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY); 153 String regionObserverClass = BackupObserver.class.getName(); 154 conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, 155 (coproc == null ? "" : coproc + ",") + regionObserverClass); 156 if (LOG.isDebugEnabled()) { 157 LOG.debug("Added region procedure manager: {}. Added region observer: {}", 158 regionProcedureClass, regionObserverClass); 159 } 160 } 161 162 public static boolean isBackupEnabled(Configuration conf) { 163 return conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY, 164 BackupRestoreConstants.BACKUP_ENABLE_DEFAULT); 165 } 166 167 /** 168 * Get configuration 169 */ 170 Configuration getConf() { 171 return conf; 172 } 173 174 /** 175 * Stop all the work of backup. 176 */ 177 @Override 178 public void close() { 179 if (systemTable != null) { 180 try { 181 systemTable.close(); 182 } catch (Exception e) { 183 LOG.error(e.toString(), e); 184 } 185 } 186 } 187 188 /** 189 * Creates a backup info based on input backup request. 190 * @param backupId backup id 191 * @param type type 192 * @param tableList table list 193 * @param targetRootDir root dir 194 * @param workers number of parallel workers 195 * @param bandwidth bandwidth per worker in MB per sec 196 * @throws BackupException exception 197 */ 198 public BackupInfo createBackupInfo(String backupId, BackupType type, List<TableName> tableList, 199 String targetRootDir, int workers, long bandwidth) throws BackupException { 200 if (targetRootDir == null) { 201 throw new BackupException("Wrong backup request parameter: target backup root directory"); 202 } 203 204 if (type == BackupType.FULL && (tableList == null || tableList.isEmpty())) { 205 // If table list is null for full backup, which means backup all tables. Then fill the table 206 // list with all user tables from meta. It no table available, throw the request exception. 207 List<TableDescriptor> htds = null; 208 try (Admin admin = conn.getAdmin()) { 209 htds = admin.listTableDescriptors(); 210 } catch (Exception e) { 211 throw new BackupException(e); 212 } 213 214 if (htds == null) { 215 throw new BackupException("No table exists for full backup of all tables."); 216 } else { 217 tableList = new ArrayList<>(); 218 for (TableDescriptor hTableDescriptor : htds) { 219 TableName tn = hTableDescriptor.getTableName(); 220 if (tn.equals(BackupSystemTable.getTableName(conf))) { 221 // skip backup system table 222 continue; 223 } 224 tableList.add(hTableDescriptor.getTableName()); 225 } 226 227 LOG.info("Full backup all the tables available in the cluster: {}", tableList); 228 } 229 } 230 231 // there are one or more tables in the table list 232 backupInfo = new BackupInfo(backupId, type, tableList.toArray(new TableName[tableList.size()]), 233 targetRootDir); 234 backupInfo.setBandwidth(bandwidth); 235 backupInfo.setWorkers(workers); 236 return backupInfo; 237 } 238 239 /** 240 * Check if any ongoing backup. Currently, we only reply on checking status in backup system 241 * table. We need to consider to handle the case of orphan records in the future. Otherwise, all 242 * the coming request will fail. 243 * @return the ongoing backup id if on going backup exists, otherwise null 244 * @throws IOException exception 245 */ 246 private String getOngoingBackupId() throws IOException { 247 ArrayList<BackupInfo> sessions = systemTable.getBackupInfos(BackupState.RUNNING); 248 if (sessions.size() == 0) { 249 return null; 250 } 251 return sessions.get(0).getBackupId(); 252 } 253 254 /** 255 * Start the backup manager service. 256 * @throws IOException exception 257 */ 258 public void initialize() throws IOException { 259 String ongoingBackupId = this.getOngoingBackupId(); 260 if (ongoingBackupId != null) { 261 LOG.info("There is a ongoing backup {}" 262 + ". Can not launch new backup until no ongoing backup remains.", ongoingBackupId); 263 throw new BackupException("There is ongoing backup seesion."); 264 } 265 } 266 267 public void setBackupInfo(BackupInfo backupInfo) { 268 this.backupInfo = backupInfo; 269 } 270 271 /** 272 * Get direct ancestors of the current backup. 273 * @param backupInfo The backup info for the current backup 274 * @return The ancestors for the current backup 275 * @throws IOException exception 276 */ 277 public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo) throws IOException { 278 LOG.debug("Getting the direct ancestors of the current backup {}", backupInfo.getBackupId()); 279 280 ArrayList<BackupImage> ancestors = new ArrayList<>(); 281 282 // full backup does not have ancestor 283 if (backupInfo.getType() == BackupType.FULL) { 284 LOG.debug("Current backup is a full backup, no direct ancestor for it."); 285 return ancestors; 286 } 287 288 // get all backup history list in descending order 289 ArrayList<BackupInfo> allHistoryList = getBackupHistory(true); 290 for (BackupInfo backup : allHistoryList) { 291 292 BackupImage.Builder builder = BackupImage.newBuilder(); 293 294 BackupImage image = builder.withBackupId(backup.getBackupId()).withType(backup.getType()) 295 .withRootDir(backup.getBackupRootDir()).withTableList(backup.getTableNames()) 296 .withStartTime(backup.getStartTs()).withCompleteTime(backup.getCompleteTs()).build(); 297 298 // Only direct ancestors for a backup are required and not entire history of backup for this 299 // table resulting in verifying all of the previous backups which is unnecessary and backup 300 // paths need not be valid beyond the lifetime of a backup. 301 // 302 // RootDir is way of grouping a single backup including one full and many incremental backups 303 if (!image.getRootDir().equals(backupInfo.getBackupRootDir())) { 304 continue; 305 } 306 307 // add the full backup image as an ancestor until the last incremental backup 308 if (backup.getType().equals(BackupType.FULL)) { 309 // check the backup image coverage, if previous image could be covered by the newer ones, 310 // then no need to add 311 if (!BackupManifest.canCoverImage(ancestors, image)) { 312 ancestors.add(image); 313 } 314 } else { 315 // found last incremental backup, if previously added full backup ancestor images can cover 316 // it, then this incremental ancestor is not the dependent of the current incremental 317 // backup, that is to say, this is the backup scope boundary of current table set. 318 // Otherwise, this incremental backup ancestor is the dependent ancestor of the ongoing 319 // incremental backup 320 if (BackupManifest.canCoverImage(ancestors, image)) { 321 LOG.debug("Met the backup boundary of the current table set:"); 322 for (BackupImage image1 : ancestors) { 323 LOG.debug(" BackupID={}, BackupDir={}", image1.getBackupId(), image1.getRootDir()); 324 } 325 } else { 326 Path logBackupPath = 327 HBackupFileSystem.getBackupPath(backup.getBackupRootDir(), backup.getBackupId()); 328 LOG.debug( 329 "Current backup has an incremental backup ancestor, " 330 + "touching its image manifest in {}" + " to construct the dependency.", 331 logBackupPath.toString()); 332 BackupManifest lastIncrImgManifest = new BackupManifest(conf, logBackupPath); 333 BackupImage lastIncrImage = lastIncrImgManifest.getBackupImage(); 334 ancestors.add(lastIncrImage); 335 336 LOG.debug("Last dependent incremental backup image: {BackupID={}" + "BackupDir={}}", 337 lastIncrImage.getBackupId(), lastIncrImage.getRootDir()); 338 } 339 } 340 } 341 LOG.debug("Got {} ancestors for the current backup.", ancestors.size()); 342 return ancestors; 343 } 344 345 /** 346 * Get the direct ancestors of this backup for one table involved. 347 * @param backupInfo backup info 348 * @param table table 349 * @return backupImages on the dependency list 350 * @throws IOException exception 351 */ 352 public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo, TableName table) 353 throws IOException { 354 ArrayList<BackupImage> ancestors = getAncestors(backupInfo); 355 ArrayList<BackupImage> tableAncestors = new ArrayList<>(); 356 for (BackupImage image : ancestors) { 357 if (image.hasTable(table)) { 358 tableAncestors.add(image); 359 if (image.getType() == BackupType.FULL) { 360 break; 361 } 362 } 363 } 364 return tableAncestors; 365 } 366 367 /* 368 * backup system table operations 369 */ 370 371 /** 372 * Updates status (state) of a backup session in a persistent store 373 * @param context context 374 * @throws IOException exception 375 */ 376 public void updateBackupInfo(BackupInfo context) throws IOException { 377 systemTable.updateBackupInfo(context); 378 } 379 380 /** 381 * Starts new backup session 382 * @throws IOException if active session already exists 383 */ 384 public void startBackupSession() throws IOException { 385 long startTime = EnvironmentEdgeManager.currentTime(); 386 long timeout = conf.getInt(BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY, 387 DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT) * 1000L; 388 long lastWarningOutputTime = 0; 389 while (EnvironmentEdgeManager.currentTime() - startTime < timeout) { 390 try { 391 systemTable.startBackupExclusiveOperation(); 392 return; 393 } catch (IOException e) { 394 if (e instanceof ExclusiveOperationException) { 395 // sleep, then repeat 396 try { 397 Thread.sleep(1000); 398 } catch (InterruptedException e1) { 399 // Restore the interrupted status 400 Thread.currentThread().interrupt(); 401 } 402 if ( 403 lastWarningOutputTime == 0 404 || (EnvironmentEdgeManager.currentTime() - lastWarningOutputTime) > 60000 405 ) { 406 lastWarningOutputTime = EnvironmentEdgeManager.currentTime(); 407 LOG.warn("Waiting to acquire backup exclusive lock for {}s", 408 +(lastWarningOutputTime - startTime) / 1000); 409 } 410 } else { 411 throw e; 412 } 413 } 414 } 415 throw new IOException( 416 "Failed to acquire backup system table exclusive lock after " + timeout / 1000 + "s"); 417 } 418 419 /** 420 * Finishes active backup session 421 * @throws IOException if no active session 422 */ 423 public void finishBackupSession() throws IOException { 424 systemTable.finishBackupExclusiveOperation(); 425 } 426 427 /** 428 * Read the last backup start code (timestamp) of last successful backup. Will return null if 429 * there is no startcode stored in backup system table or the value is of length 0. These two 430 * cases indicate there is no successful backup completed so far. 431 * @return the timestamp of a last successful backup 432 * @throws IOException exception 433 */ 434 public String readBackupStartCode() throws IOException { 435 return systemTable.readBackupStartCode(backupInfo.getBackupRootDir()); 436 } 437 438 /** 439 * Write the start code (timestamp) to backup system table. If passed in null, then write 0 byte. 440 * @param startCode start code 441 * @throws IOException exception 442 */ 443 public void writeBackupStartCode(Long startCode) throws IOException { 444 systemTable.writeBackupStartCode(startCode, backupInfo.getBackupRootDir()); 445 } 446 447 /** 448 * Get the RS log information after the last log roll from backup system table. 449 * @return RS log info 450 * @throws IOException exception 451 */ 452 public HashMap<String, Long> readRegionServerLastLogRollResult() throws IOException { 453 return systemTable.readRegionServerLastLogRollResult(backupInfo.getBackupRootDir()); 454 } 455 456 public Pair<Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>>, List<byte[]>> 457 readBulkloadRows(List<TableName> tableList) throws IOException { 458 return systemTable.readBulkloadRows(tableList); 459 } 460 461 public void deleteBulkLoadedRows(List<byte[]> rows) throws IOException { 462 systemTable.deleteBulkLoadedRows(rows); 463 } 464 465 /** 466 * Get all completed backup information (in desc order by time) 467 * @return history info of BackupCompleteData 468 * @throws IOException exception 469 */ 470 public List<BackupInfo> getBackupHistory() throws IOException { 471 return systemTable.getBackupHistory(); 472 } 473 474 public ArrayList<BackupInfo> getBackupHistory(boolean completed) throws IOException { 475 return systemTable.getBackupHistory(completed); 476 } 477 478 /** 479 * Write the current timestamps for each regionserver to backup system table after a successful 480 * full or incremental backup. Each table may have a different set of log timestamps. The saved 481 * timestamp is of the last log file that was backed up already. 482 * @param tables tables 483 * @throws IOException exception 484 */ 485 public void writeRegionServerLogTimestamp(Set<TableName> tables, Map<String, Long> newTimestamps) 486 throws IOException { 487 systemTable.writeRegionServerLogTimestamp(tables, newTimestamps, backupInfo.getBackupRootDir()); 488 } 489 490 /** 491 * Read the timestamp for each region server log after the last successful backup. Each table has 492 * its own set of the timestamps. 493 * @return the timestamp for each region server. key: tableName value: 494 * RegionServer,PreviousTimeStamp 495 * @throws IOException exception 496 */ 497 public Map<TableName, Map<String, Long>> readLogTimestampMap() throws IOException { 498 return systemTable.readLogTimestampMap(backupInfo.getBackupRootDir()); 499 } 500 501 /** 502 * Return the current tables covered by incremental backup. 503 * @return set of tableNames 504 * @throws IOException exception 505 */ 506 public Set<TableName> getIncrementalBackupTableSet() throws IOException { 507 return systemTable.getIncrementalBackupTableSet(backupInfo.getBackupRootDir()); 508 } 509 510 /** 511 * Adds set of tables to overall incremental backup table set 512 * @param tables tables 513 * @throws IOException exception 514 */ 515 public void addIncrementalBackupTableSet(Set<TableName> tables) throws IOException { 516 systemTable.addIncrementalBackupTableSet(tables, backupInfo.getBackupRootDir()); 517 } 518 519 public Connection getConnection() { 520 return conn; 521 } 522}