/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.impl;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupHFileCleaner;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
import org.apache.hadoop.hbase.backup.BackupObserver;
import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
import org.apache.hadoop.hbase.backup.master.BackupLogCleaner;
import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
import org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.procedure.ProcedureManagerHost;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Handles backup requests: creates backup info records in the backup system table to keep track
 * of backup sessions, and dispatches backup requests.
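 * <p>
 * A minimal usage sketch (assuming backup is enabled via {@code hbase.backup.enable=true}, and
 * given a {@code Connection connection}, a {@code Configuration conf} and a table list
 * {@code tables}; error handling and the actual backup work are omitted):
 *
 * <pre>
 * try (BackupManager manager = new BackupManager(connection, conf)) {
 *   manager.initialize();
 *   manager.startBackupSession();
 *   try {
 *     BackupInfo info = manager.createBackupInfo("backup_12345", BackupType.FULL, tables,
 *       "hdfs://namenode/backup", 3, 100L);
 *     // ... run the backup, updating its state via updateBackupInfo(info) ...
 *   } finally {
 *     manager.finishBackupSession();
 *   }
 * }
 * </pre>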
 */
@InterfaceAudience.Private
public class BackupManager implements Closeable {
  // in seconds
  public final static String BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY =
    "hbase.backup.exclusive.op.timeout.seconds";
  // in seconds
  private final static int DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT = 3600;
  private static final Logger LOG = LoggerFactory.getLogger(BackupManager.class);

  protected Configuration conf = null;
  protected BackupInfo backupInfo = null;
  protected BackupSystemTable systemTable;
  protected final Connection conn;

  /**
   * Backup manager constructor.
   * @param conn connection
   * @param conf configuration
   * @throws IOException exception
   */
  public BackupManager(Connection conn, Configuration conf) throws IOException {
    if (
      !conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY,
        BackupRestoreConstants.BACKUP_ENABLE_DEFAULT)
    ) {
      throw new BackupException("HBase backup is not enabled. Check your "
        + BackupRestoreConstants.BACKUP_ENABLE_KEY + " setting.");
    }
    this.conf = conf;
    this.conn = conn;
    this.systemTable = new BackupSystemTable(conn);
  }

  /**
   * Returns backup info
   */
  protected BackupInfo getBackupInfo() {
    return backupInfo;
  }

  /**
   * This method modifies the master's configuration in order to inject backup-related features
   * (for tests only).
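   * <p>
   * For example (a sketch; the exact lists depend on the pre-existing configuration), after this
   * call the configuration contains entries along the lines of:
   *
   * <pre>
   * hbase.master.logcleaner.plugins = ...,org.apache.hadoop.hbase.backup.master.BackupLogCleaner
   * hbase.procedure.master.classes = org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager
   * hbase.master.hfilecleaner.plugins = ...,org.apache.hadoop.hbase.backup.BackupHFileCleaner
   * </pre>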
   * @param conf configuration
   */
  public static void decorateMasterConfiguration(Configuration conf) {
    if (!isBackupEnabled(conf)) {
      return;
    }
    // Add WAL archive cleaner plug-in
    String plugins = conf.get(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
    String cleanerClass = BackupLogCleaner.class.getCanonicalName();
    if (plugins == null) {
      conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, cleanerClass);
    } else if (!plugins.contains(cleanerClass)) {
      conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, plugins + "," + cleanerClass);
    }

    String classes = conf.get(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY);
    String masterProcedureClass = LogRollMasterProcedureManager.class.getName();
    if (classes == null) {
      conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY, masterProcedureClass);
    } else if (!classes.contains(masterProcedureClass)) {
      conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY,
        classes + "," + masterProcedureClass);
    }

    plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
    conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
      (plugins == null ? "" : plugins + ",") + BackupHFileCleaner.class.getName());
    if (LOG.isDebugEnabled()) {
      LOG.debug(
        "Added log cleaner: {}. Added master procedure manager: {}. Added hfile cleaner: {}",
        cleanerClass, masterProcedureClass, BackupHFileCleaner.class.getName());
    }
  }

  /**
   * This method modifies the Region Server configuration in order to inject backup-related
   * features (for tests only).
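   * <p>
   * For example (a sketch; the exact lists depend on the pre-existing configuration), after this
   * call the configuration contains entries along the lines of:
   *
   * <pre>
   * hbase.procedure.regionserver.classes = org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager
   * hbase.coprocessor.region.classes = ...,org.apache.hadoop.hbase.backup.BackupObserver
   * </pre>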
   * @param conf configuration
   */
  public static void decorateRegionServerConfiguration(Configuration conf) {
    if (!isBackupEnabled(conf)) {
      return;
    }

    String classes = conf.get(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY);
    String regionProcedureClass = LogRollRegionServerProcedureManager.class.getName();
    if (classes == null) {
      conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY, regionProcedureClass);
    } else if (!classes.contains(regionProcedureClass)) {
      conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY,
        classes + "," + regionProcedureClass);
    }
    String coproc = conf.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY);
    String regionObserverClass = BackupObserver.class.getName();
    conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
      (coproc == null ? "" : coproc + ",") + regionObserverClass);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Added region procedure manager: {}. Added region observer: {}",
        regionProcedureClass, regionObserverClass);
    }
  }

  public static boolean isBackupEnabled(Configuration conf) {
    return conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY,
      BackupRestoreConstants.BACKUP_ENABLE_DEFAULT);
  }

  /**
   * Get configuration
   */
  Configuration getConf() {
    return conf;
  }

  /**
   * Stops all the work of backup and closes the backup system table.
   */
  @Override
  public void close() {
    if (systemTable != null) {
      try {
        systemTable.close();
      } catch (Exception e) {
        LOG.error(e.toString(), e);
      }
    }
  }

  /**
   * Creates a backup info based on the input backup request.
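   * <p>
   * A sketch of a typical call (identifiers and paths are illustrative):
   *
   * <pre>
   * BackupInfo info = manager.createBackupInfo("backup_1700000000000", BackupType.FULL,
   *   Arrays.asList(TableName.valueOf("t1")), "hdfs://namenode/backup", 3, 100L);
   * </pre>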
   * @param backupId      backup id
   * @param type          type
   * @param tableList     table list
   * @param targetRootDir root dir
   * @param workers       number of parallel workers
   * @param bandwidth     bandwidth per worker in MB per sec
   * @throws BackupException exception
   */
  public BackupInfo createBackupInfo(String backupId, BackupType type, List<TableName> tableList,
    String targetRootDir, int workers, long bandwidth) throws BackupException {
    if (targetRootDir == null) {
      throw new BackupException("Wrong backup request parameter: target backup root directory");
    }

    if (type == BackupType.FULL && (tableList == null || tableList.isEmpty())) {
      // A null or empty table list for a full backup means "back up all tables": fill the table
      // list with all user tables from meta. If no table is available, reject the request.
      List<TableDescriptor> htds = null;
      try (Admin admin = conn.getAdmin()) {
        htds = admin.listTableDescriptors();
      } catch (Exception e) {
        throw new BackupException(e);
      }

      if (htds == null) {
        throw new BackupException("No table exists for full backup of all tables.");
      } else {
        tableList = new ArrayList<>();
        for (TableDescriptor hTableDescriptor : htds) {
          TableName tn = hTableDescriptor.getTableName();
          if (tn.equals(BackupSystemTable.getTableName(conf))) {
            // skip backup system table
            continue;
          }
          tableList.add(tn);
        }

        LOG.info("Full backup of all tables available in the cluster: {}", tableList);
      }
    }

    // there are one or more tables in the table list
    backupInfo = new BackupInfo(backupId, type, tableList.toArray(new TableName[tableList.size()]),
      targetRootDir);
    backupInfo.setBandwidth(bandwidth);
    backupInfo.setWorkers(workers);
    return backupInfo;
  }

  /**
   * Checks if there is any ongoing backup. Currently, we rely only on the backup state recorded
   * in the backup system table. Handling orphan records will need to be considered in the future;
   * otherwise, all subsequent requests will fail.
   * @return the ongoing backup id if an ongoing backup exists, otherwise null
   * @throws IOException exception
   */
  private String getOngoingBackupId() throws IOException {
    ArrayList<BackupInfo> sessions = systemTable.getBackupInfos(BackupState.RUNNING);
    if (sessions.isEmpty()) {
      return null;
    }
    return sessions.get(0).getBackupId();
  }

  /**
   * Starts the backup manager service.
   * @throws IOException exception
   */
  public void initialize() throws IOException {
    String ongoingBackupId = this.getOngoingBackupId();
    if (ongoingBackupId != null) {
      LOG.info("There is an ongoing backup {}. "
        + "Cannot launch a new backup until no ongoing backup remains.", ongoingBackupId);
      throw new BackupException("There is an ongoing backup session.");
    }
  }

  public void setBackupInfo(BackupInfo backupInfo) {
    this.backupInfo = backupInfo;
  }

  /**
   * Get direct ancestors of the current backup.
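   * <p>
   * An illustrative sketch: for a lineage under a single backup root consisting of a full backup
   * F followed by incremental backups I1 and I2, a new incremental backup depends on the most
   * recent image that covers the chain (here I2, whose manifest in turn records its own
   * ancestors); images already covered by previously added ancestors are skipped.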
   * @param backupInfo The backup info for the current backup
   * @return The ancestors for the current backup
   * @throws IOException exception
   */
  public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo) throws IOException {
    LOG.debug("Getting the direct ancestors of the current backup {}", backupInfo.getBackupId());

    ArrayList<BackupImage> ancestors = new ArrayList<>();

    // a full backup has no ancestors
    if (backupInfo.getType() == BackupType.FULL) {
      LOG.debug("Current backup is a full backup, no direct ancestor for it.");
      return ancestors;
    }

    // get all backup history list in descending order
    ArrayList<BackupInfo> allHistoryList = getBackupHistory(true);
    for (BackupInfo backup : allHistoryList) {

      BackupImage.Builder builder = BackupImage.newBuilder();

      BackupImage image = builder.withBackupId(backup.getBackupId()).withType(backup.getType())
        .withRootDir(backup.getBackupRootDir()).withTableList(backup.getTableNames())
        .withStartTime(backup.getStartTs()).withCompleteTime(backup.getCompleteTs()).build();

      // Only the direct ancestors of a backup are required, not the entire backup history for
      // this table; verifying all previous backups would be unnecessary, and backup paths need
      // not remain valid beyond the lifetime of a backup.
      //
      // The root dir is a way of grouping a single backup lineage: one full backup plus its
      // incremental backups.
      if (!image.getRootDir().equals(backupInfo.getBackupRootDir())) {
        continue;
      }

      // add the full backup image as an ancestor until the last incremental backup
      if (backup.getType().equals(BackupType.FULL)) {
        // Check the backup image coverage: if the previous image is already covered by newer
        // ones, there is no need to add it.
        if (!BackupManifest.canCoverImage(ancestors, image)) {
          ancestors.add(image);
        }
      } else {
        // We found the last incremental backup. If the previously added full backup ancestor
        // images can cover it, this incremental ancestor is not a dependency of the current
        // incremental backup; that is, it marks the backup scope boundary of the current table
        // set. Otherwise, this incremental backup ancestor is a dependency of the ongoing
        // incremental backup.
        if (BackupManifest.canCoverImage(ancestors, image)) {
          LOG.debug("Met the backup boundary of the current table set:");
          for (BackupImage image1 : ancestors) {
            LOG.debug("  BackupID={}, BackupDir={}", image1.getBackupId(), image1.getRootDir());
          }
        } else {
          Path logBackupPath =
            HBackupFileSystem.getBackupPath(backup.getBackupRootDir(), backup.getBackupId());
          LOG.debug(
            "Current backup has an incremental backup ancestor, "
              + "touching its image manifest in {} to construct the dependency.", logBackupPath);
          BackupManifest lastIncrImgManifest = new BackupManifest(conf, logBackupPath);
          BackupImage lastIncrImage = lastIncrImgManifest.getBackupImage();
          ancestors.add(lastIncrImage);

          LOG.debug("Last dependent incremental backup image: {BackupID={}, BackupDir={}}",
            lastIncrImage.getBackupId(), lastIncrImage.getRootDir());
        }
      }
    }
    LOG.debug("Got {} ancestors for the current backup.", ancestors.size());
    return ancestors;
  }

  /**
   * Get the direct ancestors of this backup for one table involved.
   * @param backupInfo backup info
   * @param table      table
   * @return backupImages on the dependency list
   * @throws IOException exception
   */
  public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo, TableName table)
    throws IOException {
    ArrayList<BackupImage> ancestors = getAncestors(backupInfo);
    ArrayList<BackupImage> tableAncestors = new ArrayList<>();
    for (BackupImage image : ancestors) {
      if (image.hasTable(table)) {
        tableAncestors.add(image);
        if (image.getType() == BackupType.FULL) {
          break;
        }
      }
    }
    return tableAncestors;
  }

  /*
   * backup system table operations
   */

  /**
   * Updates status (state) of a backup session in a persistent store
   * @param context context
   * @throws IOException exception
   */
  public void updateBackupInfo(BackupInfo context) throws IOException {
    systemTable.updateBackupInfo(context);
  }

  /**
   * Starts new backup session
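   * <p>
   * The wait for the exclusive lock is bounded by
   * {@code hbase.backup.exclusive.op.timeout.seconds} (default 3600 seconds). A sketch of
   * lowering it, e.g. for tests:
   *
   * <pre>
   * conf.setInt(BackupManager.BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY, 30);
   * </pre>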
   * @throws IOException if active session already exists
   */
  public void startBackupSession() throws IOException {
    long startTime = EnvironmentEdgeManager.currentTime();
    long timeout = conf.getInt(BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY,
      DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT) * 1000L;
    long lastWarningOutputTime = 0;
    while (EnvironmentEdgeManager.currentTime() - startTime < timeout) {
      try {
        systemTable.startBackupExclusiveOperation();
        return;
      } catch (IOException e) {
        if (e instanceof ExclusiveOperationException) {
          // sleep, then repeat
          try {
            Thread.sleep(1000);
          } catch (InterruptedException e1) {
            // Restore the interrupted status
            Thread.currentThread().interrupt();
          }
          if (
            lastWarningOutputTime == 0
              || (EnvironmentEdgeManager.currentTime() - lastWarningOutputTime) > 60000
          ) {
            lastWarningOutputTime = EnvironmentEdgeManager.currentTime();
            LOG.warn("Waiting to acquire backup exclusive lock for {}s",
              (lastWarningOutputTime - startTime) / 1000);
          }
        } else {
          throw e;
        }
      }
    }
    throw new IOException(
      "Failed to acquire backup system table exclusive lock after " + timeout / 1000 + "s");
  }

  /**
   * Finishes active backup session
   * @throws IOException if no active session
   */
  public void finishBackupSession() throws IOException {
    systemTable.finishBackupExclusiveOperation();
  }

  /**
   * Reads the start code (timestamp) of the last successful backup. Returns null if there is no
   * start code stored in the backup system table or the value has length 0. Both cases indicate
   * that no successful backup has completed so far.
   * @return the timestamp of the last successful backup
   * @throws IOException exception
   */
  public String readBackupStartCode() throws IOException {
    return systemTable.readBackupStartCode(backupInfo.getBackupRootDir());
  }

  /**
   * Writes the start code (timestamp) to the backup system table. If null is passed in, a
   * zero-length value is written.
   * @param startCode start code
   * @throws IOException exception
   */
  public void writeBackupStartCode(Long startCode) throws IOException {
    systemTable.writeBackupStartCode(startCode, backupInfo.getBackupRootDir());
  }

  /**
   * Get the RS log information after the last log roll from backup system table.
   * @return RS log info
   * @throws IOException exception
   */
  public HashMap<String, Long> readRegionServerLastLogRollResult() throws IOException {
    return systemTable.readRegionServerLastLogRollResult(backupInfo.getBackupRootDir());
  }

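  /**
   * Reads the current bulk load records for the given tables from the backup system table.
   * @param tableList list of tables to read bulk load records for
   * @return a pair of the bulk-loaded file mapping and the backing system table row keys; the
   *         row keys can later be passed to {@link #deleteBulkLoadedRows(List)}
   * @throws IOException exception
   */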
  public Pair<Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>>, List<byte[]>>
    readBulkloadRows(List<TableName> tableList) throws IOException {
    return systemTable.readBulkloadRows(tableList);
  }

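  /**
   * Deletes the given bulk load records from the backup system table.
   * @param rows row keys as returned by {@link #readBulkloadRows(List)}
   * @throws IOException exception
   */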
  public void deleteBulkLoadedRows(List<byte[]> rows) throws IOException {
    systemTable.deleteBulkLoadedRows(rows);
  }

  /**
   * Get all completed backup information (in desc order by time)
   * @return history info of BackupCompleteData
   * @throws IOException exception
   */
  public List<BackupInfo> getBackupHistory() throws IOException {
    return systemTable.getBackupHistory();
  }

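  /**
   * Get backup history, optionally restricted to completed sessions.
   * @param completed if true, return only completed backup sessions; otherwise return all
   * @return history of backup sessions (in desc order by time)
   * @throws IOException exception
   */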
  public ArrayList<BackupInfo> getBackupHistory(boolean completed) throws IOException {
    return systemTable.getBackupHistory(completed);
  }

  /**
   * Write the current timestamps for each regionserver to backup system table after a successful
   * full or incremental backup. Each table may have a different set of log timestamps. The saved
   * timestamp is that of the last log file already backed up.
   * @param tables        tables
   * @param newTimestamps the log roll timestamp per region server
   * @throws IOException exception
   */
  public void writeRegionServerLogTimestamp(Set<TableName> tables, Map<String, Long> newTimestamps)
    throws IOException {
    systemTable.writeRegionServerLogTimestamp(tables, newTimestamps, backupInfo.getBackupRootDir());
  }

  /**
   * Read the timestamp for each region server log after the last successful backup. Each table
   * has its own set of the timestamps.
   * @return the timestamp for each region server, keyed by table name; each value maps a region
   *         server to its previous (last backed-up) timestamp
   * @throws IOException exception
   */
  public Map<TableName, Map<String, Long>> readLogTimestampMap() throws IOException {
    return systemTable.readLogTimestampMap(backupInfo.getBackupRootDir());
  }

  /**
   * Return the current tables covered by incremental backup.
   * @return set of tableNames
   * @throws IOException exception
   */
  public Set<TableName> getIncrementalBackupTableSet() throws IOException {
    return systemTable.getIncrementalBackupTableSet(backupInfo.getBackupRootDir());
  }

  /**
   * Adds set of tables to overall incremental backup table set
   * @param tables tables
   * @throws IOException exception
   */
  public void addIncrementalBackupTableSet(Set<TableName> tables) throws IOException {
    systemTable.addIncrementalBackupTableSet(tables, backupInfo.getBackupRootDir());
  }

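  /**
   * Returns the HBase connection used by this backup manager.
   */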
  public Connection getConnection() {
    return conn;
  }
}