/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.impl;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupHFileCleaner;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
import org.apache.hadoop.hbase.backup.BackupObserver;
import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
import org.apache.hadoop.hbase.backup.master.BackupLogCleaner;
import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
import org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.procedure.ProcedureManagerHost;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;

/**
 * Handles backup requests: creates backup info records in the backup system table to keep track
 * of backup sessions, and dispatches backup requests.
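 * <p>
 * A minimal usage sketch (illustrative only; assumes a live {@link Connection}, a configuration
 * with {@code hbase.backup.enable=true}, a {@code List<TableName> tables}, and hypothetical
 * backup id and root directory values):
 * <pre>{@code
 * try (BackupManager manager = new BackupManager(conn, conf)) {
 *   manager.initialize(); // fails if another backup session is in progress
 *   BackupInfo info = manager.createBackupInfo("backup_001", BackupType.FULL, tables,
 *     "hdfs://cluster/backup", 3, 100L);
 *   manager.startBackupSession();
 *   try {
 *     // ... perform the backup work ...
 *   } finally {
 *     manager.finishBackupSession();
 *   }
 * }
 * }</pre>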
 */
@InterfaceAudience.Private
public class BackupManager implements Closeable {
  // Timeout (in seconds) for acquiring the exclusive backup operation lock
  public final static String BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY =
      "hbase.backup.exclusive.op.timeout.seconds";
  // Default exclusive operation timeout, in seconds
  private final static int DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT = 3600;
  private static final Logger LOG = LoggerFactory.getLogger(BackupManager.class);

  protected Configuration conf = null;
  protected BackupInfo backupInfo = null;
  protected BackupSystemTable systemTable;
  protected final Connection conn;

  /**
   * Backup manager constructor.
   * @param conn connection
   * @param conf configuration
   * @throws IOException if backup is not enabled or the backup system table cannot be accessed
   */
  public BackupManager(Connection conn, Configuration conf) throws IOException {
    if (!conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY,
      BackupRestoreConstants.BACKUP_ENABLE_DEFAULT)) {
      throw new BackupException("HBase backup is not enabled. Check your "
          + BackupRestoreConstants.BACKUP_ENABLE_KEY + " setting.");
    }
    this.conf = conf;
    this.conn = conn;
    this.systemTable = new BackupSystemTable(conn);
  }

  /**
   * Returns backup info
   */
  protected BackupInfo getBackupInfo() {
    return backupInfo;
  }

  /**
   * Modifies the master's configuration to inject backup-related features (TESTs only).
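   * <p>
   * For illustration (a sketch, not exhaustive), after this call the master configuration
   * contains plugin entries equivalent to:
   * <pre>{@code
   * conf.get("hbase.master.logcleaner.plugins");   // ..., + BackupLogCleaner
   * conf.get("hbase.procedure.master.classes");    // ..., + LogRollMasterProcedureManager
   * conf.get("hbase.master.hfilecleaner.plugins"); // ..., + BackupHFileCleaner
   * }</pre>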
   * @param conf configuration
   */
  @VisibleForTesting
  public static void decorateMasterConfiguration(Configuration conf) {
    if (!isBackupEnabled(conf)) {
      return;
    }
    // Add WAL archive cleaner plug-in
    String plugins = conf.get(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
    String cleanerClass = BackupLogCleaner.class.getCanonicalName();
    if (!plugins.contains(cleanerClass)) {
      conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, plugins + "," + cleanerClass);
    }

    String classes = conf.get(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY);
    String masterProcedureClass = LogRollMasterProcedureManager.class.getName();
    if (classes == null) {
      conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY, masterProcedureClass);
    } else if (!classes.contains(masterProcedureClass)) {
      conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY,
        classes + "," + masterProcedureClass);
    }

    plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
    conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, (plugins == null ? "" : plugins + ",") +
      BackupHFileCleaner.class.getName());
    if (LOG.isDebugEnabled()) {
      LOG.debug("Added log cleaner: {}. Added master procedure manager: {}. "
        + "Added HFile cleaner: {}", cleanerClass, masterProcedureClass,
        BackupHFileCleaner.class.getName());
    }
  }

  /**
   * Modifies the Region Server configuration to inject backup-related features (TESTs only).
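   * <p>
   * For illustration (a sketch), after this call the region server configuration contains
   * entries equivalent to:
   * <pre>{@code
   * conf.get("hbase.procedure.regionserver.classes"); // ..., + LogRollRegionServerProcedureManager
   * conf.get("hbase.coprocessor.region.classes");     // ..., + BackupObserver
   * }</pre>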
   * @param conf configuration
   */
  @VisibleForTesting
  public static void decorateRegionServerConfiguration(Configuration conf) {
    if (!isBackupEnabled(conf)) {
      return;
    }

    String classes = conf.get(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY);
    String regionProcedureClass = LogRollRegionServerProcedureManager.class.getName();
    if (classes == null) {
      conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY, regionProcedureClass);
    } else if (!classes.contains(regionProcedureClass)) {
      conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY,
        classes + "," + regionProcedureClass);
    }
    String coproc = conf.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY);
    String regionObserverClass = BackupObserver.class.getName();
    conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
      (coproc == null ? "" : coproc + ",") + regionObserverClass);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Added region procedure manager: {}. Added region observer: {}",
        regionProcedureClass, regionObserverClass);
    }
  }

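  /**
   * Checks whether the backup feature is enabled. A sketch of enabling it programmatically
   * (the key resolves to {@code hbase.backup.enable}):
   * <pre>{@code
   * conf.setBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY, true);
   * }</pre>
   * @param conf configuration to check
   * @return true if the backup feature is enabled, false otherwise
   */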
  public static boolean isBackupEnabled(Configuration conf) {
    return conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY,
      BackupRestoreConstants.BACKUP_ENABLE_DEFAULT);
  }

  /**
   * Get configuration
   * @return configuration
   */
  Configuration getConf() {
    return conf;
  }

  /**
   * Stops all backup work and closes the backup system table.
   */
  @Override
  public void close() {
    if (systemTable != null) {
      try {
        systemTable.close();
      } catch (Exception e) {
        LOG.error(e.toString(), e);
      }
    }
  }

  /**
   * Creates a backup info based on the input backup request.
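   * <p>
   * A minimal sketch with illustrative values (the id and root directory are hypothetical, and
   * {@code manager} is an initialized BackupManager):
   * <pre>{@code
   * BackupInfo info = manager.createBackupInfo("backup_1620000000000", BackupType.FULL,
   *   null,                    // null table list: back up all user tables
   *   "hdfs://cluster/backup", // target backup root directory
   *   3,                       // parallel workers
   *   100L);                   // bandwidth per worker, in MB/s
   * }</pre>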
   * @param backupId backup id
   * @param type backup type (full or incremental)
   * @param tableList tables to back up; for a full backup, null or empty means all tables
   * @param targetRootDir backup root directory
   * @param workers number of parallel workers
   * @param bandwidth bandwidth per worker in MB per sec
   * @return BackupInfo
   * @throws BackupException exception
   */
  public BackupInfo createBackupInfo(String backupId, BackupType type, List<TableName> tableList,
      String targetRootDir, int workers, long bandwidth) throws BackupException {
    if (targetRootDir == null) {
      throw new BackupException("Wrong backup request parameter: target backup root directory");
    }

    if (type == BackupType.FULL && (tableList == null || tableList.isEmpty())) {
      // A null or empty table list for a full backup means "back up all tables": fill the table
      // list with all user tables from meta. If no tables are available, fail the request.
      List<TableDescriptor> htds = null;
      try (Admin admin = conn.getAdmin()) {
        htds = admin.listTableDescriptors();
      } catch (Exception e) {
        throw new BackupException(e);
      }

      if (htds == null) {
        throw new BackupException("No table exists for full backup of all tables.");
      } else {
        tableList = new ArrayList<>();
        for (TableDescriptor hTableDescriptor : htds) {
          TableName tn = hTableDescriptor.getTableName();
          if (tn.equals(BackupSystemTable.getTableName(conf))) {
            // skip the backup system table
            continue;
          }
          tableList.add(tn);
        }

        LOG.info("Full backup of all tables available in the cluster: {}", tableList);
      }
    }

    // there are one or more tables in the table list
    backupInfo = new BackupInfo(backupId, type, tableList.toArray(new TableName[tableList.size()]),
      targetRootDir);
    backupInfo.setBandwidth(bandwidth);
    backupInfo.setWorkers(workers);
    return backupInfo;
  }

  /**
   * Checks whether there is an ongoing backup. Currently, we only rely on the state recorded in
   * the backup system table. Handling orphan records should be considered in the future;
   * otherwise, all incoming requests will fail.
   * @return the ongoing backup id if an ongoing backup exists, otherwise null
   * @throws IOException exception
   */
  private String getOngoingBackupId() throws IOException {
    ArrayList<BackupInfo> sessions = systemTable.getBackupInfos(BackupState.RUNNING);
    if (sessions.size() == 0) {
      return null;
    }
    return sessions.get(0).getBackupId();
  }

  /**
   * Starts the backup manager service.
   * @throws IOException exception
   */
  public void initialize() throws IOException {
    String ongoingBackupId = this.getOngoingBackupId();
    if (ongoingBackupId != null) {
      LOG.info("There is an ongoing backup {}. "
        + "Cannot launch a new backup until no ongoing backup remains.", ongoingBackupId);
      throw new BackupException("There is an ongoing backup session.");
    }
  }

  public void setBackupInfo(BackupInfo backupInfo) {
    this.backupInfo = backupInfo;
  }

  /**
   * Get direct ancestors of the current backup.
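   * <p>
   * Illustrative dependency chain for one table set (oldest on the left); a full backup has no
   * ancestors, while an incremental backup depends on the images back to its covering full
   * backup:
   * <pre>{@code
   * FULL_1 <- INC_1 <- INC_2 <- (current incremental backup)
   * }</pre>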
   * @param backupInfo The backup info for the current backup
   * @return The ancestors for the current backup
   * @throws IOException exception
   */
  public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo) throws IOException {
    LOG.debug("Getting the direct ancestors of the current backup {}", backupInfo.getBackupId());

    ArrayList<BackupImage> ancestors = new ArrayList<>();

    // a full backup does not have ancestors
    if (backupInfo.getType() == BackupType.FULL) {
      LOG.debug("Current backup is a full backup, no direct ancestor for it.");
      return ancestors;
    }

    // get the complete backup history list in descending order
    ArrayList<BackupInfo> allHistoryList = getBackupHistory(true);
    for (BackupInfo backup : allHistoryList) {
      BackupImage.Builder builder = BackupImage.newBuilder();

      BackupImage image = builder.withBackupId(backup.getBackupId()).withType(backup.getType())
          .withRootDir(backup.getBackupRootDir()).withTableList(backup.getTableNames())
          .withStartTime(backup.getStartTs()).withCompleteTime(backup.getCompleteTs()).build();

      // add the full backup image as an ancestor until the last incremental backup
      if (backup.getType().equals(BackupType.FULL)) {
        // check the backup image coverage: if the previous image is covered by newer ones,
        // there is no need to add it
        if (!BackupManifest.canCoverImage(ancestors, image)) {
          ancestors.add(image);
        }
      } else {
        // Found the last incremental backup. If the previously added full backup ancestor
        // images can cover it, this incremental ancestor is not a dependency of the current
        // incremental backup; that is, we have reached the backup scope boundary of the current
        // table set. Otherwise, this incremental backup ancestor is a dependency of the ongoing
        // incremental backup.
        if (BackupManifest.canCoverImage(ancestors, image)) {
          LOG.debug("Met the backup boundary of the current table set:");
          for (BackupImage image1 : ancestors) {
            LOG.debug("  BackupID={}, BackupDir={}", image1.getBackupId(), image1.getRootDir());
          }
        } else {
          Path logBackupPath =
              HBackupFileSystem.getBackupPath(backup.getBackupRootDir(), backup.getBackupId());
          LOG.debug("Current backup has an incremental backup ancestor; "
              + "touching its image manifest in {} to construct the dependency.",
              logBackupPath.toString());
          BackupManifest lastIncrImgManifest = new BackupManifest(conf, logBackupPath);
          BackupImage lastIncrImage = lastIncrImgManifest.getBackupImage();
          ancestors.add(lastIncrImage);

          LOG.debug("Last dependent incremental backup image: {BackupID={}, BackupDir={}}",
            lastIncrImage.getBackupId(), lastIncrImage.getRootDir());
        }
      }
    }
    LOG.debug("Got {} ancestors for the current backup.", ancestors.size());
    return ancestors;
  }

  /**
   * Get the direct ancestors of this backup for one of the tables involved.
   * @param backupInfo backup info
   * @param table table
   * @return backup images on the dependency list
   * @throws IOException exception
   */
  public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo, TableName table)
      throws IOException {
    ArrayList<BackupImage> ancestors = getAncestors(backupInfo);
    ArrayList<BackupImage> tableAncestors = new ArrayList<>();
    for (BackupImage image : ancestors) {
      if (image.hasTable(table)) {
        tableAncestors.add(image);
        if (image.getType() == BackupType.FULL) {
          break;
        }
      }
    }
    return tableAncestors;
  }

  /*
   * backup system table operations
   */


  /**
   * Updates the status (state) of a backup session in the persistent store.
   * @param context updated backup info
   * @throws IOException exception
   */
  public void updateBackupInfo(BackupInfo context) throws IOException {
    systemTable.updateBackupInfo(context);
  }

  /**
   * Starts a new backup session, waiting to acquire the backup exclusive operation lock.
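   * <p>
   * The wait timeout is configurable; a sketch of tuning it (the key resolves to
   * {@code hbase.backup.exclusive.op.timeout.seconds}):
   * <pre>{@code
   * conf.setInt(BackupManager.BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY, 1800); // 30 minutes
   * }</pre>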
   * @throws IOException if an active session already exists
   */
  public void startBackupSession() throws IOException {
    long startTime = System.currentTimeMillis();
    long timeout = conf.getInt(BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY,
      DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT) * 1000L;
    long lastWarningOutputTime = 0;
    while (System.currentTimeMillis() - startTime < timeout) {
      try {
        systemTable.startBackupExclusiveOperation();
        return;
      } catch (IOException e) {
        if (e instanceof ExclusiveOperationException) {
          // sleep, then repeat
          try {
            Thread.sleep(1000);
          } catch (InterruptedException e1) {
            // Restore the interrupted status
            Thread.currentThread().interrupt();
          }
          if (lastWarningOutputTime == 0
              || (System.currentTimeMillis() - lastWarningOutputTime) > 60000) {
            lastWarningOutputTime = System.currentTimeMillis();
            LOG.warn("Waiting to acquire backup exclusive lock for {}s",
                (lastWarningOutputTime - startTime) / 1000);
          }
        } else {
          throw e;
        }
      }
    }
    throw new IOException(
      "Failed to acquire backup system table exclusive lock after " + timeout / 1000 + "s");
  }

  /**
   * Finishes active backup session
   * @throws IOException if no active session
   */
  public void finishBackupSession() throws IOException {
    systemTable.finishBackupExclusiveOperation();
  }

  /**
   * Reads the start code (timestamp) of the last successful backup. Returns null if no start
   * code is stored in the backup system table or the stored value has length 0; both cases
   * indicate that no backup has completed successfully so far.
   * @return the timestamp of the last successful backup, or null
   * @throws IOException exception
   */
  public String readBackupStartCode() throws IOException {
    return systemTable.readBackupStartCode(backupInfo.getBackupRootDir());
  }

  /**
   * Writes the start code (timestamp) to the backup system table. If null is passed in, a
   * zero-length value is written.
   * @param startCode start code
   * @throws IOException exception
   */
  public void writeBackupStartCode(Long startCode) throws IOException {
    systemTable.writeBackupStartCode(startCode, backupInfo.getBackupRootDir());
  }

  /**
   * Get the RS log information after the last log roll from the backup system table.
   * @return RS log info
   * @throws IOException exception
   */
  public HashMap<String, Long> readRegionServerLastLogRollResult() throws IOException {
    return systemTable.readRegionServerLastLogRollResult(backupInfo.getBackupRootDir());
  }

  public Pair<Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>>, List<byte[]>>
    readBulkloadRows(List<TableName> tableList) throws IOException {
    return systemTable.readBulkloadRows(tableList);
  }

  public void deleteBulkLoadedRows(List<byte[]> rows) throws IOException {
    systemTable.deleteBulkLoadedRows(rows);
  }

  /**
   * Get all completed backup information (in descending order by time).
   * @return list of completed BackupInfo, newest first
   * @throws IOException exception
   */
  public List<BackupInfo> getBackupHistory() throws IOException {
    return systemTable.getBackupHistory();
  }

  public ArrayList<BackupInfo> getBackupHistory(boolean completed) throws IOException {
    return systemTable.getBackupHistory(completed);
  }

  /**
   * Write the current timestamps for each regionserver to the backup system table after a
   * successful full or incremental backup. Each table may have a different set of log
   * timestamps. The saved timestamp is that of the last log file that was backed up already.
   * @param tables tables
   * @param newTimestamps timestamp of the last backed-up WAL file, keyed by region server
   * @throws IOException exception
   */
  public void writeRegionServerLogTimestamp(Set<TableName> tables,
      HashMap<String, Long> newTimestamps) throws IOException {
    systemTable.writeRegionServerLogTimestamp(tables, newTimestamps, backupInfo.getBackupRootDir());
  }

  /**
   * Read the timestamp of each region server log after the last successful backup. Each table
   * has its own set of timestamps.
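   * <p>
   * Illustrative shape of the returned map (server names and timestamps are hypothetical):
   * <pre>{@code
   * { t1 => { "rs1.example.com,16020" => 1650000000000L,
   *           "rs2.example.com,16020" => 1650000123456L }, ... }
   * }</pre>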
   * @return the timestamps for each region server: key is the table name, value maps region
   *         server name to the previous backup timestamp
   * @throws IOException exception
   */
  public HashMap<TableName, HashMap<String, Long>> readLogTimestampMap() throws IOException {
    return systemTable.readLogTimestampMap(backupInfo.getBackupRootDir());
  }

  /**
   * Return the current tables covered by incremental backup.
   * @return set of tableNames
   * @throws IOException exception
   */
  public Set<TableName> getIncrementalBackupTableSet() throws IOException {
    return systemTable.getIncrementalBackupTableSet(backupInfo.getBackupRootDir());
  }

  /**
   * Adds set of tables to overall incremental backup table set
   * @param tables tables
   * @throws IOException exception
   */
  public void addIncrementalBackupTableSet(Set<TableName> tables) throws IOException {
    systemTable.addIncrementalBackupTableSet(tables, backupInfo.getBackupRootDir());
  }

  /**
   * Saves the list of WAL files after an incremental backup operation. These files will be
   * stored until TTL expiration and are used by the Backup Log Cleaner plug-in to determine
   * which WAL files can be safely purged.
   */
  public void recordWALFiles(List<String> files) throws IOException {
    systemTable.addWALFiles(files, backupInfo.getBackupId(), backupInfo.getBackupRootDir());
  }

  /**
   * Get WAL files iterator.
   * @return WAL files iterator from the backup system table
   * @throws IOException if getting the WAL files iterator fails
   */
  public Iterator<BackupSystemTable.WALItem> getWALFilesFromBackupSystem() throws IOException {
    return systemTable.getWALFilesIterator(backupInfo.getBackupRootDir());
  }

  public Connection getConnection() {
    return conn;
  }
}