001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.backup;
019
020import java.io.IOException;
021import java.util.Collections;
022import java.util.HashSet;
023import java.util.Set;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.fs.FileStatus;
026import org.apache.hadoop.fs.Path;
027import org.apache.hadoop.hbase.Abortable;
028import org.apache.hadoop.hbase.HBaseInterfaceAudience;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
031import org.apache.hadoop.hbase.backup.impl.BulkLoad;
032import org.apache.hadoop.hbase.client.Connection;
033import org.apache.hadoop.hbase.client.ConnectionFactory;
034import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;
035import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
036import org.apache.yetus.audience.InterfaceAudience;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
041
042/**
043 * File cleaner that prevents deletion of HFiles that are still required by future incremental
044 * backups.
045 * <p>
046 * Bulk loaded HFiles that are needed by future updates are stored in the backup system table.
047 */
048@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
049public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abortable {
050  private static final Logger LOG = LoggerFactory.getLogger(BackupHFileCleaner.class);
051
052  private boolean stopped = false;
053  private boolean aborted = false;
054  private Connection connection;
055  // timestamp of most recent completed cleaning run
056  private volatile long previousCleaningCompletionTimestamp = 0;
057
058  @Override
059  public void postClean() {
060    previousCleaningCompletionTimestamp = EnvironmentEdgeManager.currentTime();
061  }
062
063  @Override
064  public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
065    if (stopped) {
066      return Collections.emptyList();
067    }
068
069    // We use filenames because the HFile will have been moved to the archive since it
070    // was registered.
071    final Set<String> hfileFilenames = new HashSet<>();
072    try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
073      Set<TableName> tablesIncludedInBackups = fetchFullyBackedUpTables(tbl);
074      for (BulkLoad bulkLoad : tbl.readBulkloadRows(tablesIncludedInBackups)) {
075        hfileFilenames.add(new Path(bulkLoad.getHfilePath()).getName());
076      }
077      LOG.debug("Found {} unique HFile filenames registered as bulk loads.", hfileFilenames.size());
078    } catch (IOException ioe) {
079      LOG.error(
080        "Failed to read registered bulk load references from backup system table, marking all files as non-deletable.",
081        ioe);
082      return Collections.emptyList();
083    }
084
085    // Pin the threshold, we don't want the result to change depending on evaluation time.
086    final long recentFileThreshold = previousCleaningCompletionTimestamp;
087
088    return Iterables.filter(files, file -> {
089      // If the file is recent, be conservative and wait for one more scan of the bulk loads
090      if (file.getModificationTime() > recentFileThreshold) {
091        LOG.debug("Preventing deletion due to timestamp: {}", file.getPath().toString());
092        return false;
093      }
094      // A file can be deleted if it is not registered as a backup bulk load.
095      String hfile = file.getPath().getName();
096      if (hfileFilenames.contains(hfile)) {
097        LOG.debug("Preventing deletion due to bulk load registration in backup system table: {}",
098          file.getPath().toString());
099        return false;
100      } else {
101        LOG.debug("OK to delete: {}", file.getPath().toString());
102        return true;
103      }
104    });
105  }
106
107  protected Set<TableName> fetchFullyBackedUpTables(BackupSystemTable tbl) throws IOException {
108    return tbl.getTablesIncludedInBackups();
109  }
110
111  @Override
112  public boolean isFileDeletable(FileStatus fStat) {
113    throw new IllegalStateException("This method should not be called");
114  }
115
116  @Override
117  public void setConf(Configuration config) {
118    this.connection = null;
119    try {
120      this.connection = ConnectionFactory.createConnection(config);
121    } catch (IOException ioe) {
122      LOG.error("Couldn't establish connection", ioe);
123    }
124  }
125
126  @Override
127  public void stop(String why) {
128    if (this.stopped) {
129      return;
130    }
131    if (this.connection != null) {
132      try {
133        this.connection.close();
134      } catch (IOException ioe) {
135        LOG.debug("Got IOException when closing connection", ioe);
136      }
137    }
138    this.stopped = true;
139  }
140
141  @Override
142  public boolean isStopped() {
143    return this.stopped;
144  }
145
146  @Override
147  public void abort(String why, Throwable e) {
148    LOG.warn("Aborting ReplicationHFileCleaner because {}", why, e);
149    this.aborted = true;
150    stop(why);
151  }
152
153  @Override
154  public boolean isAborted() {
155    return this.aborted;
156  }
157}