/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
import org.apache.hadoop.hbase.backup.impl.BulkLoad;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;

/**
 * File cleaner that prevents deletion of HFiles that are still required by future incremental
 * backups.
 * <p>
 * References to bulk loaded HFiles that future incremental backups still need are tracked in the
 * backup system table.
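 * <p>
 * A minimal sketch of the typical way to enable this cleaner: add it to the master's list of
 * HFile cleaner plugins in hbase-site.xml (the property takes a comma-separated list of
 * delegates):
 *
 * <pre>
 * &lt;property&gt;
 *   &lt;name&gt;hbase.master.hfilecleaner.plugins&lt;/name&gt;
 *   &lt;value&gt;org.apache.hadoop.hbase.backup.BackupHFileCleaner&lt;/value&gt;
 * &lt;/property&gt;
 * </pre>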
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abortable {
  private static final Logger LOG = LoggerFactory.getLogger(BackupHFileCleaner.class);

  private boolean stopped = false;
  private boolean aborted = false;
  private Connection connection;
  // timestamp of most recent read from backup system table
  private long prevReadFromBackupTbl = 0;
  // timestamp of 2nd most recent read from backup system table
  private long secondPrevReadFromBackupTbl = 0;

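  /**
   * Returns the files that are safe to delete: files that predate the second most recent read of
   * the backup system table and whose names are not registered there as bulk loads. Anything newer
   * is conservatively retained, since its bulk load registration may not have been visible to the
   * most recent read.
   */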
  @Override
  public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
    if (stopped) {
      return Collections.emptyList();
    }

    // We use filenames because the HFile will have been moved to the archive since it
    // was registered.
    final Set<String> hfileFilenames = new HashSet<>();
    try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
      Set<TableName> tablesIncludedInBackups = fetchFullyBackedUpTables(tbl);
      for (BulkLoad bulkLoad : tbl.readBulkloadRows(tablesIncludedInBackups)) {
        hfileFilenames.add(new Path(bulkLoad.getHfilePath()).getName());
      }
      LOG.debug("Found {} unique HFile filenames registered as bulk loads.",
        hfileFilenames.size());
    } catch (IOException ioe) {
      LOG.error("Failed to read registered bulk load references from backup system table, "
        + "marking all files as non-deletable.", ioe);
      return Collections.emptyList();
    }

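    // Rotate the read timestamps. The filter below only allows deletion of files that predate the
    // second most recent read, which gives a bulk load registered around the time of a read one
    // full scan interval to appear in the backup system table.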
    secondPrevReadFromBackupTbl = prevReadFromBackupTbl;
    prevReadFromBackupTbl = EnvironmentEdgeManager.currentTime();

    return Iterables.filter(files, file -> {
      // If the file is recent, be conservative and wait for one more scan of the bulk loads
      if (file.getModificationTime() > secondPrevReadFromBackupTbl) {
        LOG.debug("Preventing deletion due to timestamp: {}", file.getPath().toString());
        return false;
      }
      // A file can be deleted if it is not registered as a backup bulk load.
      String hfile = file.getPath().getName();
      if (hfileFilenames.contains(hfile)) {
        LOG.debug("Preventing deletion due to bulk load registration in backup system table: {}",
          file.getPath().toString());
        return false;
      } else {
        LOG.debug("OK to delete: {}", file.getPath().toString());
        return true;
      }
    });
  }

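  /**
   * Returns the set of tables that are included in existing backups. Kept as a protected hook,
   * presumably so subclasses (e.g. tests) can substitute the lookup.
   */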
  protected Set<TableName> fetchFullyBackedUpTables(BackupSystemTable tbl) throws IOException {
    return tbl.getTablesIncludedInBackups();
  }

  @Override
  public boolean isFileDeletable(FileStatus fStat) {
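    // Never called in practice: this delegate overrides getDeletableFiles, which bypasses the
    // per-file isFileDeletable check in the base class.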
    throw new IllegalStateException("This method should not be called");
  }

  @Override
  public void setConf(Configuration config) {
    this.connection = null;
    try {
      this.connection = ConnectionFactory.createConnection(config);
    } catch (IOException ioe) {
      LOG.error("Couldn't establish connection", ioe);
    }
  }

  @Override
  public void stop(String why) {
    if (this.stopped) {
      return;
    }
    if (this.connection != null) {
      try {
        this.connection.close();
      } catch (IOException ioe) {
        LOG.debug("Got IOException when closing connection", ioe);
      }
    }
    this.stopped = true;
  }

  @Override
  public boolean isStopped() {
    return this.stopped;
  }

  @Override
  public void abort(String why, Throwable e) {
    LOG.warn("Aborting BackupHFileCleaner because {}", why, e);
    this.aborted = true;
    stop(why);
  }

  @Override
  public boolean isAborted() {
    return this.aborted;
  }
}