/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
import org.apache.hadoop.hbase.backup.impl.BulkLoad;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;

/**
 * File cleaner that prevents deletion of HFiles that are still required by future incremental
 * backups.
 * <p>
 * Bulk loaded HFiles that are needed by future updates are stored in the backup system table.
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abortable {
  private static final Logger LOG = LoggerFactory.getLogger(BackupHFileCleaner.class);

  private boolean stopped = false;
  private boolean aborted = false;
  private Connection connection;
  // timestamp of most recent completed cleaning run
  private volatile long previousCleaningCompletionTimestamp = 0;

  @Override
  public void postClean() {
    previousCleaningCompletionTimestamp = EnvironmentEdgeManager.currentTime();
  }

  @Override
  public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
    if (stopped) {
      return Collections.emptyList();
    }

    // We use filenames because the HFile will have been moved to the archive since it
    // was registered.
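    // Collect the bare filenames (Path#getName) of every HFile registered as a bulk load for a
    // backed-up table; the filter below compares candidate files by filename, not full path.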
    final Set<String> hfileFilenames = new HashSet<>();
    try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
      Set<TableName> tablesIncludedInBackups = fetchFullyBackedUpTables(tbl);
      for (BulkLoad bulkLoad : tbl.readBulkloadRows(tablesIncludedInBackups)) {
        hfileFilenames.add(new Path(bulkLoad.getHfilePath()).getName());
      }
      LOG.debug("Found {} unique HFile filenames registered as bulk loads.", hfileFilenames.size());
    } catch (IOException ioe) {
      LOG.error(
        "Failed to read registered bulk load references from backup system table, marking all files as non-deletable.",
        ioe);
      return Collections.emptyList();
    }

    // Pin the threshold; we don't want the result to change depending on evaluation time.
    final long recentFileThreshold = previousCleaningCompletionTimestamp;

    return Iterables.filter(files, file -> {
      // If the file is recent, be conservative and wait for one more scan of the bulk loads.
      if (file.getModificationTime() > recentFileThreshold) {
        LOG.debug("Preventing deletion due to timestamp: {}", file.getPath().toString());
        return false;
      }
      // A file can be deleted if it is not registered as a backup bulk load.
      String hfile = file.getPath().getName();
      if (hfileFilenames.contains(hfile)) {
        LOG.debug("Preventing deletion due to bulk load registration in backup system table: {}",
          file.getPath().toString());
        return false;
      } else {
        LOG.debug("OK to delete: {}", file.getPath().toString());
        return true;
      }
    });
  }

  protected Set<TableName> fetchFullyBackedUpTables(BackupSystemTable tbl) throws IOException {
    return tbl.getTablesIncludedInBackups();
  }

  @Override
  public boolean isFileDeletable(FileStatus fStat) {
    throw new IllegalStateException("This method should not be called");
  }

  @Override
  public void setConf(Configuration config) {
    this.connection = null;
    try {
      this.connection = ConnectionFactory.createConnection(config);
    } catch (IOException ioe) {
      LOG.error("Couldn't establish connection", ioe);
    }
  }

  @Override
  public void stop(String why) {
    if (this.stopped) {
      return;
    }
    if (this.connection != null) {
      try {
        this.connection.close();
      } catch (IOException ioe) {
        LOG.debug("Got IOException when closing connection", ioe);
      }
    }
    this.stopped = true;
  }

  @Override
  public boolean isStopped() {
    return this.stopped;
  }

  @Override
  public void abort(String why, Throwable e) {
    LOG.warn("Aborting BackupHFileCleaner because {}", why, e);
    this.aborted = true;
    stop(why);
  }

  @Override
  public boolean isAborted() {
    return this.aborted;
  }
}
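// Deployment note (an illustrative sketch, not authoritative): like other
// BaseHFileCleanerDelegate implementations, this cleaner only runs if it appears in the
// master's "hbase.master.hfilecleaner.plugins" list. The hbase-site.xml entry below is an
// assumption about a typical setup; it should preserve whatever delegates a deployment already
// configures (for example the default TimeToLiveHFileCleaner), and backup-enabled clusters may
// add this cleaner to the list automatically.
//
//   <property>
//     <name>hbase.master.hfilecleaner.plugins</name>
//     <value>org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner,
//       org.apache.hadoop.hbase.backup.BackupHFileCleaner</value>
//   </property>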