001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.backup; 019 020import java.io.IOException; 021import java.util.Collections; 022import java.util.HashSet; 023import java.util.Set; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.fs.FileStatus; 026import org.apache.hadoop.fs.Path; 027import org.apache.hadoop.hbase.Abortable; 028import org.apache.hadoop.hbase.HBaseInterfaceAudience; 029import org.apache.hadoop.hbase.TableName; 030import org.apache.hadoop.hbase.backup.impl.BackupSystemTable; 031import org.apache.hadoop.hbase.backup.impl.BulkLoad; 032import org.apache.hadoop.hbase.client.Connection; 033import org.apache.hadoop.hbase.client.ConnectionFactory; 034import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate; 035import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 036import org.apache.yetus.audience.InterfaceAudience; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039 040import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; 041 042/** 043 * File cleaner that prevents deletion of HFiles that are still required by future incremental 044 * backups. 045 * <p> 046 * Bulk loaded HFiles that are needed by future updates are stored in the backup system table. 047 */ 048@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG) 049public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abortable { 050 private static final Logger LOG = LoggerFactory.getLogger(BackupHFileCleaner.class); 051 052 private boolean stopped = false; 053 private boolean aborted = false; 054 private Connection connection; 055 // timestamp of most recent read from backup system table 056 private long prevReadFromBackupTbl = 0; 057 // timestamp of 2nd most recent read from backup system table 058 private long secondPrevReadFromBackupTbl = 0; 059 060 @Override 061 public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) { 062 if (stopped) { 063 return Collections.emptyList(); 064 } 065 066 // We use filenames because the HFile will have been moved to the archive since it 067 // was registered. 068 final Set<String> hfileFilenames = new HashSet<>(); 069 try (BackupSystemTable tbl = new BackupSystemTable(connection)) { 070 Set<TableName> tablesIncludedInBackups = fetchFullyBackedUpTables(tbl); 071 for (BulkLoad bulkLoad : tbl.readBulkloadRows(tablesIncludedInBackups)) { 072 hfileFilenames.add(new Path(bulkLoad.getHfilePath()).getName()); 073 } 074 LOG.debug("Found {} unique HFile filenames registered as bulk loads.", hfileFilenames.size()); 075 } catch (IOException ioe) { 076 LOG.error( 077 "Failed to read registered bulk load references from backup system table, marking all files as non-deletable.", 078 ioe); 079 return Collections.emptyList(); 080 } 081 082 secondPrevReadFromBackupTbl = prevReadFromBackupTbl; 083 prevReadFromBackupTbl = EnvironmentEdgeManager.currentTime(); 084 085 return Iterables.filter(files, file -> { 086 // If the file is recent, be conservative and wait for one more scan of the bulk loads 087 if (file.getModificationTime() > secondPrevReadFromBackupTbl) { 088 LOG.debug("Preventing deletion due to timestamp: {}", file.getPath().toString()); 089 return false; 090 } 091 // A file can be deleted if it is not registered as a backup bulk load. 092 String hfile = file.getPath().getName(); 093 if (hfileFilenames.contains(hfile)) { 094 LOG.debug("Preventing deletion due to bulk load registration in backup system table: {}", 095 file.getPath().toString()); 096 return false; 097 } else { 098 LOG.debug("OK to delete: {}", file.getPath().toString()); 099 return true; 100 } 101 }); 102 } 103 104 protected Set<TableName> fetchFullyBackedUpTables(BackupSystemTable tbl) throws IOException { 105 return tbl.getTablesIncludedInBackups(); 106 } 107 108 @Override 109 public boolean isFileDeletable(FileStatus fStat) { 110 throw new IllegalStateException("This method should not be called"); 111 } 112 113 @Override 114 public void setConf(Configuration config) { 115 this.connection = null; 116 try { 117 this.connection = ConnectionFactory.createConnection(config); 118 } catch (IOException ioe) { 119 LOG.error("Couldn't establish connection", ioe); 120 } 121 } 122 123 @Override 124 public void stop(String why) { 125 if (this.stopped) { 126 return; 127 } 128 if (this.connection != null) { 129 try { 130 this.connection.close(); 131 } catch (IOException ioe) { 132 LOG.debug("Got IOException when closing connection", ioe); 133 } 134 } 135 this.stopped = true; 136 } 137 138 @Override 139 public boolean isStopped() { 140 return this.stopped; 141 } 142 143 @Override 144 public void abort(String why, Throwable e) { 145 LOG.warn("Aborting ReplicationHFileCleaner because {}", why, e); 146 this.aborted = true; 147 stop(why); 148 } 149 150 @Override 151 public boolean isAborted() { 152 return this.aborted; 153 } 154}