/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.impl;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * After a full backup was created, the incremental backup will only store the changes made after
 * the last full or incremental backup. Creating the backup copies the logfiles in .logs and
 * .oldlogs since the last backup timestamp.
 */
@InterfaceAudience.Private
public class IncrementalBackupManager extends BackupManager {
  public static final Logger LOG = LoggerFactory.getLogger(IncrementalBackupManager.class);

  /**
   * Creates the manager by delegating to the {@link BackupManager} constructor.
   * @param conn connection handed to the parent manager
   * @param conf configuration handed to the parent manager
   * @throws IOException if the parent constructor fails
   */
  public IncrementalBackupManager(Connection conn, Configuration conf) throws IOException {
    super(conn, conf);
  }

  /**
   * Obtain the list of logs that need to be copied out for this incremental backup. The list
   * (with procedure-v2 WALs filtered out) is stored in BackupInfo via
   * {@code setIncrBackupFileList}.
   * <p>
   * The steps are: read the per-region-server minimum timestamps recorded by previous backups,
   * trigger a log roll, read the timestamps produced by that roll, and then select the log files
   * that lie between the two sets of timestamps.
   * @return the map of region server to log timestamp read after the log roll for this
   *         incremental backup
   * @throws IOException if no previous backup timestamps are available (i.e. no full backup has
   *                     been taken yet), or if any of the underlying operations fails
   */
  public Map<String, Long> getIncrBackupLogFileMap() throws IOException {
    Map<String, Long> previousTimestampMins =
      BackupUtils.getRSLogTimestampMins(readLogTimestampMap());

    // Without timestamps from an earlier backup there is no lower bound to diff against.
    if (previousTimestampMins.isEmpty()) {
      throw new IOException("Cannot read any previous back up timestamps from backup system table. "
        + "In order to create an incremental backup, at least one full backup is needed.");
    }

    LOG.info("Execute roll log procedure for incremental backup ...");
    BackupUtils.logRoll(conn, backupInfo.getBackupRootDir(), conf);

    Map<String, Long> newTimestamps = readRegionServerLastLogRollResult();

    // get all new log files from .logs and .oldlogs after last TS and before new timestamp
    List<String> logList = getLogFilesForNewBackup(previousTimestampMins, newTimestamps, conf);
    backupInfo.setIncrBackupFileList(excludeProcV2WALs(logList));

    return newTimestamps;
  }

  /**
   * Returns a copy of {@code logList} without the entries whose file name starts with
   * {@link WALProcedureStore#LOG_PREFIX}.
   * @param logList log file paths, as strings
   * @return a new list holding the remaining paths in their original order
   */
  private List<String> excludeProcV2WALs(List<String> logList) {
    List<String> list = new ArrayList<>();
    for (String logFile : logList) {
      if (!new Path(logFile).getName().startsWith(WALProcedureStore.LOG_PREFIX)) {
        list.add(logFile);
      }
    }
    return list;
  }

  /**
   * For each region server: get all log files newer than the last timestamps but not newer than the
   * newest timestamps.
   * <p>
   * Both the live log directory ({@link HConstants#HREGION_LOGDIR_NAME}) and the archive directory
   * ({@link HConstants#HREGION_OLDLOGDIR_NAME}) under the WAL root are scanned. Meta WAL files are
   * skipped. Every file that has no entry in {@code newestTimestamps} for its host, or whose
   * creation time is greater than that entry, is collected separately and removed from the result
   * before returning.
   * @param olderTimestamps  the timestamp for each region server of the last backup; must not be
   *                         empty because its minimum value is computed
   * @param newestTimestamps the timestamp for each region server that the backup should lead to.
   * @param conf             the Hadoop and Hbase configuration
   * @return a list of log files to be backed up
   * @throws IOException exception
   */
  private List<String> getLogFilesForNewBackup(Map<String, Long> olderTimestamps,
    Map<String, Long> newestTimestamps, Configuration conf) throws IOException {
    LOG.debug("In getLogFilesForNewBackup()\nolderTimestamps: {}\n newestTimestamps: {}",
      olderTimestamps, newestTimestamps);

    long prevBackupStartTs = Collections.min(olderTimestamps.values());
    Path walRootDir = CommonFSUtils.getWALRootDir(conf);
    Path logDir = new Path(walRootDir, HConstants.HREGION_LOGDIR_NAME);
    Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
    FileSystem fs = walRootDir.getFileSystem(conf);
    // A single filter instance is reused; its threshold is reset for each region server.
    NewestLogFilter pathFilter = new NewestLogFilter();

    List<String> resultLogFiles = new ArrayList<>();
    // Only membership matters here, so a set keeps the final removeAll() linear.
    Set<String> newestLogs = new HashSet<>();

    /*
     * The old region servers and timestamps info we kept in backup system table may be out of sync
     * if new region server is added or existing one lost. We'll deal with it here when processing
     * the logs. If data in backup system table has more hosts, just ignore it. If the .logs
     * directory includes more hosts, the additional hosts will not have old timestamps to compare
     * with. We'll just use all the logs in that directory. We always write up-to-date region server
     * and timestamp info to backup system table at the end of successful backup.
     */

    // Get the files in .logs.
    for (FileStatus rs : fs.listStatus(logDir)) {
      Path rsDir = rs.getPath();
      String host = BackupUtils.parseHostNameFromLogFile(rsDir);
      if (host == null) {
        continue;
      }
      Long oldTimeStamp = olderTimestamps.get(host);
      FileStatus[] logs;
      // It is possible that there is no old timestamp in backup system table for this host if
      // this region server is newly added after our last backup.
      if (oldTimeStamp == null) {
        logs = fs.listStatus(rsDir);
      } else {
        pathFilter.setLastBackupTS(oldTimeStamp);
        logs = fs.listStatus(rsDir, pathFilter);
      }
      for (FileStatus log : logs) {
        Path logPath = log.getPath();
        LOG.debug("currentLogFile: {}", logPath);
        if (AbstractFSWALProvider.isMetaFile(logPath)) {
          LOG.debug("Skip {} log file: {}", TableName.META_TABLE_NAME, logPath.getName());
          continue;
        }
        String currentLogFile = logPath.toString();
        resultLogFiles.add(currentLogFile);
        long currentLogTS = BackupUtils.getCreationTime(logPath);

        // If newestTimestamps.get(host) is null, means that
        // either RS (host) has been restarted recently with different port number
        // or RS is down (was decommissioned).
        // NOTE(review): earlier commentary said such a file is "eligible for inclusion", but it
        // is added to newestLogs below and therefore removed from the returned list. Behavior is
        // kept as-is; confirm which one is intended.
        Long ts = newestTimestamps.get(host);
        if (ts == null) {
          LOG.warn("ORPHAN log found: {} host={}", log, host);
          LOG.debug("Known hosts (from newestTimestamps):");
          for (String s : newestTimestamps.keySet()) {
            LOG.debug(s);
          }
        }
        if (ts == null || currentLogTS > ts) {
          newestLogs.add(currentLogFile);
        }
      }
    }

    // Include the .oldlogs files too.
    for (FileStatus oldlog : fs.listStatus(oldLogDir)) {
      Path oldLogPath = oldlog.getPath();
      String currentLogFile = oldLogPath.toString();
      if (AbstractFSWALProvider.isMetaFile(oldLogPath)) {
        LOG.debug("Skip .meta log file: {}", currentLogFile);
        continue;
      }
      String host = BackupUtils.parseHostFromOldLog(oldLogPath);
      if (host == null) {
        continue;
      }
      long currentLogTS = BackupUtils.getCreationTime(oldLogPath);
      Long oldTimeStamp = olderTimestamps.get(host);
      /*
       * It is possible that there is no old timestamp in backup system table for this host. At the
       * time of our last backup operation, this rs did not exist. The reason can be one of the two:
       * 1. The rs already left/crashed. Its logs were moved to .oldlogs. 2. The rs was added after
       * our last backup.
       */
      if (oldTimeStamp == null) {
        if (currentLogTS < prevBackupStartTs) {
          // This log file is really old, its region server was before our last backup.
          continue;
        }
        resultLogFiles.add(currentLogFile);
      } else if (currentLogTS > oldTimeStamp) {
        resultLogFiles.add(currentLogFile);
      }

      // It is possible that a host in .oldlogs is an obsolete region server
      // so newestTimestamps.get(host) here can be null.
      // NOTE(review): earlier commentary said these logs must still be included to avoid losing
      // edits, yet a null timestamp puts the file into newestLogs, which is subtracted from the
      // result below. Behavior is kept as-is; confirm which one is intended.
      Long newTimestamp = newestTimestamps.get(host);
      if (newTimestamp == null || currentLogTS > newTimestamp) {
        newestLogs.add(currentLogFile);
      }
    }
    // remove newest log per host because they are still in use
    resultLogFiles.removeAll(newestLogs);
    return resultLogFiles;
  }

  /**
   * Path filter that accepts a log file only when it is not a meta WAL and its creation time, as
   * reported by {@link BackupUtils#getCreationTime(Path)}, is strictly greater than the configured
   * last-backup timestamp (initially 0).
   */
  static class NewestLogFilter implements PathFilter {
    private Long lastBackupTS = 0L;

    public NewestLogFilter() {
    }

    /**
     * Sets the threshold that a file's creation time must exceed to be accepted.
     * @param ts timestamp of the last backup
     */
    protected void setLastBackupTS(Long ts) {
      this.lastBackupTS = ts;
    }

    /**
     * @param path candidate log file
     * @return {@code true} if the file is newer than the last-backup timestamp; {@code false} for
     *         meta WAL files, older files, and files whose timestamp cannot be determined
     */
    @Override
    public boolean accept(Path path) {
      // skip meta table log -- ts.meta file
      if (AbstractFSWALProvider.isMetaFile(path)) {
        LOG.debug("Skip .meta log file: {}", path.getName());
        return false;
      }
      try {
        return BackupUtils.getCreationTime(path) > lastBackupTS;
      } catch (Exception e) {
        // Best effort: an unreadable timestamp rejects the file rather than failing the listing,
        // but the cause is logged so the rejection can be diagnosed.
        LOG.warn("Cannot read timestamp of log file {}", path, e);
        return false;
      }
    }
  }
}