/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.impl;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * After a full backup has been created, an incremental backup stores only the changes made since
 * the last full or incremental backup. Creating the backup copies the log files in .logs and
 * .oldlogs written since the last backup timestamp.
 */
@InterfaceAudience.Private
public class IncrementalBackupManager extends BackupManager {
  public static final Logger LOG = LoggerFactory.getLogger(IncrementalBackupManager.class);

  public IncrementalBackupManager(Connection conn, Configuration conf) throws IOException {
    super(conn, conf);
  }
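  // A minimal usage sketch (hypothetical driver code, not part of this class), assuming conf
  // points at a live cluster and that the inherited backupInfo has already been initialized for
  // the current backup session:
  //
  //   try (Connection conn = ConnectionFactory.createConnection(conf)) {
  //     IncrementalBackupManager mgr = new IncrementalBackupManager(conn, conf);
  //     // Rolls WALs on all region servers, then records the WAL files written
  //     // since the previous backup in the session's BackupInfo.
  //     Map<String, Long> newTimestamps = mgr.getIncrBackupLogFileMap();
  //   }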
  /**
   * Obtain the list of logs that need to be copied out for this incremental backup. The list is
   * set in BackupInfo.
   * @return the new map of region server log timestamps after the log roll for this incremental
   *         backup
   * @throws IOException if the backup system table cannot be read or no previous full backup
   *                     exists
   */
  public Map<String, Long> getIncrBackupLogFileMap() throws IOException {
    List<String> logList;
    Map<String, Long> newTimestamps;
    Map<String, Long> previousTimestampMins;

    String savedStartCode = readBackupStartCode();

    // key: tableName
    // value: <RegionServer, PreviousTimeStamp>
    Map<TableName, Map<String, Long>> previousTimestampMap = readLogTimestampMap();

    previousTimestampMins = BackupUtils.getRSLogTimestampMins(previousTimestampMap);

    if (LOG.isDebugEnabled()) {
      LOG.debug("StartCode " + savedStartCode + " for backupID " + backupInfo.getBackupId());
    }
    // Get all new log files from .logs and .oldlogs after the last timestamp and before the new
    // timestamp.
    if (
      savedStartCode == null || previousTimestampMins == null || previousTimestampMins.isEmpty()
    ) {
      throw new IOException("Cannot read any previous backup timestamps from backup system table. "
        + "In order to create an incremental backup, at least one full backup is needed.");
    }

    LOG.info("Execute roll log procedure for incremental backup ...");
    HashMap<String, String> props = new HashMap<>();
    props.put("backupRoot", backupInfo.getBackupRootDir());

    try (Admin admin = conn.getAdmin()) {
      admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE,
        LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props);
    }
    newTimestamps = readRegionServerLastLogRollResult();

    logList = getLogFilesForNewBackup(previousTimestampMins, newTimestamps, conf, savedStartCode);
    logList = excludeProcV2WALs(logList);
    backupInfo.setIncrBackupFileList(logList);

    return newTimestamps;
  }

  /**
   * Filters out procedure-V2 store WALs, which contain no table data and must not be copied into
   * the incremental backup.
   */
  private List<String> excludeProcV2WALs(List<String> logList) {
    List<String> list = new ArrayList<>();
    for (String log : logList) {
      Path p = new Path(log);
      String name = p.getName();

      // Procedure store WALs carry a fixed file name prefix; skip them.
      if (name.startsWith(WALProcedureStore.LOG_PREFIX)) {
        continue;
      }

      list.add(log);
    }
    return list;
  }
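  // Illustration of the per-host selection rule implemented below (values are made up): with
  // olderTimestamps = {rs1 -> 100} and newestTimestamps = {rs1 -> 200}, a WAL under .logs/rs1
  // created at t=150 is copied, one created at t=90 is filtered out by NewestLogFilter, and one
  // created at t=250 (after the roll for this backup) is collected in newestLogs and removed at
  // the end because the region server is still writing to it.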
  /**
   * For each region server: get all log files newer than the last timestamps but not newer than
   * the newest timestamps.
   * @param olderTimestamps  the timestamp for each region server of the last backup
   * @param newestTimestamps the timestamp for each region server that the backup should lead to
   * @param conf             the Hadoop and HBase configuration
   * @param savedStartCode   the startcode (timestamp) of the last successful backup
   * @return a list of log files to be backed up
   * @throws IOException if listing the WAL directories fails
   */
  private List<String> getLogFilesForNewBackup(Map<String, Long> olderTimestamps,
    Map<String, Long> newestTimestamps, Configuration conf, String savedStartCode)
    throws IOException {
    LOG.debug("In getLogFilesForNewBackup()\nolderTimestamps: " + olderTimestamps
      + "\n newestTimestamps: " + newestTimestamps);

    Path walRootDir = CommonFSUtils.getWALRootDir(conf);
    Path logDir = new Path(walRootDir, HConstants.HREGION_LOGDIR_NAME);
    Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
    FileSystem fs = walRootDir.getFileSystem(conf);
    NewestLogFilter pathFilter = new NewestLogFilter();

    List<String> resultLogFiles = new ArrayList<>();
    List<String> newestLogs = new ArrayList<>();

    /*
     * The old region servers and timestamps info we kept in the backup system table may be out of
     * sync if a new region server is added or an existing one is lost. We deal with that here when
     * processing the logs. If the data in the backup system table has more hosts, just ignore
     * them. If the .logs directory includes more hosts, the additional hosts will not have old
     * timestamps to compare with; we just use all the logs in that directory. We always write
     * up-to-date region server and timestamp info to the backup system table at the end of a
     * successful backup.
     */
    FileStatus[] rss;
    Path p;
    String host;
    Long oldTimeStamp;
    String currentLogFile;
    long currentLogTS;

    // Get the files in .logs.
    rss = fs.listStatus(logDir);
    for (FileStatus rs : rss) {
      p = rs.getPath();
      host = BackupUtils.parseHostNameFromLogFile(p);
      if (host == null) {
        continue;
      }
      FileStatus[] logs;
      oldTimeStamp = olderTimestamps.get(host);
      // It is possible that there is no old timestamp in the backup system table for this host if
      // this region server was newly added after our last backup.
      if (oldTimeStamp == null) {
        logs = fs.listStatus(p);
      } else {
        pathFilter.setLastBackupTS(oldTimeStamp);
        logs = fs.listStatus(p, pathFilter);
      }
      for (FileStatus log : logs) {
        LOG.debug("currentLogFile: " + log.getPath().toString());
        if (AbstractFSWALProvider.isMetaFile(log.getPath())) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Skip hbase:meta log file: " + log.getPath().getName());
          }
          continue;
        }
        currentLogFile = log.getPath().toString();
        resultLogFiles.add(currentLogFile);
        currentLogTS = BackupUtils.getCreationTime(log.getPath());

        // If newestTimestamps.get(host) is null, it means that either the RS (host) was restarted
        // recently with a different port number, or the RS is down (was decommissioned). In either
        // case, we treat this log file as eligible for inclusion in the incremental backup log
        // list.
        Long ts = newestTimestamps.get(host);
        if (ts == null) {
          LOG.warn("ORPHAN log found: " + log + " host=" + host);
          LOG.debug("Known hosts (from newestTimestamps):");
          for (String s : newestTimestamps.keySet()) {
            LOG.debug(s);
          }
        }
        if (ts == null || currentLogTS > ts) {
          newestLogs.add(currentLogFile);
        }
      }
    }

    // Include the .oldlogs files too.
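    // Archived WALs are named differently, so the host is recovered with
    // BackupUtils.parseHostFromOldLog. A host seen only here may no longer appear in either
    // timestamp map, in which case the decision falls back to the start code of the last
    // successful backup.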
    FileStatus[] oldlogs = fs.listStatus(oldLogDir);
    for (FileStatus oldlog : oldlogs) {
      p = oldlog.getPath();
      currentLogFile = p.toString();
      if (AbstractFSWALProvider.isMetaFile(p)) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Skip .meta log file: " + currentLogFile);
        }
        continue;
      }
      host = BackupUtils.parseHostFromOldLog(p);
      if (host == null) {
        continue;
      }
      currentLogTS = BackupUtils.getCreationTime(p);
      oldTimeStamp = olderTimestamps.get(host);
      /*
       * It is possible that there is no old timestamp in the backup system table for this host:
       * at the time of our last backup operation, this RS did not exist. The reason can be one of
       * two: 1. The RS already left or crashed, and its logs were moved to .oldlogs. 2. The RS
       * was added after our last backup.
       */
      if (oldTimeStamp == null) {
        if (currentLogTS < Long.parseLong(savedStartCode)) {
          // This log file is really old; its region server was gone before our last backup.
          continue;
        } else {
          resultLogFiles.add(currentLogFile);
        }
      } else if (currentLogTS > oldTimeStamp) {
        resultLogFiles.add(currentLogFile);
      }

      // It is possible that a host in .oldlogs is an obsolete region server, so
      // newestTimestamps.get(host) here can be null. Even if these logs belong to an obsolete
      // region server, we still need to include them to avoid losing edits in the backup.
      Long newTimestamp = newestTimestamps.get(host);
      if (newTimestamp == null || currentLogTS > newTimestamp) {
        newestLogs.add(currentLogFile);
      }
    }
    // Remove the newest log per host because it is still in use.
    resultLogFiles.removeAll(newestLogs);
    return resultLogFiles;
  }

  /**
   * A {@link PathFilter} that accepts only WAL files created after the timestamp of the last
   * backup, skipping hbase:meta WALs.
   */
  static class NewestLogFilter implements PathFilter {
    private Long lastBackupTS = 0L;

    public NewestLogFilter() {
    }

    protected void setLastBackupTS(Long ts) {
      this.lastBackupTS = ts;
    }

    @Override
    public boolean accept(Path path) {
      // Skip the meta table log (a ts.meta file).
      if (AbstractFSWALProvider.isMetaFile(path)) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Skip .meta log file: " + path.getName());
        }
        return false;
      }
      long timestamp;
      try {
        timestamp = BackupUtils.getCreationTime(path);
        return timestamp > lastBackupTS;
      } catch (Exception e) {
        LOG.warn("Cannot read timestamp of log file " + path, e);
        return false;
      }
    }
  }
}