001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.backup.impl;
019
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
040
041/**
042 * After a full backup was created, the incremental backup will only store the changes made after
043 * the last full or incremental backup. Creating the backup copies the logfiles in .logs and
044 * .oldlogs since the last backup timestamp.
045 */
046@InterfaceAudience.Private
047public class IncrementalBackupManager extends BackupManager {
048  public static final Logger LOG = LoggerFactory.getLogger(IncrementalBackupManager.class);
049
050  public IncrementalBackupManager(Connection conn, Configuration conf) throws IOException {
051    super(conn, conf);
052  }
053
054  /**
055   * Obtain the list of logs that need to be copied out for this incremental backup. The list is set
056   * in BackupInfo.
057   * @return The new HashMap of RS log time stamps after the log roll for this incremental backup.
058   * @throws IOException exception
059   */
060  public Map<String, Long> getIncrBackupLogFileMap() throws IOException {
061    List<String> logList;
062    Map<String, Long> newTimestamps;
063    Map<String, Long> previousTimestampMins =
064      BackupUtils.getRSLogTimestampMins(readLogTimestampMap());
065
066    // get all new log files from .logs and .oldlogs after last TS and before new timestamp
067    if (previousTimestampMins.isEmpty()) {
068      throw new IOException("Cannot read any previous back up timestamps from backup system table. "
069        + "In order to create an incremental backup, at least one full backup is needed.");
070    }
071
072    LOG.info("Execute roll log procedure for incremental backup ...");
073    BackupUtils.logRoll(conn, backupInfo.getBackupRootDir(), conf);
074
075    newTimestamps = readRegionServerLastLogRollResult();
076
077    logList = getLogFilesForNewBackup(previousTimestampMins, newTimestamps, conf);
078    logList = excludeProcV2WALs(logList);
079    backupInfo.setIncrBackupFileList(logList);
080
081    return newTimestamps;
082  }
083
084  private List<String> excludeProcV2WALs(List<String> logList) {
085    List<String> list = new ArrayList<>();
086    for (int i = 0; i < logList.size(); i++) {
087      Path p = new Path(logList.get(i));
088      String name = p.getName();
089
090      if (name.startsWith(WALProcedureStore.LOG_PREFIX)) {
091        continue;
092      }
093
094      list.add(logList.get(i));
095    }
096    return list;
097  }
098
099  /**
100   * For each region server: get all log files newer than the last timestamps but not newer than the
101   * newest timestamps.
102   * @param olderTimestamps  the timestamp for each region server of the last backup.
103   * @param newestTimestamps the timestamp for each region server that the backup should lead to.
104   * @param conf             the Hadoop and Hbase configuration
105   * @return a list of log files to be backed up
106   * @throws IOException exception
107   */
108  private List<String> getLogFilesForNewBackup(Map<String, Long> olderTimestamps,
109    Map<String, Long> newestTimestamps, Configuration conf) throws IOException {
110    LOG.debug("In getLogFilesForNewBackup()\n" + "olderTimestamps: " + olderTimestamps
111      + "\n newestTimestamps: " + newestTimestamps);
112
113    long prevBackupStartTs = Collections.min(olderTimestamps.values());
114    Path walRootDir = CommonFSUtils.getWALRootDir(conf);
115    Path logDir = new Path(walRootDir, HConstants.HREGION_LOGDIR_NAME);
116    Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
117    FileSystem fs = walRootDir.getFileSystem(conf);
118    NewestLogFilter pathFilter = new NewestLogFilter();
119
120    List<String> resultLogFiles = new ArrayList<>();
121    List<String> newestLogs = new ArrayList<>();
122
123    /*
124     * The old region servers and timestamps info we kept in backup system table may be out of sync
125     * if new region server is added or existing one lost. We'll deal with it here when processing
126     * the logs. If data in backup system table has more hosts, just ignore it. If the .logs
127     * directory includes more hosts, the additional hosts will not have old timestamps to compare
128     * with. We'll just use all the logs in that directory. We always write up-to-date region server
129     * and timestamp info to backup system table at the end of successful backup.
130     */
131    FileStatus[] rss;
132    Path p;
133    String host;
134    Long oldTimeStamp;
135    String currentLogFile;
136    long currentLogTS;
137
138    // Get the files in .logs.
139    rss = fs.listStatus(logDir);
140    for (FileStatus rs : rss) {
141      p = rs.getPath();
142      host = BackupUtils.parseHostNameFromLogFile(p);
143      if (host == null) {
144        continue;
145      }
146      FileStatus[] logs;
147      oldTimeStamp = olderTimestamps.get(host);
148      // It is possible that there is no old timestamp in backup system table for this host if
149      // this region server is newly added after our last backup.
150      if (oldTimeStamp == null) {
151        logs = fs.listStatus(p);
152      } else {
153        pathFilter.setLastBackupTS(oldTimeStamp);
154        logs = fs.listStatus(p, pathFilter);
155      }
156      for (FileStatus log : logs) {
157        LOG.debug("currentLogFile: " + log.getPath().toString());
158        if (AbstractFSWALProvider.isMetaFile(log.getPath())) {
159          if (LOG.isDebugEnabled()) {
160            LOG.debug("Skip {} log file: {}", TableName.META_TABLE_NAME, log.getPath().getName());
161          }
162          continue;
163        }
164        currentLogFile = log.getPath().toString();
165        resultLogFiles.add(currentLogFile);
166        currentLogTS = BackupUtils.getCreationTime(log.getPath());
167
168        // If newestTimestamps.get(host) is null, means that
169        // either RS (host) has been restarted recently with different port number
170        // or RS is down (was decommisioned). In any case, we treat this
171        // log file as eligible for inclusion into incremental backup log list
172        Long ts = newestTimestamps.get(host);
173        if (ts == null) {
174          LOG.warn("ORPHAN log found: " + log + " host=" + host);
175          LOG.debug("Known hosts (from newestTimestamps):");
176          for (String s : newestTimestamps.keySet()) {
177            LOG.debug(s);
178          }
179        }
180        if (ts == null || currentLogTS > ts) {
181          newestLogs.add(currentLogFile);
182        }
183      }
184    }
185
186    // Include the .oldlogs files too.
187    FileStatus[] oldlogs = fs.listStatus(oldLogDir);
188    for (FileStatus oldlog : oldlogs) {
189      p = oldlog.getPath();
190      currentLogFile = p.toString();
191      if (AbstractFSWALProvider.isMetaFile(p)) {
192        if (LOG.isDebugEnabled()) {
193          LOG.debug("Skip .meta log file: " + currentLogFile);
194        }
195        continue;
196      }
197      host = BackupUtils.parseHostFromOldLog(p);
198      if (host == null) {
199        continue;
200      }
201      currentLogTS = BackupUtils.getCreationTime(p);
202      oldTimeStamp = olderTimestamps.get(host);
203      /*
204       * It is possible that there is no old timestamp in backup system table for this host. At the
205       * time of our last backup operation, this rs did not exist. The reason can be one of the two:
206       * 1. The rs already left/crashed. Its logs were moved to .oldlogs. 2. The rs was added after
207       * our last backup.
208       */
209      if (oldTimeStamp == null) {
210        if (currentLogTS < prevBackupStartTs) {
211          // This log file is really old, its region server was before our last backup.
212          continue;
213        } else {
214          resultLogFiles.add(currentLogFile);
215        }
216      } else if (currentLogTS > oldTimeStamp) {
217        resultLogFiles.add(currentLogFile);
218      }
219
220      // It is possible that a host in .oldlogs is an obsolete region server
221      // so newestTimestamps.get(host) here can be null.
222      // Even if these logs belong to a obsolete region server, we still need
223      // to include they to avoid loss of edits for backup.
224      Long newTimestamp = newestTimestamps.get(host);
225      if (newTimestamp == null || currentLogTS > newTimestamp) {
226        newestLogs.add(currentLogFile);
227      }
228    }
229    // remove newest log per host because they are still in use
230    resultLogFiles.removeAll(newestLogs);
231    return resultLogFiles;
232  }
233
234  static class NewestLogFilter implements PathFilter {
235    private Long lastBackupTS = 0L;
236
237    public NewestLogFilter() {
238    }
239
240    protected void setLastBackupTS(Long ts) {
241      this.lastBackupTS = ts;
242    }
243
244    @Override
245    public boolean accept(Path path) {
246      // skip meta table log -- ts.meta file
247      if (AbstractFSWALProvider.isMetaFile(path)) {
248        if (LOG.isDebugEnabled()) {
249          LOG.debug("Skip .meta log file: " + path.getName());
250        }
251        return false;
252      }
253      long timestamp;
254      try {
255        timestamp = BackupUtils.getCreationTime(path);
256        return timestamp > lastBackupTS;
257      } catch (Exception e) {
258        LOG.warn("Cannot read timestamp of log file " + path);
259        return false;
260      }
261    }
262  }
263}