001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.snapshot;
019
020import java.io.IOException;
021import java.util.List;
022import java.util.concurrent.CancellationException;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.fs.FileSystem;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.ServerName;
027import org.apache.hadoop.hbase.TableName;
028import org.apache.hadoop.hbase.client.RegionInfo;
029import org.apache.hadoop.hbase.client.TableDescriptor;
030import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
031import org.apache.hadoop.hbase.errorhandling.ForeignException;
032import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
033import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
034import org.apache.hadoop.hbase.executor.EventHandler;
035import org.apache.hadoop.hbase.executor.EventType;
036import org.apache.hadoop.hbase.master.MasterServices;
037import org.apache.hadoop.hbase.master.MetricsSnapshot;
038import org.apache.hadoop.hbase.master.SnapshotSentinel;
039import org.apache.hadoop.hbase.master.locking.LockManager;
040import org.apache.hadoop.hbase.master.locking.LockManager.MasterLock;
041import org.apache.hadoop.hbase.monitoring.MonitoredTask;
042import org.apache.hadoop.hbase.monitoring.TaskMonitor;
043import org.apache.hadoop.hbase.procedure2.LockType;
044import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
045import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
046import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
047import org.apache.hadoop.hbase.util.CommonFSUtils;
048import org.apache.hadoop.hbase.util.Pair;
049import org.apache.yetus.audience.InterfaceAudience;
050import org.apache.zookeeper.KeeperException;
051import org.slf4j.Logger;
052import org.slf4j.LoggerFactory;
053
054import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
055
056import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
057import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
058
059/**
060 * A handler for taking snapshots from the master. This is not a subclass of TableEventHandler
061 * because using that would incur an extra hbase:meta scan. The {@link #snapshotRegions(List)} call
062 * should get implemented for each snapshot flavor.
063 */
064@InterfaceAudience.Private
065public abstract class TakeSnapshotHandler extends EventHandler
066  implements SnapshotSentinel, ForeignExceptionSnare {
067  private static final Logger LOG = LoggerFactory.getLogger(TakeSnapshotHandler.class);
068
069  private volatile boolean finished;
070
071  // none of these should ever be null
072  protected final MasterServices master;
073  protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
074  protected final SnapshotDescription snapshot;
075  protected final Configuration conf;
076  protected final FileSystem rootFs;
077  protected final FileSystem workingDirFs;
078  protected final Path rootDir;
079  private final Path snapshotDir;
080  protected final Path workingDir;
081  private final MasterSnapshotVerifier verifier;
082  protected final ForeignExceptionDispatcher monitor;
083  private final LockManager.MasterLock tableLock;
084  protected final MonitoredTask status;
085  protected final TableName snapshotTable;
086  protected final SnapshotManifest snapshotManifest;
087  protected final SnapshotManager snapshotManager;
088
089  protected TableDescriptor htd;
090
091  /**
092   * @param snapshot       descriptor of the snapshot to take
093   * @param masterServices master services provider
094   * @throws IllegalArgumentException if the working snapshot directory set from the configuration
095   *                                  is the same as the completed snapshot directory
096   * @throws IOException              if the file system of the working snapshot directory cannot be
097   *                                  determined
098   */
099  public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices,
100    final SnapshotManager snapshotManager) throws IOException {
101    super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
102    assert snapshot != null : "SnapshotDescription must not be nul1";
103    assert masterServices != null : "MasterServices must not be nul1";
104    this.master = masterServices;
105    this.conf = this.master.getConfiguration();
106    this.rootDir = this.master.getMasterFileSystem().getRootDir();
107    this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir, conf);
108    Preconditions.checkArgument(
109      !SnapshotDescriptionUtils.isSubDirectoryOf(workingDir, rootDir)
110        || SnapshotDescriptionUtils.isWithinDefaultWorkingDir(workingDir, conf),
111      "The working directory " + workingDir + " cannot be in the root directory unless it is "
112        + "within the default working directory");
113
114    this.snapshot = snapshot;
115    this.snapshotManager = snapshotManager;
116    this.snapshotTable = TableName.valueOf(snapshot.getTable());
117    this.rootFs = this.master.getMasterFileSystem().getFileSystem();
118    this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
119    this.workingDirFs = this.workingDir.getFileSystem(this.conf);
120    this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
121
122    this.tableLock = master.getLockManager().createMasterLock(snapshotTable, LockType.EXCLUSIVE,
123      this.getClass().getName() + ": take snapshot " + snapshot.getName());
124
125    // prepare the verify
126    this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, workingDirFs);
127    // update the running tasks
128    this.status = TaskMonitor.get().createStatus(
129      "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable, false, true);
130    this.snapshotManifest =
131      SnapshotManifest.create(conf, rootFs, workingDir, snapshot, monitor, status);
132  }
133
134  private TableDescriptor loadTableDescriptor() throws IOException {
135    TableDescriptor htd = this.master.getTableDescriptors().get(snapshotTable);
136    if (htd == null) {
137      throw new IOException("TableDescriptor missing for " + snapshotTable);
138    }
139    if (htd.getMaxFileSize() == -1 && this.snapshot.getMaxFileSize() > 0) {
140      htd = TableDescriptorBuilder.newBuilder(htd).setValue(TableDescriptorBuilder.MAX_FILESIZE,
141        Long.toString(this.snapshot.getMaxFileSize())).build();
142    }
143    return htd;
144  }
145
146  @Override
147  public TakeSnapshotHandler prepare() throws Exception {
148    super.prepare();
149    // after this, you should ensure to release this lock in case of exceptions
150    this.tableLock.acquire();
151    try {
152      this.htd = loadTableDescriptor(); // check that .tableinfo is present
153    } catch (Exception e) {
154      this.tableLock.release();
155      throw e;
156    }
157    return this;
158  }
159
160  /**
161   * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)} call
162   * should get implemented for each snapshot flavor.
163   */
164  @Override
165  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "REC_CATCH_EXCEPTION",
166      justification = "Intentional")
167  public void process() {
168    String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
169      + eventType + " on table " + snapshotTable;
170    LOG.info(msg);
171    MasterLock tableLockToRelease = this.tableLock;
172    status.setStatus(msg);
173    try {
174      if (downgradeToSharedTableLock()) {
175        // release the exclusive lock and hold the shared lock instead
176        tableLockToRelease = master.getLockManager().createMasterLock(snapshotTable,
177          LockType.SHARED, this.getClass().getName() + ": take snapshot " + snapshot.getName());
178        tableLock.release();
179        tableLockToRelease.acquire();
180      }
181      // If regions move after this meta scan, the region specific snapshot should fail, triggering
182      // an external exception that gets captured here.
183
184      // write down the snapshot info in the working directory
185      SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFs);
186      snapshotManifest.addTableDescriptor(this.htd);
187      monitor.rethrowException();
188
189      List<Pair<RegionInfo, ServerName>> regionsAndLocations =
190        master.getAssignmentManager().getTableRegionsAndLocations(snapshotTable, false);
191
192      // run the snapshot
193      snapshotRegions(regionsAndLocations);
194      monitor.rethrowException();
195
196      // flush the in-memory state, and write the single manifest
197      status.setStatus("Consolidate snapshot: " + snapshot.getName());
198      snapshotManifest.consolidate();
199
200      // verify the snapshot is valid
201      status.setStatus("Verifying snapshot: " + snapshot.getName());
202      verifier.verifySnapshot(workingDir, true);
203
204      // complete the snapshot, atomically moving from tmp to .snapshot dir.
205      SnapshotDescriptionUtils.completeSnapshot(this.snapshotDir, this.workingDir, this.rootFs,
206        this.workingDirFs, this.conf);
207      finished = true;
208      msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
209      status.markComplete(msg);
210      LOG.info(msg);
211      metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
212      if (master.getMasterCoprocessorHost() != null) {
213        master.getMasterCoprocessorHost()
214          .postCompletedSnapshotAction(ProtobufUtil.createSnapshotDesc(snapshot), this.htd);
215      }
216    } catch (Exception e) { // FindBugs: REC_CATCH_EXCEPTION
217      status.abort("Failed to complete snapshot " + snapshot.getName() + " on table "
218        + snapshotTable + " because " + e.getMessage());
219      String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
220        + " due to exception:" + e.getMessage();
221      LOG.error(reason, e);
222      ForeignException ee = new ForeignException(reason, e);
223      monitor.receive(ee);
224      // need to mark this completed to close off and allow cleanup to happen.
225      cancel(reason);
226    } finally {
227      LOG.debug("Launching cleanup of working dir:" + workingDir);
228      try {
229        // if the working dir is still present, the snapshot has failed. it is present we delete
230        // it.
231        if (!workingDirFs.delete(workingDir, true)) {
232          LOG.error("Couldn't delete snapshot working directory:" + workingDir);
233        }
234      } catch (IOException e) {
235        LOG.error("Couldn't delete snapshot working directory:" + workingDir);
236      }
237      if (LOG.isDebugEnabled()) {
238        LOG.debug("Table snapshot journal : \n" + status.prettyPrintJournal());
239      }
240      tableLockToRelease.release();
241    }
242  }
243
244  /**
245   * When taking snapshot, first we must acquire the exclusive table lock to confirm that there are
246   * no ongoing merge/split procedures. But later, we should try our best to release the exclusive
247   * lock as this may hurt the availability, because we need to hold the shared lock when assigning
248   * regions.
249   * <p/>
250   * See HBASE-21480 for more details.
251   */
252  protected abstract boolean downgradeToSharedTableLock();
253
254  /**
255   * Snapshot the specified regions
256   */
257  protected abstract void snapshotRegions(List<Pair<RegionInfo, ServerName>> regions)
258    throws IOException, KeeperException;
259
260  /**
261   * Take a snapshot of the specified disabled region
262   */
263  protected void snapshotDisabledRegion(final RegionInfo regionInfo) throws IOException {
264    snapshotManifest.addRegion(CommonFSUtils.getTableDir(rootDir, snapshotTable), regionInfo);
265    monitor.rethrowException();
266    status.setStatus("Completed referencing HFiles for offline region " + regionInfo.toString()
267      + " of table: " + snapshotTable);
268  }
269
270  @Override
271  public void cancel(String why) {
272    if (finished) return;
273
274    this.finished = true;
275    LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
276      + " because: " + why);
277    CancellationException ce = new CancellationException(why);
278    monitor.receive(new ForeignException(master.getServerName().toString(), ce));
279  }
280
281  @Override
282  public boolean isFinished() {
283    return finished;
284  }
285
286  @Override
287  public long getCompletionTimestamp() {
288    return this.status.getCompletionTimestamp();
289  }
290
291  @Override
292  public SnapshotDescription getSnapshot() {
293    return snapshot;
294  }
295
296  @Override
297  public ForeignException getExceptionIfFailed() {
298    return monitor.getException();
299  }
300
301  @Override
302  public void rethrowExceptionIfFailed() throws ForeignException {
303    monitor.rethrowException();
304  }
305
306  @Override
307  public void rethrowException() throws ForeignException {
308    monitor.rethrowException();
309  }
310
311  @Override
312  public boolean hasException() {
313    return monitor.hasException();
314  }
315
316  @Override
317  public ForeignException getException() {
318    return monitor.getException();
319  }
320}