001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.snapshot;
019
020import java.io.FileNotFoundException;
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.Collections;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.Iterator;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030import java.util.concurrent.ConcurrentHashMap;
031import java.util.concurrent.Executors;
032import java.util.concurrent.ScheduledExecutorService;
033import java.util.concurrent.ScheduledFuture;
034import java.util.concurrent.ThreadPoolExecutor;
035import java.util.concurrent.TimeUnit;
036import java.util.concurrent.locks.ReadWriteLock;
037import java.util.concurrent.locks.ReentrantReadWriteLock;
038import org.apache.hadoop.conf.Configuration;
039import org.apache.hadoop.fs.FSDataInputStream;
040import org.apache.hadoop.fs.FileStatus;
041import org.apache.hadoop.fs.FileSystem;
042import org.apache.hadoop.fs.Path;
043import org.apache.hadoop.hbase.HBaseInterfaceAudience;
044import org.apache.hadoop.hbase.HConstants;
045import org.apache.hadoop.hbase.Stoppable;
046import org.apache.hadoop.hbase.TableName;
047import org.apache.hadoop.hbase.client.TableDescriptor;
048import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
049import org.apache.hadoop.hbase.client.TableState;
050import org.apache.hadoop.hbase.errorhandling.ForeignException;
051import org.apache.hadoop.hbase.executor.ExecutorService;
052import org.apache.hadoop.hbase.ipc.RpcServer;
053import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
054import org.apache.hadoop.hbase.master.MasterFileSystem;
055import org.apache.hadoop.hbase.master.MasterServices;
056import org.apache.hadoop.hbase.master.MetricsMaster;
057import org.apache.hadoop.hbase.master.SnapshotSentinel;
058import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
059import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner;
060import org.apache.hadoop.hbase.master.procedure.CloneSnapshotProcedure;
061import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
062import org.apache.hadoop.hbase.master.procedure.RestoreSnapshotProcedure;
063import org.apache.hadoop.hbase.procedure.MasterProcedureManager;
064import org.apache.hadoop.hbase.procedure.Procedure;
065import org.apache.hadoop.hbase.procedure.ProcedureCoordinator;
066import org.apache.hadoop.hbase.procedure.ProcedureCoordinatorRpcs;
067import org.apache.hadoop.hbase.procedure.ZKProcedureCoordinator;
068import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
069import org.apache.hadoop.hbase.security.AccessDeniedException;
070import org.apache.hadoop.hbase.security.User;
071import org.apache.hadoop.hbase.security.access.AccessChecker;
072import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclCleaner;
073import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclHelper;
074import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
075import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException;
076import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
077import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
078import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
079import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException;
080import org.apache.hadoop.hbase.snapshot.SnapshotExistsException;
081import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
082import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
083import org.apache.hadoop.hbase.snapshot.TablePartiallyOpenException;
084import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
085import org.apache.hadoop.hbase.util.CommonFSUtils;
086import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
087import org.apache.hadoop.hbase.util.NonceKey;
088import org.apache.hadoop.hbase.util.TableDescriptorChecker;
089import org.apache.yetus.audience.InterfaceAudience;
090import org.apache.yetus.audience.InterfaceStability;
091import org.apache.zookeeper.KeeperException;
092import org.slf4j.Logger;
093import org.slf4j.LoggerFactory;
094
095import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
096
097import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
098import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.NameStringPair;
099import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ProcedureDescription;
100import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
101import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription.Type;
102
103/**
104 * This class manages the procedure of taking and restoring snapshots. There is only one
105 * SnapshotManager for the master.
106 * <p>
107 * The class provides methods for monitoring in-progress snapshot actions.
108 * <p>
109 * Note: Currently there can only be one snapshot being taken at a time over the cluster. This is a
110 * simplification in the current implementation.
111 */
112@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
113@InterfaceStability.Unstable
114public class SnapshotManager extends MasterProcedureManager implements Stoppable {
  private static final Logger LOG = LoggerFactory.getLogger(SnapshotManager.class);

  /**
   * Default interval, in milliseconds, between checks for snapshot completion (presumably the
   * fallback for {@link #SNAPSHOT_WAKE_MILLIS_KEY}; the lookup is not in this part of the file).
   */
  private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500;

  /**
   * Conf key for the wait time before removing a finished sentinel from the in-progress map.
   * <p>
   * NOTE: This is used as a safety auto cleanup.
   * The snapshot and restore handler map entries are removed when a user asks whether a snapshot
   * or restore is completed. This operation is part of the HBaseAdmin snapshot/restore API flow.
   * In case something fails on the client side and the snapshot/restore state is not reclaimed
   * after a default timeout, the entry is removed from the in-progress map.
   * At this point, if the user asks for the snapshot/restore status, the result will be
   * "snapshot done" if it exists or "failed" if it doesn't exist.
   */
  public static final String HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS =
      "hbase.snapshot.sentinels.cleanup.timeoutMillis";
  /** Default sentinel cleanup timeout: 60 * 1000, i.e. one minute when read as milliseconds. */
  public static final long SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT = 60 * 1000L;

  /** Conf key to enable or disable snapshot support */
  public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled";

  /**
   * Conf key for # of ms elapsed between checks for snapshot errors while waiting for
   * completion.
   */
  private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis";

  /** Name of the operation to use in the controller */
  public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot";

  /** Conf key for # of threads used by the SnapshotManager thread pool */
  public static final String SNAPSHOT_POOL_THREADS_KEY = "hbase.snapshot.master.threads";

  /**
   * Default thread count; presumably the fallback for {@link #SNAPSHOT_POOL_THREADS_KEY}
   * (the lookup is not in this part of the file).
   */
  public static final int SNAPSHOT_POOL_THREADS_DEFAULT = 1;

  private boolean stopped;
  private MasterServices master;  // Needed by TableEventHandlers
  private ProcedureCoordinator coordinator;

  // Is snapshot feature enabled?
  private boolean isSnapshotSupported = false;

  // Snapshot handlers map, with table name as key.
  // Most accessors take the object lock via synchronized, but not all of them do (for example
  // isTakingSnapshot(TableName) reads it without the lock), hence the ConcurrentHashMap.
  // snapshotTable() will insert a handler into the map.
  // isSnapshotDone() will remove the requested handler if the operation is finished.
  private final Map<TableName, SnapshotSentinel> snapshotHandlers = new ConcurrentHashMap<>();
  // Single daemon thread named "SnapshotHandlerChoreCleaner"; the four-argument constructor uses
  // it to run cleanupSentinels() at a fixed rate.
  private final ScheduledExecutorService scheduleThreadPool =
      Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder()
          .setNameFormat("SnapshotHandlerChoreCleaner").setDaemon(true).build());
  // Handle of the periodic cleanup task scheduled on scheduleThreadPool.
  private ScheduledFuture<?> snapshotHandlerChoreCleanerTask;

  // Restore map, with table name as key, procedure ID as value.
  // NOTE(review): intended to be accessed and modified only under the object lock using
  // synchronized (it is a plain HashMap); the accessors are outside this part of the file.
  // restoreSnapshot()/cloneSnapshot() will insert a procedure ID in the map.
  //
  // TODO: just as the Apache HBase 1.x implementation, this map would not survive master
  // restart/failover. This is just a stopgap implementation until implementation of taking
  // snapshot using Procedure-V2.
  private Map<TableName, Long> restoreTableToProcIdMap = new HashMap<>();

  private Path rootDir;
  private ExecutorService executorService;

  /**
   * Read write lock between taking a snapshot and the snapshot HFile cleaner. The cleaner should
   * skip checking the HFiles if any snapshot is in progress, otherwise it may clean an HFile which
   * belongs to the snapshot being created. So the cleaner should grab the write lock first when
   * it starts to work, while takeSnapshot() holds the read lock. The lock is created in fair
   * mode. (See HBASE-21387)
   */
  private ReentrantReadWriteLock takingSnapshotLock = new ReentrantReadWriteLock(true);
189
  /**
   * No-argument constructor. It leaves {@code master}, {@code coordinator},
   * {@code executorService} and {@code rootDir} unset and schedules no cleanup task.
   * NOTE(review): presumably those are populated later by an initialization method outside this
   * part of the file - confirm.
   */
  public SnapshotManager() {}
191
192  /**
193   * Fully specify all necessary components of a snapshot manager. Exposed for testing.
194   * @param master services for the master where the manager is running
195   * @param coordinator procedure coordinator instance.  exposed for testing.
196   * @param pool HBase ExecutorServcie instance, exposed for testing.
197   */
198  @InterfaceAudience.Private
199  SnapshotManager(final MasterServices master, ProcedureCoordinator coordinator,
200      ExecutorService pool, int sentinelCleanInterval)
201      throws IOException, UnsupportedOperationException {
202    this.master = master;
203
204    this.rootDir = master.getMasterFileSystem().getRootDir();
205    Configuration conf = master.getConfiguration();
206    checkSnapshotSupport(conf, master.getMasterFileSystem());
207
208    this.coordinator = coordinator;
209    this.executorService = pool;
210    resetTempDir();
211    snapshotHandlerChoreCleanerTask = this.scheduleThreadPool.scheduleAtFixedRate(
212      this::cleanupSentinels, sentinelCleanInterval, sentinelCleanInterval, TimeUnit.SECONDS);
213  }
214
215  /**
216   * Gets the list of all completed snapshots.
217   * @return list of SnapshotDescriptions
218   * @throws IOException File system exception
219   */
220  public List<SnapshotDescription> getCompletedSnapshots() throws IOException {
221    return getCompletedSnapshots(SnapshotDescriptionUtils.getSnapshotsDir(rootDir), true);
222  }
223
224  /**
225   * Gets the list of all completed snapshots.
226   * @param snapshotDir snapshot directory
227   * @param withCpCall Whether to call CP hooks
228   * @return list of SnapshotDescriptions
229   * @throws IOException File system exception
230   */
231  private List<SnapshotDescription> getCompletedSnapshots(Path snapshotDir, boolean withCpCall)
232      throws IOException {
233    List<SnapshotDescription> snapshotDescs = new ArrayList<>();
234    // first create the snapshot root path and check to see if it exists
235    FileSystem fs = master.getMasterFileSystem().getFileSystem();
236    if (snapshotDir == null) snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
237
238    // if there are no snapshots, return an empty list
239    if (!fs.exists(snapshotDir)) {
240      return snapshotDescs;
241    }
242
243    // ignore all the snapshots in progress
244    FileStatus[] snapshots = fs.listStatus(snapshotDir,
245      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
246    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
247    withCpCall = withCpCall && cpHost != null;
248    // loop through all the completed snapshots
249    for (FileStatus snapshot : snapshots) {
250      Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
251      // if the snapshot is bad
252      if (!fs.exists(info)) {
253        LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist");
254        continue;
255      }
256      FSDataInputStream in = null;
257      try {
258        in = fs.open(info);
259        SnapshotDescription desc = SnapshotDescription.parseFrom(in);
260        org.apache.hadoop.hbase.client.SnapshotDescription descPOJO = (withCpCall)
261            ? ProtobufUtil.createSnapshotDesc(desc) : null;
262        if (withCpCall) {
263          try {
264            cpHost.preListSnapshot(descPOJO);
265          } catch (AccessDeniedException e) {
266            LOG.warn("Current user does not have access to " + desc.getName() + " snapshot. "
267                + "Either you should be owner of this snapshot or admin user.");
268            // Skip this and try for next snapshot
269            continue;
270          }
271        }
272        snapshotDescs.add(desc);
273
274        // call coproc post hook
275        if (withCpCall) {
276          cpHost.postListSnapshot(descPOJO);
277        }
278      } catch (IOException e) {
279        LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e);
280      } finally {
281        if (in != null) {
282          in.close();
283        }
284      }
285    }
286    return snapshotDescs;
287  }
288
289  /**
290   * Cleans up any snapshots in the snapshot/.tmp directory that were left from failed
291   * snapshot attempts.
292   *
293   * @throws IOException if we can't reach the filesystem
294   */
295  private void resetTempDir() throws IOException {
296    // cleanup any existing snapshots.
297    Path tmpdir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir,
298        master.getConfiguration());
299    FileSystem tmpFs = tmpdir.getFileSystem(master.getConfiguration());
300    if (!tmpFs.delete(tmpdir, true)) {
301      LOG.warn("Couldn't delete working snapshot directory: " + tmpdir);
302    }
303  }
304
305  /**
306   * Delete the specified snapshot
307   * @param snapshot
308   * @throws SnapshotDoesNotExistException If the specified snapshot does not exist.
309   * @throws IOException For filesystem IOExceptions
310   */
311  public void deleteSnapshot(SnapshotDescription snapshot) throws IOException {
312    // check to see if it is completed
313    if (!isSnapshotCompleted(snapshot)) {
314      throw new SnapshotDoesNotExistException(ProtobufUtil.createSnapshotDesc(snapshot));
315    }
316
317    String snapshotName = snapshot.getName();
318    // first create the snapshot description and check to see if it exists
319    FileSystem fs = master.getMasterFileSystem().getFileSystem();
320    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
321    // Get snapshot info from file system. The one passed as parameter is a "fake" snapshotInfo with
322    // just the "name" and it does not contains the "real" snapshot information
323    snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
324
325    // call coproc pre hook
326    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
327    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
328    if (cpHost != null) {
329      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
330      cpHost.preDeleteSnapshot(snapshotPOJO);
331    }
332
333    LOG.debug("Deleting snapshot: " + snapshotName);
334    // delete the existing snapshot
335    if (!fs.delete(snapshotDir, true)) {
336      throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir);
337    }
338
339    // call coproc post hook
340    if (cpHost != null) {
341      cpHost.postDeleteSnapshot(snapshotPOJO);
342    }
343
344  }
345
346  /**
347   * Check if the specified snapshot is done
348   *
349   * @param expected
350   * @return true if snapshot is ready to be restored, false if it is still being taken.
351   * @throws IOException IOException if error from HDFS or RPC
352   * @throws UnknownSnapshotException if snapshot is invalid or does not exist.
353   */
354  public boolean isSnapshotDone(SnapshotDescription expected) throws IOException {
355    // check the request to make sure it has a snapshot
356    if (expected == null) {
357      throw new UnknownSnapshotException(
358         "No snapshot name passed in request, can't figure out which snapshot you want to check.");
359    }
360
361    String ssString = ClientSnapshotDescriptionUtils.toString(expected);
362
363    // check to see if the sentinel exists,
364    // and if the task is complete removes it from the in-progress snapshots map.
365    SnapshotSentinel handler = removeSentinelIfFinished(this.snapshotHandlers, expected);
366
367    // stop tracking "abandoned" handlers
368    cleanupSentinels();
369
370    if (handler == null) {
371      // If there's no handler in the in-progress map, it means one of the following:
372      //   - someone has already requested the snapshot state
373      //   - the requested snapshot was completed long time ago (cleanupSentinels() timeout)
374      //   - the snapshot was never requested
375      // In those cases returns to the user the "done state" if the snapshots exists on disk,
376      // otherwise raise an exception saying that the snapshot is not running and doesn't exist.
377      if (!isSnapshotCompleted(expected)) {
378        throw new UnknownSnapshotException("Snapshot " + ssString
379            + " is not currently running or one of the known completed snapshots.");
380      }
381      // was done, return true;
382      return true;
383    }
384
385    // pass on any failure we find in the sentinel
386    try {
387      handler.rethrowExceptionIfFailed();
388    } catch (ForeignException e) {
389      // Give some procedure info on an exception.
390      String status;
391      Procedure p = coordinator.getProcedure(expected.getName());
392      if (p != null) {
393        status = p.getStatus();
394      } else {
395        status = expected.getName() + " not found in proclist " + coordinator.getProcedureNames();
396      }
397      throw new HBaseSnapshotException("Snapshot " + ssString +  " had an error.  " + status, e,
398        ProtobufUtil.createSnapshotDesc(expected));
399    }
400
401    // check to see if we are done
402    if (handler.isFinished()) {
403      LOG.debug("Snapshot '" + ssString + "' has completed, notifying client.");
404      return true;
405    } else if (LOG.isDebugEnabled()) {
406      LOG.debug("Snapshoting '" + ssString + "' is still in progress!");
407    }
408    return false;
409  }
410
411  /**
412   * Check to see if there is a snapshot in progress with the same name or on the same table.
413   * Currently we have a limitation only allowing a single snapshot per table at a time. Also we
414   * don't allow snapshot with the same name.
415   * @param snapshot description of the snapshot being checked.
416   * @return <tt>true</tt> if there is a snapshot in progress with the same name or on the same
417   *         table.
418   */
419  synchronized boolean isTakingSnapshot(final SnapshotDescription snapshot) {
420    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
421    if (isTakingSnapshot(snapshotTable)) {
422      return true;
423    }
424    Iterator<Map.Entry<TableName, SnapshotSentinel>> it = this.snapshotHandlers.entrySet().iterator();
425    while (it.hasNext()) {
426      Map.Entry<TableName, SnapshotSentinel> entry = it.next();
427      SnapshotSentinel sentinel = entry.getValue();
428      if (snapshot.getName().equals(sentinel.getSnapshot().getName()) && !sentinel.isFinished()) {
429        return true;
430      }
431    }
432    return false;
433  }
434
435  /**
436   * Check to see if the specified table has a snapshot in progress.  Currently we have a
437   * limitation only allowing a single snapshot per table at a time.
438   * @param tableName name of the table being snapshotted.
439   * @return <tt>true</tt> if there is a snapshot in progress on the specified table.
440   */
441  public boolean isTakingSnapshot(final TableName tableName) {
442    SnapshotSentinel handler = this.snapshotHandlers.get(tableName);
443    return handler != null && !handler.isFinished();
444  }
445
446  /**
447   * Check to make sure that we are OK to run the passed snapshot. Checks to make sure that we
448   * aren't already running a snapshot or restore on the requested table.
449   * @param snapshot description of the snapshot we want to start
450   * @throws HBaseSnapshotException if the filesystem could not be prepared to start the snapshot
451   */
452  private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot)
453      throws HBaseSnapshotException {
454    Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir,
455        master.getConfiguration());
456    TableName snapshotTable =
457        TableName.valueOf(snapshot.getTable());
458
459    // make sure we aren't already running a snapshot
460    if (isTakingSnapshot(snapshot)) {
461      SnapshotSentinel handler = this.snapshotHandlers.get(snapshotTable);
462      throw new SnapshotCreationException("Rejected taking "
463          + ClientSnapshotDescriptionUtils.toString(snapshot)
464          + " because we are already running another snapshot "
465          + (handler != null ? ("on the same table " +
466              ClientSnapshotDescriptionUtils.toString(handler.getSnapshot()))
467              : "with the same name"), ProtobufUtil.createSnapshotDesc(snapshot));
468    }
469
470    // make sure we aren't running a restore on the same table
471    if (isRestoringTable(snapshotTable)) {
472      throw new SnapshotCreationException("Rejected taking "
473          + ClientSnapshotDescriptionUtils.toString(snapshot)
474          + " because we are already have a restore in progress on the same snapshot.");
475    }
476
477    try {
478      FileSystem workingDirFS = workingDir.getFileSystem(master.getConfiguration());
479      // delete the working directory, since we aren't running the snapshot. Likely leftovers
480      // from a failed attempt.
481      workingDirFS.delete(workingDir, true);
482
483      // recreate the working directory for the snapshot
484      if (!workingDirFS.mkdirs(workingDir)) {
485        throw new SnapshotCreationException("Couldn't create working directory (" + workingDir
486            + ") for snapshot" , ProtobufUtil.createSnapshotDesc(snapshot));
487      }
488    } catch (HBaseSnapshotException e) {
489      throw e;
490    } catch (IOException e) {
491      throw new SnapshotCreationException(
492          "Exception while checking to see if snapshot could be started.", e,
493          ProtobufUtil.createSnapshotDesc(snapshot));
494    }
495  }
496
497  /**
498   * Take a snapshot of a disabled table.
499   * @param snapshot description of the snapshot to take. Modified to be {@link Type#DISABLED}.
500   * @throws IOException if the snapshot could not be started or filesystem for snapshot
501   *         temporary directory could not be determined
502   */
503  private synchronized void snapshotDisabledTable(SnapshotDescription snapshot)
504      throws IOException {
505    // setup the snapshot
506    prepareToTakeSnapshot(snapshot);
507
508    // set the snapshot to be a disabled snapshot, since the client doesn't know about that
509    snapshot = snapshot.toBuilder().setType(Type.DISABLED).build();
510
511    // Take the snapshot of the disabled table
512    DisabledTableSnapshotHandler handler =
513        new DisabledTableSnapshotHandler(snapshot, master, this);
514    snapshotTable(snapshot, handler);
515  }
516
517  /**
518   * Take a snapshot of an enabled table.
519   * @param snapshot description of the snapshot to take.
520   * @throws IOException if the snapshot could not be started or filesystem for snapshot
521   *         temporary directory could not be determined
522   */
523  private synchronized void snapshotEnabledTable(SnapshotDescription snapshot)
524          throws IOException {
525    // setup the snapshot
526    prepareToTakeSnapshot(snapshot);
527
528    // Take the snapshot of the enabled table
529    EnabledTableSnapshotHandler handler =
530        new EnabledTableSnapshotHandler(snapshot, master, this);
531    snapshotTable(snapshot, handler);
532  }
533
534  /**
535   * Take a snapshot using the specified handler.
536   * On failure the snapshot temporary working directory is removed.
537   * NOTE: prepareToTakeSnapshot() called before this one takes care of the rejecting the
538   *       snapshot request if the table is busy with another snapshot/restore operation.
539   * @param snapshot the snapshot description
540   * @param handler the snapshot handler
541   */
542  private synchronized void snapshotTable(SnapshotDescription snapshot,
543      final TakeSnapshotHandler handler) throws IOException {
544    try {
545      handler.prepare();
546      this.executorService.submit(handler);
547      this.snapshotHandlers.put(TableName.valueOf(snapshot.getTable()), handler);
548    } catch (Exception e) {
549      // cleanup the working directory by trying to delete it from the fs.
550      Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir,
551          master.getConfiguration());
552      FileSystem workingDirFs = workingDir.getFileSystem(master.getConfiguration());
553      try {
554        if (!workingDirFs.delete(workingDir, true)) {
555          LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
556              ClientSnapshotDescriptionUtils.toString(snapshot));
557        }
558      } catch (IOException e1) {
559        LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
560            ClientSnapshotDescriptionUtils.toString(snapshot));
561      }
562      // fail the snapshot
563      throw new SnapshotCreationException("Could not build snapshot handler", e,
564        ProtobufUtil.createSnapshotDesc(snapshot));
565    }
566  }
567
568  public ReadWriteLock getTakingSnapshotLock() {
569    return this.takingSnapshotLock;
570  }
571
572  /**
573   * The snapshot operation processing as following: <br>
574   * 1. Create a Snapshot Handler, and do some initialization; <br>
575   * 2. Put the handler into snapshotHandlers <br>
576   * So when we consider if any snapshot is taking, we should consider both the takingSnapshotLock
577   * and snapshotHandlers;
578   * @return true to indicate that there're some running snapshots.
579   */
580  public synchronized boolean isTakingAnySnapshot() {
581    return this.takingSnapshotLock.getReadHoldCount() > 0 || this.snapshotHandlers.size() > 0;
582  }
583
584  /**
585   * Take a snapshot based on the enabled/disabled state of the table.
586   * @param snapshot
587   * @throws HBaseSnapshotException when a snapshot specific exception occurs.
588   * @throws IOException when some sort of generic IO exception occurs.
589   */
590  public void takeSnapshot(SnapshotDescription snapshot) throws IOException {
591    this.takingSnapshotLock.readLock().lock();
592    try {
593      takeSnapshotInternal(snapshot);
594    } finally {
595      this.takingSnapshotLock.readLock().unlock();
596    }
597  }
598
599  private void takeSnapshotInternal(SnapshotDescription snapshot) throws IOException {
600    // check to see if we already completed the snapshot
601    if (isSnapshotCompleted(snapshot)) {
602      throw new SnapshotExistsException(
603          "Snapshot '" + snapshot.getName() + "' already stored on the filesystem.",
604          ProtobufUtil.createSnapshotDesc(snapshot));
605    }
606
607    LOG.debug("No existing snapshot, attempting snapshot...");
608
609    // stop tracking "abandoned" handlers
610    cleanupSentinels();
611
612    // check to see if the table exists
613    TableDescriptor desc = null;
614    try {
615      desc = master.getTableDescriptors().get(
616          TableName.valueOf(snapshot.getTable()));
617    } catch (FileNotFoundException e) {
618      String msg = "Table:" + snapshot.getTable() + " info doesn't exist!";
619      LOG.error(msg);
620      throw new SnapshotCreationException(msg, e, ProtobufUtil.createSnapshotDesc(snapshot));
621    } catch (IOException e) {
622      throw new SnapshotCreationException(
623          "Error while geting table description for table " + snapshot.getTable(), e,
624          ProtobufUtil.createSnapshotDesc(snapshot));
625    }
626    if (desc == null) {
627      throw new SnapshotCreationException(
628          "Table '" + snapshot.getTable() + "' doesn't exist, can't take snapshot.",
629          ProtobufUtil.createSnapshotDesc(snapshot));
630    }
631    SnapshotDescription.Builder builder = snapshot.toBuilder();
632    // if not specified, set the snapshot format
633    if (!snapshot.hasVersion()) {
634      builder.setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION);
635    }
636    RpcServer.getRequestUser().ifPresent(user -> {
637      if (AccessChecker.isAuthorizationSupported(master.getConfiguration())) {
638        builder.setOwner(user.getShortName());
639      }
640    });
641    snapshot = builder.build();
642
643    // call pre coproc hook
644    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
645    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
646    if (cpHost != null) {
647      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
648      cpHost.preSnapshot(snapshotPOJO, desc);
649    }
650
651    // if the table is enabled, then have the RS run actually the snapshot work
652    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
653    if (master.getTableStateManager().isTableState(snapshotTable,
654        TableState.State.ENABLED)) {
655      if (LOG.isDebugEnabled()) {
656        LOG.debug("Table enabled, starting distributed snapshots for {}",
657          ClientSnapshotDescriptionUtils.toString(snapshot));
658      }
659      snapshotEnabledTable(snapshot);
660      if (LOG.isDebugEnabled()) {
661        LOG.debug("Started snapshot: {}", ClientSnapshotDescriptionUtils.toString(snapshot));
662      }
663    }
664    // For disabled table, snapshot is created by the master
665    else if (master.getTableStateManager().isTableState(snapshotTable,
666        TableState.State.DISABLED)) {
667      if (LOG.isDebugEnabled()) {
668        LOG.debug("Table is disabled, running snapshot entirely on master for {}",
669          ClientSnapshotDescriptionUtils.toString(snapshot));
670      }
671      snapshotDisabledTable(snapshot);
672      if (LOG.isDebugEnabled()) {
673        LOG.debug("Started snapshot: {}", ClientSnapshotDescriptionUtils.toString(snapshot));
674      }
675    } else {
676      LOG.error("Can't snapshot table '" + snapshot.getTable()
677          + "', isn't open or closed, we don't know what to do!");
678      TablePartiallyOpenException tpoe = new TablePartiallyOpenException(snapshot.getTable()
679          + " isn't fully open.");
680      throw new SnapshotCreationException("Table is not entirely open or closed", tpoe,
681        ProtobufUtil.createSnapshotDesc(snapshot));
682    }
683
684    // call post coproc hook
685    if (cpHost != null) {
686      cpHost.postSnapshot(snapshotPOJO, desc);
687    }
688  }
689
690  /**
691   * Set the handler for the current snapshot
692   * <p>
693   * Exposed for TESTING
694   * @param tableName
695   * @param handler handler the master should use
696   *
697   * TODO get rid of this if possible, repackaging, modify tests.
698   */
699  public synchronized void setSnapshotHandlerForTesting(
700      final TableName tableName,
701      final SnapshotSentinel handler) {
702    if (handler != null) {
703      this.snapshotHandlers.put(tableName, handler);
704    } else {
705      this.snapshotHandlers.remove(tableName);
706    }
707  }
708
709  /**
710   * @return distributed commit coordinator for all running snapshots
711   */
712  ProcedureCoordinator getCoordinator() {
713    return coordinator;
714  }
715
716  /**
717   * Check to see if the snapshot is one of the currently completed snapshots
718   * Returns true if the snapshot exists in the "completed snapshots folder".
719   *
720   * @param snapshot expected snapshot to check
721   * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is
722   *         not stored
723   * @throws IOException if the filesystem throws an unexpected exception,
724   * @throws IllegalArgumentException if snapshot name is invalid.
725   */
726  private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException {
727    try {
728      final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
729      FileSystem fs = master.getMasterFileSystem().getFileSystem();
730      // check to see if the snapshot already exists
731      return fs.exists(snapshotDir);
732    } catch (IllegalArgumentException iae) {
733      throw new UnknownSnapshotException("Unexpected exception thrown", iae);
734    }
735  }
736
737  /**
738   * Clone the specified snapshot.
739   * The clone will fail if the destination table has a snapshot or restore in progress.
740   *
741   * @param reqSnapshot Snapshot Descriptor from request
742   * @param tableName table to clone
743   * @param snapshot Snapshot Descriptor
744   * @param snapshotTableDesc Table Descriptor
745   * @param nonceKey unique identifier to prevent duplicated RPC
746   * @return procId the ID of the clone snapshot procedure
747   * @throws IOException
748   */
749  private long cloneSnapshot(final SnapshotDescription reqSnapshot, final TableName tableName,
750      final SnapshotDescription snapshot, final TableDescriptor snapshotTableDesc,
751      final NonceKey nonceKey, final boolean restoreAcl) throws IOException {
752    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
753    TableDescriptor htd = TableDescriptorBuilder.copy(tableName, snapshotTableDesc);
754    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
755    if (cpHost != null) {
756      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
757      cpHost.preCloneSnapshot(snapshotPOJO, htd);
758    }
759    long procId;
760    try {
761      procId = cloneSnapshot(snapshot, htd, nonceKey, restoreAcl);
762    } catch (IOException e) {
763      LOG.error("Exception occurred while cloning the snapshot " + snapshot.getName()
764        + " as table " + tableName.getNameAsString(), e);
765      throw e;
766    }
767    LOG.info("Clone snapshot=" + snapshot.getName() + " as table=" + tableName);
768
769    if (cpHost != null) {
770      cpHost.postCloneSnapshot(snapshotPOJO, htd);
771    }
772    return procId;
773  }
774
775  /**
776   * Clone the specified snapshot into a new table.
777   * The operation will fail if the destination table has a snapshot or restore in progress.
778   *
779   * @param snapshot Snapshot Descriptor
780   * @param tableDescriptor Table Descriptor of the table to create
781   * @param nonceKey unique identifier to prevent duplicated RPC
782   * @return procId the ID of the clone snapshot procedure
783   */
784  synchronized long cloneSnapshot(final SnapshotDescription snapshot,
785      final TableDescriptor tableDescriptor, final NonceKey nonceKey, final boolean restoreAcl)
786      throws HBaseSnapshotException {
787    TableName tableName = tableDescriptor.getTableName();
788
789    // make sure we aren't running a snapshot on the same table
790    if (isTakingSnapshot(tableName)) {
791      throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
792    }
793
794    // make sure we aren't running a restore on the same table
795    if (isRestoringTable(tableName)) {
796      throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
797    }
798
799    try {
800      long procId = master.getMasterProcedureExecutor().submitProcedure(
801        new CloneSnapshotProcedure(master.getMasterProcedureExecutor().getEnvironment(),
802                tableDescriptor, snapshot, restoreAcl),
803        nonceKey);
804      this.restoreTableToProcIdMap.put(tableName, procId);
805      return procId;
806    } catch (Exception e) {
807      String msg = "Couldn't clone the snapshot="
808        + ClientSnapshotDescriptionUtils.toString(snapshot) + " on table=" + tableName;
809      LOG.error(msg, e);
810      throw new RestoreSnapshotException(msg, e);
811    }
812  }
813
814  /**
815   * Restore or Clone the specified snapshot
816   * @param reqSnapshot
817   * @param nonceKey unique identifier to prevent duplicated RPC
818   * @throws IOException
819   */
820  public long restoreOrCloneSnapshot(final SnapshotDescription reqSnapshot, final NonceKey nonceKey,
821      final boolean restoreAcl) throws IOException {
822    FileSystem fs = master.getMasterFileSystem().getFileSystem();
823    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir);
824
825    // check if the snapshot exists
826    if (!fs.exists(snapshotDir)) {
827      LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist.");
828      throw new SnapshotDoesNotExistException(
829        ProtobufUtil.createSnapshotDesc(reqSnapshot));
830    }
831
832    // Get snapshot info from file system. The reqSnapshot is a "fake" snapshotInfo with
833    // just the snapshot "name" and table name to restore. It does not contains the "real" snapshot
834    // information.
835    SnapshotDescription snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
836    SnapshotManifest manifest = SnapshotManifest.open(master.getConfiguration(), fs,
837        snapshotDir, snapshot);
838    TableDescriptor snapshotTableDesc = manifest.getTableDescriptor();
839    TableName tableName = TableName.valueOf(reqSnapshot.getTable());
840
841    // sanity check the new table descriptor
842    TableDescriptorChecker.sanityCheck(master.getConfiguration(), snapshotTableDesc);
843
844    // stop tracking "abandoned" handlers
845    cleanupSentinels();
846
847    // Verify snapshot validity
848    SnapshotReferenceUtil.verifySnapshot(master.getConfiguration(), fs, manifest);
849
850    // Execute the restore/clone operation
851    long procId;
852    if (master.getTableDescriptors().exists(tableName)) {
853      procId = restoreSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceKey,
854        restoreAcl);
855    } else {
856      procId =
857          cloneSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceKey, restoreAcl);
858    }
859    return procId;
860  }
861
862  /**
863   * Restore the specified snapshot. The restore will fail if the destination table has a snapshot
864   * or restore in progress.
865   * @param reqSnapshot Snapshot Descriptor from request
866   * @param tableName table to restore
867   * @param snapshot Snapshot Descriptor
868   * @param snapshotTableDesc Table Descriptor
869   * @param nonceKey unique identifier to prevent duplicated RPC
870   * @param restoreAcl true to restore acl of snapshot
871   * @return procId the ID of the restore snapshot procedure
872   * @throws IOException
873   */
874  private long restoreSnapshot(final SnapshotDescription reqSnapshot, final TableName tableName,
875      final SnapshotDescription snapshot, final TableDescriptor snapshotTableDesc,
876      final NonceKey nonceKey, final boolean restoreAcl) throws IOException {
877    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
878
879    if (master.getTableStateManager().isTableState(
880      TableName.valueOf(snapshot.getTable()), TableState.State.ENABLED)) {
881      throw new UnsupportedOperationException("Table '" +
882        TableName.valueOf(snapshot.getTable()) + "' must be disabled in order to " +
883        "perform a restore operation.");
884    }
885
886    // call Coprocessor pre hook
887    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
888    if (cpHost != null) {
889      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
890      cpHost.preRestoreSnapshot(snapshotPOJO, snapshotTableDesc);
891    }
892
893    long procId;
894    try {
895      procId = restoreSnapshot(snapshot, snapshotTableDesc, nonceKey, restoreAcl);
896    } catch (IOException e) {
897      LOG.error("Exception occurred while restoring the snapshot " + snapshot.getName()
898        + " as table " + tableName.getNameAsString(), e);
899      throw e;
900    }
901    LOG.info("Restore snapshot=" + snapshot.getName() + " as table=" + tableName);
902
903    if (cpHost != null) {
904      cpHost.postRestoreSnapshot(snapshotPOJO, snapshotTableDesc);
905    }
906
907    return procId;
908  }
909
910  /**
911   * Restore the specified snapshot. The restore will fail if the destination table has a snapshot
912   * or restore in progress.
913   * @param snapshot Snapshot Descriptor
914   * @param tableDescriptor Table Descriptor
915   * @param nonceKey unique identifier to prevent duplicated RPC
916   * @param restoreAcl true to restore acl of snapshot
917   * @return procId the ID of the restore snapshot procedure
918   */
919  private synchronized long restoreSnapshot(final SnapshotDescription snapshot,
920      final TableDescriptor tableDescriptor, final NonceKey nonceKey, final boolean restoreAcl)
921      throws HBaseSnapshotException {
922    final TableName tableName = tableDescriptor.getTableName();
923
924    // make sure we aren't running a snapshot on the same table
925    if (isTakingSnapshot(tableName)) {
926      throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
927    }
928
929    // make sure we aren't running a restore on the same table
930    if (isRestoringTable(tableName)) {
931      throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
932    }
933
934    try {
935      long procId = master.getMasterProcedureExecutor().submitProcedure(
936        new RestoreSnapshotProcedure(master.getMasterProcedureExecutor().getEnvironment(),
937                tableDescriptor, snapshot, restoreAcl),
938        nonceKey);
939      this.restoreTableToProcIdMap.put(tableName, procId);
940      return procId;
941    } catch (Exception e) {
942      String msg = "Couldn't restore the snapshot=" + ClientSnapshotDescriptionUtils.toString(
943          snapshot)  +
944          " on table=" + tableName;
945      LOG.error(msg, e);
946      throw new RestoreSnapshotException(msg, e);
947    }
948  }
949
950  /**
951   * Verify if the restore of the specified table is in progress.
952   *
953   * @param tableName table under restore
954   * @return <tt>true</tt> if there is a restore in progress of the specified table.
955   */
956  private synchronized boolean isRestoringTable(final TableName tableName) {
957    Long procId = this.restoreTableToProcIdMap.get(tableName);
958    if (procId == null) {
959      return false;
960    }
961    ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
962    if (procExec.isRunning() && !procExec.isFinished(procId)) {
963      return true;
964    } else {
965      this.restoreTableToProcIdMap.remove(tableName);
966      return false;
967    }
968  }
969
970  /**
971   * Return the handler if it is currently live and has the same snapshot target name.
972   * The handler is removed from the sentinels map if completed.
973   * @param sentinels live handlers
974   * @param snapshot snapshot description
975   * @return null if doesn't match, else a live handler.
976   */
977  private synchronized SnapshotSentinel removeSentinelIfFinished(
978      final Map<TableName, SnapshotSentinel> sentinels,
979      final SnapshotDescription snapshot) {
980    if (!snapshot.hasTable()) {
981      return null;
982    }
983
984    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
985    SnapshotSentinel h = sentinels.get(snapshotTable);
986    if (h == null) {
987      return null;
988    }
989
990    if (!h.getSnapshot().getName().equals(snapshot.getName())) {
991      // specified snapshot is to the one currently running
992      return null;
993    }
994
995    // Remove from the "in-progress" list once completed
996    if (h.isFinished()) {
997      sentinels.remove(snapshotTable);
998    }
999
1000    return h;
1001  }
1002
1003  /**
1004   * Removes "abandoned" snapshot/restore requests.
1005   * As part of the HBaseAdmin snapshot/restore API the operation status is checked until completed,
1006   * and the in-progress maps are cleaned up when the status of a completed task is requested.
1007   * To avoid having sentinels staying around for long time if something client side is failed,
1008   * each operation tries to clean up the in-progress maps sentinels finished from a long time.
1009   */
1010  private void cleanupSentinels() {
1011    cleanupSentinels(this.snapshotHandlers);
1012    cleanupCompletedRestoreInMap();
1013  }
1014
1015  /**
1016   * Remove the sentinels that are marked as finished and the completion time
1017   * has exceeded the removal timeout.
1018   * @param sentinels map of sentinels to clean
1019   */
1020  private synchronized void cleanupSentinels(final Map<TableName, SnapshotSentinel> sentinels) {
1021    long currentTime = EnvironmentEdgeManager.currentTime();
1022    long sentinelsCleanupTimeoutMillis =
1023        master.getConfiguration().getLong(HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS,
1024          SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT);
1025    Iterator<Map.Entry<TableName, SnapshotSentinel>> it = sentinels.entrySet().iterator();
1026    while (it.hasNext()) {
1027      Map.Entry<TableName, SnapshotSentinel> entry = it.next();
1028      SnapshotSentinel sentinel = entry.getValue();
1029      if (sentinel.isFinished()
1030          && (currentTime - sentinel.getCompletionTimestamp()) > sentinelsCleanupTimeoutMillis) {
1031        it.remove();
1032      }
1033    }
1034  }
1035
1036  /**
1037   * Remove the procedures that are marked as finished
1038   */
1039  private synchronized void cleanupCompletedRestoreInMap() {
1040    ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
1041    Iterator<Map.Entry<TableName, Long>> it = restoreTableToProcIdMap.entrySet().iterator();
1042    while (it.hasNext()) {
1043      Map.Entry<TableName, Long> entry = it.next();
1044      Long procId = entry.getValue();
1045      if (procExec.isRunning() && procExec.isFinished(procId)) {
1046        it.remove();
1047      }
1048    }
1049  }
1050
1051  //
1052  // Implementing Stoppable interface
1053  //
1054
1055  @Override
1056  public void stop(String why) {
1057    // short circuit
1058    if (this.stopped) return;
1059    // make sure we get stop
1060    this.stopped = true;
1061    // pass the stop onto take snapshot handlers
1062    for (SnapshotSentinel snapshotHandler: this.snapshotHandlers.values()) {
1063      snapshotHandler.cancel(why);
1064    }
1065    if (snapshotHandlerChoreCleanerTask != null) {
1066      snapshotHandlerChoreCleanerTask.cancel(true);
1067    }
1068    try {
1069      if (coordinator != null) {
1070        coordinator.close();
1071      }
1072    } catch (IOException e) {
1073      LOG.error("stop ProcedureCoordinator error", e);
1074    }
1075  }
1076
1077  @Override
1078  public boolean isStopped() {
1079    return this.stopped;
1080  }
1081
1082  /**
1083   * Throws an exception if snapshot operations (take a snapshot, restore, clone) are not supported.
1084   * Called at the beginning of snapshot() and restoreSnapshot() methods.
1085   * @throws UnsupportedOperationException if snapshot are not supported
1086   */
1087  public void checkSnapshotSupport() throws UnsupportedOperationException {
1088    if (!this.isSnapshotSupported) {
1089      throw new UnsupportedOperationException(
1090        "To use snapshots, You must add to the hbase-site.xml of the HBase Master: '" +
1091          HBASE_SNAPSHOT_ENABLED + "' property with value 'true'.");
1092    }
1093  }
1094
1095  /**
1096   * Called at startup, to verify if snapshot operation is supported, and to avoid
1097   * starting the master if there're snapshots present but the cleaners needed are missing.
1098   * Otherwise we can end up with snapshot data loss.
1099   * @param conf The {@link Configuration} object to use
1100   * @param mfs The MasterFileSystem to use
1101   * @throws IOException in case of file-system operation failure
1102   * @throws UnsupportedOperationException in case cleaners are missing and
1103   *         there're snapshot in the system
1104   */
1105  private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs)
1106      throws IOException, UnsupportedOperationException {
1107    // Verify if snapshot is disabled by the user
1108    String enabled = conf.get(HBASE_SNAPSHOT_ENABLED);
1109    boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false);
1110    boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled);
1111
1112    // Extract cleaners from conf
1113    Set<String> hfileCleaners = new HashSet<>();
1114    String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
1115    if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
1116
1117    Set<String> logCleaners = new HashSet<>();
1118    cleaners = conf.getStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
1119    if (cleaners != null) Collections.addAll(logCleaners, cleaners);
1120
1121    // check if an older version of snapshot directory was present
1122    Path oldSnapshotDir = new Path(mfs.getRootDir(), HConstants.OLD_SNAPSHOT_DIR_NAME);
1123    FileSystem fs = mfs.getFileSystem();
1124    List<SnapshotDescription> ss = getCompletedSnapshots(new Path(rootDir, oldSnapshotDir), false);
1125    if (ss != null && !ss.isEmpty()) {
1126      LOG.error("Snapshots from an earlier release were found under: " + oldSnapshotDir);
1127      LOG.error("Please rename the directory as " + HConstants.SNAPSHOT_DIR_NAME);
1128    }
1129
1130    // If the user has enabled the snapshot, we force the cleaners to be present
1131    // otherwise we still need to check if cleaners are enabled or not and verify
1132    // that there're no snapshot in the .snapshot folder.
1133    if (snapshotEnabled) {
1134      // Inject snapshot cleaners, if snapshot.enable is true
1135      hfileCleaners.add(SnapshotHFileCleaner.class.getName());
1136      hfileCleaners.add(HFileLinkCleaner.class.getName());
1137      // If sync acl to HDFS feature is enabled, then inject the cleaner
1138      if (SnapshotScannerHDFSAclHelper.isAclSyncToHdfsEnabled(conf)) {
1139        hfileCleaners.add(SnapshotScannerHDFSAclCleaner.class.getName());
1140      }
1141
1142      // Set cleaners conf
1143      conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
1144        hfileCleaners.toArray(new String[hfileCleaners.size()]));
1145      conf.setStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS,
1146        logCleaners.toArray(new String[logCleaners.size()]));
1147    } else {
1148      // Verify if cleaners are present
1149      snapshotEnabled =
1150        hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) &&
1151        hfileCleaners.contains(HFileLinkCleaner.class.getName());
1152
1153      // Warn if the cleaners are enabled but the snapshot.enabled property is false/not set.
1154      if (snapshotEnabled) {
1155        LOG.warn("Snapshot log and hfile cleaners are present in the configuration, " +
1156          "but the '" + HBASE_SNAPSHOT_ENABLED + "' property " +
1157          (userDisabled ? "is set to 'false'." : "is not set."));
1158      }
1159    }
1160
1161    // Mark snapshot feature as enabled if cleaners are present and user has not disabled it.
1162    this.isSnapshotSupported = snapshotEnabled && !userDisabled;
1163
1164    // If cleaners are not enabled, verify that there're no snapshot in the .snapshot folder
1165    // otherwise we end up with snapshot data loss.
1166    if (!snapshotEnabled) {
1167      LOG.info("Snapshot feature is not enabled, missing log and hfile cleaners.");
1168      Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(mfs.getRootDir());
1169      if (fs.exists(snapshotDir)) {
1170        FileStatus[] snapshots = CommonFSUtils.listStatus(fs, snapshotDir,
1171          new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
1172        if (snapshots != null) {
1173          LOG.error("Snapshots are present, but cleaners are not enabled.");
1174          checkSnapshotSupport();
1175        }
1176      }
1177    }
1178  }
1179
1180  @Override
1181  public void initialize(MasterServices master, MetricsMaster metricsMaster) throws KeeperException,
1182      IOException, UnsupportedOperationException {
1183    this.master = master;
1184
1185    this.rootDir = master.getMasterFileSystem().getRootDir();
1186    checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem());
1187
1188    // get the configuration for the coordinator
1189    Configuration conf = master.getConfiguration();
1190    long wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
1191    long timeoutMillis = Math.max(conf.getLong(SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_KEY,
1192                    SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT),
1193            conf.getLong(SnapshotDescriptionUtils.MASTER_SNAPSHOT_TIMEOUT_MILLIS,
1194                    SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME));
1195    int opThreads = conf.getInt(SNAPSHOT_POOL_THREADS_KEY, SNAPSHOT_POOL_THREADS_DEFAULT);
1196
1197    // setup the default procedure coordinator
1198    String name = master.getServerName().toString();
1199    ThreadPoolExecutor tpool = ProcedureCoordinator.defaultPool(name, opThreads);
1200    ProcedureCoordinatorRpcs comms = new ZKProcedureCoordinator(
1201        master.getZooKeeper(), SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, name);
1202
1203    this.coordinator = new ProcedureCoordinator(comms, tpool, timeoutMillis, wakeFrequency);
1204    this.executorService = master.getExecutorService();
1205    resetTempDir();
1206    snapshotHandlerChoreCleanerTask =
1207        scheduleThreadPool.scheduleAtFixedRate(this::cleanupSentinels, 10, 10, TimeUnit.SECONDS);
1208  }
1209
1210  @Override
1211  public String getProcedureSignature() {
1212    return ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION;
1213  }
1214
1215  @Override
1216  public void execProcedure(ProcedureDescription desc) throws IOException {
1217    takeSnapshot(toSnapshotDescription(desc));
1218  }
1219
  /**
   * Intentionally a no-op: this implementation performs no permission check of its own.
   */
  @Override
  public void checkPermissions(ProcedureDescription desc, AccessChecker accessChecker, User user)
      throws IOException {
    // Done by AccessController as part of preSnapshot coprocessor hook (legacy code path).
    // In the future, when the AC is removed for good, that check should be moved here.
  }
1226
1227  @Override
1228  public boolean isProcedureDone(ProcedureDescription desc) throws IOException {
1229    return isSnapshotDone(toSnapshotDescription(desc));
1230  }
1231
1232  private SnapshotDescription toSnapshotDescription(ProcedureDescription desc)
1233      throws IOException {
1234    SnapshotDescription.Builder builder = SnapshotDescription.newBuilder();
1235    if (!desc.hasInstance()) {
1236      throw new IOException("Snapshot name is not defined: " + desc.toString());
1237    }
1238    String snapshotName = desc.getInstance();
1239    List<NameStringPair> props = desc.getConfigurationList();
1240    String table = null;
1241    for (NameStringPair prop : props) {
1242      if ("table".equalsIgnoreCase(prop.getName())) {
1243        table = prop.getValue();
1244      }
1245    }
1246    if (table == null) {
1247      throw new IOException("Snapshot table is not defined: " + desc.toString());
1248    }
1249    TableName tableName = TableName.valueOf(table);
1250    builder.setTable(tableName.getNameAsString());
1251    builder.setName(snapshotName);
1252    builder.setType(SnapshotDescription.Type.FLUSH);
1253    return builder.build();
1254  }
1255}