001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.snapshot;
019
020import java.io.FileNotFoundException;
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.Collections;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.Iterator;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030import java.util.concurrent.ConcurrentHashMap;
031import java.util.concurrent.Executors;
032import java.util.concurrent.ScheduledExecutorService;
033import java.util.concurrent.ScheduledFuture;
034import java.util.concurrent.ThreadPoolExecutor;
035import java.util.concurrent.TimeUnit;
036import java.util.concurrent.locks.ReadWriteLock;
037import java.util.concurrent.locks.ReentrantReadWriteLock;
038import org.apache.hadoop.conf.Configuration;
039import org.apache.hadoop.fs.FSDataInputStream;
040import org.apache.hadoop.fs.FileStatus;
041import org.apache.hadoop.fs.FileSystem;
042import org.apache.hadoop.fs.Path;
043import org.apache.hadoop.hbase.HBaseInterfaceAudience;
044import org.apache.hadoop.hbase.HConstants;
045import org.apache.hadoop.hbase.MetaTableAccessor;
046import org.apache.hadoop.hbase.Stoppable;
047import org.apache.hadoop.hbase.TableName;
048import org.apache.hadoop.hbase.client.TableDescriptor;
049import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
050import org.apache.hadoop.hbase.client.TableState;
051import org.apache.hadoop.hbase.errorhandling.ForeignException;
052import org.apache.hadoop.hbase.executor.ExecutorService;
053import org.apache.hadoop.hbase.ipc.RpcServer;
054import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
055import org.apache.hadoop.hbase.master.MasterFileSystem;
056import org.apache.hadoop.hbase.master.MasterServices;
057import org.apache.hadoop.hbase.master.MetricsMaster;
058import org.apache.hadoop.hbase.master.SnapshotSentinel;
059import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
060import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner;
061import org.apache.hadoop.hbase.master.procedure.CloneSnapshotProcedure;
062import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
063import org.apache.hadoop.hbase.master.procedure.RestoreSnapshotProcedure;
064import org.apache.hadoop.hbase.procedure.MasterProcedureManager;
065import org.apache.hadoop.hbase.procedure.Procedure;
066import org.apache.hadoop.hbase.procedure.ProcedureCoordinator;
067import org.apache.hadoop.hbase.procedure.ProcedureCoordinatorRpcs;
068import org.apache.hadoop.hbase.procedure.ZKProcedureCoordinator;
069import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
070import org.apache.hadoop.hbase.security.AccessDeniedException;
071import org.apache.hadoop.hbase.security.User;
072import org.apache.hadoop.hbase.security.access.AccessChecker;
073import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclCleaner;
074import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclHelper;
075import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
076import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException;
077import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
078import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
079import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
080import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException;
081import org.apache.hadoop.hbase.snapshot.SnapshotExistsException;
082import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
083import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
084import org.apache.hadoop.hbase.snapshot.TablePartiallyOpenException;
085import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
086import org.apache.hadoop.hbase.util.CommonFSUtils;
087import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
088import org.apache.hadoop.hbase.util.NonceKey;
089import org.apache.hadoop.hbase.util.TableDescriptorChecker;
090import org.apache.yetus.audience.InterfaceAudience;
091import org.apache.yetus.audience.InterfaceStability;
092import org.apache.zookeeper.KeeperException;
093import org.slf4j.Logger;
094import org.slf4j.LoggerFactory;
095
096import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
097import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
098
099import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
100import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.NameStringPair;
101import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ProcedureDescription;
102import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
103import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription.Type;
104
105/**
106 * This class manages the procedure of taking and restoring snapshots. There is only one
107 * SnapshotManager for the master.
108 * <p>
109 * The class provides methods for monitoring in-progress snapshot actions.
110 * <p>
111 * Note: Currently there can only be one snapshot being taken at a time over the cluster. This is a
112 * simplification in the current implementation.
113 */
114@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
115@InterfaceStability.Unstable
116public class SnapshotManager extends MasterProcedureManager implements Stoppable {
  /** Class-wide logger. */
  private static final Logger LOG = LoggerFactory.getLogger(SnapshotManager.class);

  /** By default, check to see if the snapshot is complete every WAKE MILLIS (ms) */
  private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500;

  /**
   * Conf key for the wait time before removing a finished sentinel from the in-progress map.
   * <p>
   * NOTE: This is used as a safety auto cleanup.
   * The snapshot and restore handlers map entries are removed when a user asks if a snapshot or
   * restore is completed. This operation is part of the HBaseAdmin snapshot/restore API flow.
   * In case something fails on the client side and the snapshot/restore state is not reclaimed
   * after a default timeout, the entry is removed from the in-progress map.
   * At this point, if the user asks for the snapshot/restore status, the result will be
   * "snapshot done" if the snapshot exists, or "failed" if it does not exist.
   */
  public static final String HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS =
      "hbase.snapshot.sentinels.cleanup.timeoutMillis";
  /**
   * 60 seconds, expressed in milliseconds. Presumably the default for
   * {@link #HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS}; the lookup is outside this view.
   */
  public static final long SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT = 60 * 1000L;

  /** Conf key used to enable or disable snapshot support */
  public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled";

  /**
   * Conf key for # of ms elapsed between checks for snapshot errors while waiting for
   * completion.
   */
  private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis";

  /** Name of the operation to use in the controller */
  public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot";

  /** Conf key for # of threads used by the SnapshotManager thread pool */
  public static final String SNAPSHOT_POOL_THREADS_KEY = "hbase.snapshot.master.threads";

  /**
   * Default thread count of 1. Presumably the default for {@link #SNAPSHOT_POOL_THREADS_KEY};
   * the lookup is outside this view.
   */
  public static final int SNAPSHOT_POOL_THREADS_DEFAULT = 1;

  // NOTE(review): presumably backs the Stoppable contract; its accessors are outside this view.
  private boolean stopped;
  private MasterServices master;  // Needed by TableEventHandlers
  // Coordinator queried by isSnapshotDone() for procedure status and exposed by getCoordinator().
  private ProcedureCoordinator coordinator;

  // Is snapshot feature enabled?
  private boolean isSnapshotSupported = false;

  // Snapshot handlers map, with table name as key.
  // snapshotTable() inserts a handler into the map.
  // isSnapshotDone() removes the requested handler if the operation is finished.
  // NOTE: writers such as snapshotTable() and setSnapshotHandlerForTesting() are synchronized,
  // but isTakingSnapshot(TableName) reads the map without holding the object lock, so do not
  // assume every access is guarded by the monitor; the map itself is a ConcurrentHashMap.
  private final Map<TableName, SnapshotSentinel> snapshotHandlers = new ConcurrentHashMap<>();
  // Single-threaded daemon scheduler (thread name "SnapshotHandlerChoreCleaner"); the testing
  // constructor uses it to run cleanupSentinels() at a fixed rate.
  private final ScheduledExecutorService scheduleThreadPool =
      Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder()
          .setNameFormat("SnapshotHandlerChoreCleaner").setDaemon(true).build());
  // Handle of the periodic sentinel cleanup task once it has been scheduled.
  private ScheduledFuture<?> snapshotHandlerChoreCleanerTask;

  // Restore map, with table name as key, procedure ID as value.
  // The map is always accessed and modified under the object lock using synchronized.
  // restoreSnapshot()/cloneSnapshot() will insert a procedure ID in the map.
  //
  // TODO: just as the Apache HBase 1.x implementation, this map would not survive master
  // restart/failover. This is just a stopgap implementation until implementation of taking
  // snapshot using Procedure-V2.
  private Map<TableName, Long> restoreTableToProcIdMap = new HashMap<>();

  // Root directory from which the snapshot directories are resolved.
  private Path rootDir;
  // HBase executor service to which snapshot handlers are submitted (see snapshotTable()).
  private ExecutorService executorService;

  /**
   * Read write lock between taking snapshot and snapshot HFile cleaner. The cleaner should skip
   * checking the HFiles if any snapshot is in progress, otherwise it may clean an HFile that
   * belongs to a snapshot which is still being created. So the cleaner should grab the write lock
   * first when it starts to work, while takeSnapshot() holds the read lock. (See HBASE-21387)
   * <p>
   * The lock is created with the fair ordering policy.
   */
  private ReentrantReadWriteLock takingSnapshotLock = new ReentrantReadWriteLock(true);
191
  /**
   * Creates a manager without assigning the master services, coordinator or executor service.
   * NOTE(review): presumably those are wired up later by an initialization method that is
   * outside this view - confirm.
   */
  public SnapshotManager() {}
193
  /**
   * Fully specify all necessary components of a snapshot manager. Exposed for testing.
   * <p>
   * Besides storing the collaborators, this checks snapshot support, deletes the working
   * snapshot directory via {@code resetTempDir()}, and schedules the periodic sentinel cleanup.
   * @param master services for the master where the manager is running
   * @param coordinator procedure coordinator instance.  exposed for testing.
   * @param pool HBase ExecutorService instance, exposed for testing.
   * @param sentinelCleanInterval initial delay and period, in seconds, of the scheduled
   *          sentinel cleanup task
   * @throws IOException if the working snapshot directory cannot be reset
   * @throws UnsupportedOperationException presumably raised by the snapshot support check;
   *           that method is outside this view
   */
  @VisibleForTesting
  SnapshotManager(final MasterServices master, ProcedureCoordinator coordinator,
      ExecutorService pool, int sentinelCleanInterval)
      throws IOException, UnsupportedOperationException {
    this.master = master;

    this.rootDir = master.getMasterFileSystem().getRootDir();
    Configuration conf = master.getConfiguration();
    checkSnapshotSupport(conf, master.getMasterFileSystem());

    this.coordinator = coordinator;
    this.executorService = pool;
    // Drop leftovers of earlier failed snapshot attempts before accepting new requests.
    resetTempDir();
    // Same value is used as initial delay and period; note the unit is seconds.
    snapshotHandlerChoreCleanerTask = this.scheduleThreadPool.scheduleAtFixedRate(
      this::cleanupSentinels, sentinelCleanInterval, sentinelCleanInterval, TimeUnit.SECONDS);
  }
216
217  /**
218   * Gets the list of all completed snapshots.
219   * @return list of SnapshotDescriptions
220   * @throws IOException File system exception
221   */
222  public List<SnapshotDescription> getCompletedSnapshots() throws IOException {
223    return getCompletedSnapshots(SnapshotDescriptionUtils.getSnapshotsDir(rootDir), true);
224  }
225
226  /**
227   * Gets the list of all completed snapshots.
228   * @param snapshotDir snapshot directory
229   * @param withCpCall Whether to call CP hooks
230   * @return list of SnapshotDescriptions
231   * @throws IOException File system exception
232   */
233  private List<SnapshotDescription> getCompletedSnapshots(Path snapshotDir, boolean withCpCall)
234      throws IOException {
235    List<SnapshotDescription> snapshotDescs = new ArrayList<>();
236    // first create the snapshot root path and check to see if it exists
237    FileSystem fs = master.getMasterFileSystem().getFileSystem();
238    if (snapshotDir == null) snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
239
240    // if there are no snapshots, return an empty list
241    if (!fs.exists(snapshotDir)) {
242      return snapshotDescs;
243    }
244
245    // ignore all the snapshots in progress
246    FileStatus[] snapshots = fs.listStatus(snapshotDir,
247      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
248    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
249    withCpCall = withCpCall && cpHost != null;
250    // loop through all the completed snapshots
251    for (FileStatus snapshot : snapshots) {
252      Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
253      // if the snapshot is bad
254      if (!fs.exists(info)) {
255        LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist");
256        continue;
257      }
258      FSDataInputStream in = null;
259      try {
260        in = fs.open(info);
261        SnapshotDescription desc = SnapshotDescription.parseFrom(in);
262        org.apache.hadoop.hbase.client.SnapshotDescription descPOJO = (withCpCall)
263            ? ProtobufUtil.createSnapshotDesc(desc) : null;
264        if (withCpCall) {
265          try {
266            cpHost.preListSnapshot(descPOJO);
267          } catch (AccessDeniedException e) {
268            LOG.warn("Current user does not have access to " + desc.getName() + " snapshot. "
269                + "Either you should be owner of this snapshot or admin user.");
270            // Skip this and try for next snapshot
271            continue;
272          }
273        }
274        snapshotDescs.add(desc);
275
276        // call coproc post hook
277        if (withCpCall) {
278          cpHost.postListSnapshot(descPOJO);
279        }
280      } catch (IOException e) {
281        LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e);
282      } finally {
283        if (in != null) {
284          in.close();
285        }
286      }
287    }
288    return snapshotDescs;
289  }
290
291  /**
292   * Cleans up any snapshots in the snapshot/.tmp directory that were left from failed
293   * snapshot attempts.
294   *
295   * @throws IOException if we can't reach the filesystem
296   */
297  private void resetTempDir() throws IOException {
298    // cleanup any existing snapshots.
299    Path tmpdir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir,
300        master.getConfiguration());
301    FileSystem tmpFs = tmpdir.getFileSystem(master.getConfiguration());
302    if (!tmpFs.delete(tmpdir, true)) {
303      LOG.warn("Couldn't delete working snapshot directory: " + tmpdir);
304    }
305  }
306
307  /**
308   * Delete the specified snapshot
309   * @param snapshot
310   * @throws SnapshotDoesNotExistException If the specified snapshot does not exist.
311   * @throws IOException For filesystem IOExceptions
312   */
313  public void deleteSnapshot(SnapshotDescription snapshot) throws IOException {
314    // check to see if it is completed
315    if (!isSnapshotCompleted(snapshot)) {
316      throw new SnapshotDoesNotExistException(ProtobufUtil.createSnapshotDesc(snapshot));
317    }
318
319    String snapshotName = snapshot.getName();
320    // first create the snapshot description and check to see if it exists
321    FileSystem fs = master.getMasterFileSystem().getFileSystem();
322    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
323    // Get snapshot info from file system. The one passed as parameter is a "fake" snapshotInfo with
324    // just the "name" and it does not contains the "real" snapshot information
325    snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
326
327    // call coproc pre hook
328    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
329    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
330    if (cpHost != null) {
331      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
332      cpHost.preDeleteSnapshot(snapshotPOJO);
333    }
334
335    LOG.debug("Deleting snapshot: " + snapshotName);
336    // delete the existing snapshot
337    if (!fs.delete(snapshotDir, true)) {
338      throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir);
339    }
340
341    // call coproc post hook
342    if (cpHost != null) {
343      cpHost.postDeleteSnapshot(snapshotPOJO);
344    }
345
346  }
347
348  /**
349   * Check if the specified snapshot is done
350   *
351   * @param expected
352   * @return true if snapshot is ready to be restored, false if it is still being taken.
353   * @throws IOException IOException if error from HDFS or RPC
354   * @throws UnknownSnapshotException if snapshot is invalid or does not exist.
355   */
356  public boolean isSnapshotDone(SnapshotDescription expected) throws IOException {
357    // check the request to make sure it has a snapshot
358    if (expected == null) {
359      throw new UnknownSnapshotException(
360         "No snapshot name passed in request, can't figure out which snapshot you want to check.");
361    }
362
363    String ssString = ClientSnapshotDescriptionUtils.toString(expected);
364
365    // check to see if the sentinel exists,
366    // and if the task is complete removes it from the in-progress snapshots map.
367    SnapshotSentinel handler = removeSentinelIfFinished(this.snapshotHandlers, expected);
368
369    // stop tracking "abandoned" handlers
370    cleanupSentinels();
371
372    if (handler == null) {
373      // If there's no handler in the in-progress map, it means one of the following:
374      //   - someone has already requested the snapshot state
375      //   - the requested snapshot was completed long time ago (cleanupSentinels() timeout)
376      //   - the snapshot was never requested
377      // In those cases returns to the user the "done state" if the snapshots exists on disk,
378      // otherwise raise an exception saying that the snapshot is not running and doesn't exist.
379      if (!isSnapshotCompleted(expected)) {
380        throw new UnknownSnapshotException("Snapshot " + ssString
381            + " is not currently running or one of the known completed snapshots.");
382      }
383      // was done, return true;
384      return true;
385    }
386
387    // pass on any failure we find in the sentinel
388    try {
389      handler.rethrowExceptionIfFailed();
390    } catch (ForeignException e) {
391      // Give some procedure info on an exception.
392      String status;
393      Procedure p = coordinator.getProcedure(expected.getName());
394      if (p != null) {
395        status = p.getStatus();
396      } else {
397        status = expected.getName() + " not found in proclist " + coordinator.getProcedureNames();
398      }
399      throw new HBaseSnapshotException("Snapshot " + ssString +  " had an error.  " + status, e,
400        ProtobufUtil.createSnapshotDesc(expected));
401    }
402
403    // check to see if we are done
404    if (handler.isFinished()) {
405      LOG.debug("Snapshot '" + ssString + "' has completed, notifying client.");
406      return true;
407    } else if (LOG.isDebugEnabled()) {
408      LOG.debug("Snapshoting '" + ssString + "' is still in progress!");
409    }
410    return false;
411  }
412
413  /**
414   * Check to see if there is a snapshot in progress with the same name or on the same table.
415   * Currently we have a limitation only allowing a single snapshot per table at a time. Also we
416   * don't allow snapshot with the same name.
417   * @param snapshot description of the snapshot being checked.
418   * @return <tt>true</tt> if there is a snapshot in progress with the same name or on the same
419   *         table.
420   */
421  synchronized boolean isTakingSnapshot(final SnapshotDescription snapshot) {
422    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
423    if (isTakingSnapshot(snapshotTable)) {
424      return true;
425    }
426    Iterator<Map.Entry<TableName, SnapshotSentinel>> it = this.snapshotHandlers.entrySet().iterator();
427    while (it.hasNext()) {
428      Map.Entry<TableName, SnapshotSentinel> entry = it.next();
429      SnapshotSentinel sentinel = entry.getValue();
430      if (snapshot.getName().equals(sentinel.getSnapshot().getName()) && !sentinel.isFinished()) {
431        return true;
432      }
433    }
434    return false;
435  }
436
437  /**
438   * Check to see if the specified table has a snapshot in progress.  Currently we have a
439   * limitation only allowing a single snapshot per table at a time.
440   * @param tableName name of the table being snapshotted.
441   * @return <tt>true</tt> if there is a snapshot in progress on the specified table.
442   */
443  public boolean isTakingSnapshot(final TableName tableName) {
444    SnapshotSentinel handler = this.snapshotHandlers.get(tableName);
445    return handler != null && !handler.isFinished();
446  }
447
448  /**
449   * Check to make sure that we are OK to run the passed snapshot. Checks to make sure that we
450   * aren't already running a snapshot or restore on the requested table.
451   * @param snapshot description of the snapshot we want to start
452   * @throws HBaseSnapshotException if the filesystem could not be prepared to start the snapshot
453   */
454  private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot)
455      throws HBaseSnapshotException {
456    Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir,
457        master.getConfiguration());
458    TableName snapshotTable =
459        TableName.valueOf(snapshot.getTable());
460
461    // make sure we aren't already running a snapshot
462    if (isTakingSnapshot(snapshot)) {
463      SnapshotSentinel handler = this.snapshotHandlers.get(snapshotTable);
464      throw new SnapshotCreationException("Rejected taking "
465          + ClientSnapshotDescriptionUtils.toString(snapshot)
466          + " because we are already running another snapshot "
467          + (handler != null ? ("on the same table " +
468              ClientSnapshotDescriptionUtils.toString(handler.getSnapshot()))
469              : "with the same name"), ProtobufUtil.createSnapshotDesc(snapshot));
470    }
471
472    // make sure we aren't running a restore on the same table
473    if (isRestoringTable(snapshotTable)) {
474      throw new SnapshotCreationException("Rejected taking "
475          + ClientSnapshotDescriptionUtils.toString(snapshot)
476          + " because we are already have a restore in progress on the same snapshot.");
477    }
478
479    try {
480      FileSystem workingDirFS = workingDir.getFileSystem(master.getConfiguration());
481      // delete the working directory, since we aren't running the snapshot. Likely leftovers
482      // from a failed attempt.
483      workingDirFS.delete(workingDir, true);
484
485      // recreate the working directory for the snapshot
486      if (!workingDirFS.mkdirs(workingDir)) {
487        throw new SnapshotCreationException("Couldn't create working directory (" + workingDir
488            + ") for snapshot" , ProtobufUtil.createSnapshotDesc(snapshot));
489      }
490    } catch (HBaseSnapshotException e) {
491      throw e;
492    } catch (IOException e) {
493      throw new SnapshotCreationException(
494          "Exception while checking to see if snapshot could be started.", e,
495          ProtobufUtil.createSnapshotDesc(snapshot));
496    }
497  }
498
499  /**
500   * Take a snapshot of a disabled table.
501   * @param snapshot description of the snapshot to take. Modified to be {@link Type#DISABLED}.
502   * @throws IOException if the snapshot could not be started or filesystem for snapshot
503   *         temporary directory could not be determined
504   */
505  private synchronized void snapshotDisabledTable(SnapshotDescription snapshot)
506      throws IOException {
507    // setup the snapshot
508    prepareToTakeSnapshot(snapshot);
509
510    // set the snapshot to be a disabled snapshot, since the client doesn't know about that
511    snapshot = snapshot.toBuilder().setType(Type.DISABLED).build();
512
513    // Take the snapshot of the disabled table
514    DisabledTableSnapshotHandler handler =
515        new DisabledTableSnapshotHandler(snapshot, master, this);
516    snapshotTable(snapshot, handler);
517  }
518
519  /**
520   * Take a snapshot of an enabled table.
521   * @param snapshot description of the snapshot to take.
522   * @throws IOException if the snapshot could not be started or filesystem for snapshot
523   *         temporary directory could not be determined
524   */
525  private synchronized void snapshotEnabledTable(SnapshotDescription snapshot)
526          throws IOException {
527    // setup the snapshot
528    prepareToTakeSnapshot(snapshot);
529
530    // Take the snapshot of the enabled table
531    EnabledTableSnapshotHandler handler =
532        new EnabledTableSnapshotHandler(snapshot, master, this);
533    snapshotTable(snapshot, handler);
534  }
535
536  /**
537   * Take a snapshot using the specified handler.
538   * On failure the snapshot temporary working directory is removed.
539   * NOTE: prepareToTakeSnapshot() called before this one takes care of the rejecting the
540   *       snapshot request if the table is busy with another snapshot/restore operation.
541   * @param snapshot the snapshot description
542   * @param handler the snapshot handler
543   */
544  private synchronized void snapshotTable(SnapshotDescription snapshot,
545      final TakeSnapshotHandler handler) throws IOException {
546    try {
547      handler.prepare();
548      this.executorService.submit(handler);
549      this.snapshotHandlers.put(TableName.valueOf(snapshot.getTable()), handler);
550    } catch (Exception e) {
551      // cleanup the working directory by trying to delete it from the fs.
552      Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir,
553          master.getConfiguration());
554      FileSystem workingDirFs = workingDir.getFileSystem(master.getConfiguration());
555      try {
556        if (!workingDirFs.delete(workingDir, true)) {
557          LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
558              ClientSnapshotDescriptionUtils.toString(snapshot));
559        }
560      } catch (IOException e1) {
561        LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
562            ClientSnapshotDescriptionUtils.toString(snapshot));
563      }
564      // fail the snapshot
565      throw new SnapshotCreationException("Could not build snapshot handler", e,
566        ProtobufUtil.createSnapshotDesc(snapshot));
567    }
568  }
569
570  public ReadWriteLock getTakingSnapshotLock() {
571    return this.takingSnapshotLock;
572  }
573
574  /**
575   * The snapshot operation processing as following: <br>
576   * 1. Create a Snapshot Handler, and do some initialization; <br>
577   * 2. Put the handler into snapshotHandlers <br>
578   * So when we consider if any snapshot is taking, we should consider both the takingSnapshotLock
579   * and snapshotHandlers;
580   * @return true to indicate that there're some running snapshots.
581   */
582  public synchronized boolean isTakingAnySnapshot() {
583    return this.takingSnapshotLock.getReadHoldCount() > 0 || this.snapshotHandlers.size() > 0;
584  }
585
586  /**
587   * Take a snapshot based on the enabled/disabled state of the table.
588   * @param snapshot
589   * @throws HBaseSnapshotException when a snapshot specific exception occurs.
590   * @throws IOException when some sort of generic IO exception occurs.
591   */
592  public void takeSnapshot(SnapshotDescription snapshot) throws IOException {
593    this.takingSnapshotLock.readLock().lock();
594    try {
595      takeSnapshotInternal(snapshot);
596    } finally {
597      this.takingSnapshotLock.readLock().unlock();
598    }
599  }
600
601  private void takeSnapshotInternal(SnapshotDescription snapshot) throws IOException {
602    // check to see if we already completed the snapshot
603    if (isSnapshotCompleted(snapshot)) {
604      throw new SnapshotExistsException(
605          "Snapshot '" + snapshot.getName() + "' already stored on the filesystem.",
606          ProtobufUtil.createSnapshotDesc(snapshot));
607    }
608
609    LOG.debug("No existing snapshot, attempting snapshot...");
610
611    // stop tracking "abandoned" handlers
612    cleanupSentinels();
613
614    // check to see if the table exists
615    TableDescriptor desc = null;
616    try {
617      desc = master.getTableDescriptors().get(
618          TableName.valueOf(snapshot.getTable()));
619    } catch (FileNotFoundException e) {
620      String msg = "Table:" + snapshot.getTable() + " info doesn't exist!";
621      LOG.error(msg);
622      throw new SnapshotCreationException(msg, e, ProtobufUtil.createSnapshotDesc(snapshot));
623    } catch (IOException e) {
624      throw new SnapshotCreationException(
625          "Error while geting table description for table " + snapshot.getTable(), e,
626          ProtobufUtil.createSnapshotDesc(snapshot));
627    }
628    if (desc == null) {
629      throw new SnapshotCreationException(
630          "Table '" + snapshot.getTable() + "' doesn't exist, can't take snapshot.",
631          ProtobufUtil.createSnapshotDesc(snapshot));
632    }
633    SnapshotDescription.Builder builder = snapshot.toBuilder();
634    // if not specified, set the snapshot format
635    if (!snapshot.hasVersion()) {
636      builder.setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION);
637    }
638    RpcServer.getRequestUser().ifPresent(user -> {
639      if (AccessChecker.isAuthorizationSupported(master.getConfiguration())) {
640        builder.setOwner(user.getShortName());
641      }
642    });
643    snapshot = builder.build();
644
645    // call pre coproc hook
646    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
647    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
648    if (cpHost != null) {
649      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
650      cpHost.preSnapshot(snapshotPOJO, desc);
651    }
652
653    // if the table is enabled, then have the RS run actually the snapshot work
654    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
655    if (master.getTableStateManager().isTableState(snapshotTable,
656        TableState.State.ENABLED)) {
657      LOG.debug("Table enabled, starting distributed snapshot.");
658      snapshotEnabledTable(snapshot);
659      LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
660    }
661    // For disabled table, snapshot is created by the master
662    else if (master.getTableStateManager().isTableState(snapshotTable,
663        TableState.State.DISABLED)) {
664      LOG.debug("Table is disabled, running snapshot entirely on master.");
665      snapshotDisabledTable(snapshot);
666      LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
667    } else {
668      LOG.error("Can't snapshot table '" + snapshot.getTable()
669          + "', isn't open or closed, we don't know what to do!");
670      TablePartiallyOpenException tpoe = new TablePartiallyOpenException(snapshot.getTable()
671          + " isn't fully open.");
672      throw new SnapshotCreationException("Table is not entirely open or closed", tpoe,
673        ProtobufUtil.createSnapshotDesc(snapshot));
674    }
675
676    // call post coproc hook
677    if (cpHost != null) {
678      cpHost.postSnapshot(snapshotPOJO, desc);
679    }
680  }
681
682  /**
683   * Set the handler for the current snapshot
684   * <p>
685   * Exposed for TESTING
686   * @param tableName
687   * @param handler handler the master should use
688   *
689   * TODO get rid of this if possible, repackaging, modify tests.
690   */
691  public synchronized void setSnapshotHandlerForTesting(
692      final TableName tableName,
693      final SnapshotSentinel handler) {
694    if (handler != null) {
695      this.snapshotHandlers.put(tableName, handler);
696    } else {
697      this.snapshotHandlers.remove(tableName);
698    }
699  }
700
701  /**
702   * @return distributed commit coordinator for all running snapshots
703   */
704  ProcedureCoordinator getCoordinator() {
705    return coordinator;
706  }
707
708  /**
709   * Check to see if the snapshot is one of the currently completed snapshots
710   * Returns true if the snapshot exists in the "completed snapshots folder".
711   *
712   * @param snapshot expected snapshot to check
713   * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is
714   *         not stored
715   * @throws IOException if the filesystem throws an unexpected exception,
716   * @throws IllegalArgumentException if snapshot name is invalid.
717   */
718  private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException {
719    try {
720      final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
721      FileSystem fs = master.getMasterFileSystem().getFileSystem();
722      // check to see if the snapshot already exists
723      return fs.exists(snapshotDir);
724    } catch (IllegalArgumentException iae) {
725      throw new UnknownSnapshotException("Unexpected exception thrown", iae);
726    }
727  }
728
729  /**
730   * Clone the specified snapshot.
731   * The clone will fail if the destination table has a snapshot or restore in progress.
732   *
733   * @param reqSnapshot Snapshot Descriptor from request
734   * @param tableName table to clone
735   * @param snapshot Snapshot Descriptor
736   * @param snapshotTableDesc Table Descriptor
737   * @param nonceKey unique identifier to prevent duplicated RPC
738   * @return procId the ID of the clone snapshot procedure
739   * @throws IOException
740   */
741  private long cloneSnapshot(final SnapshotDescription reqSnapshot, final TableName tableName,
742      final SnapshotDescription snapshot, final TableDescriptor snapshotTableDesc,
743      final NonceKey nonceKey, final boolean restoreAcl) throws IOException {
744    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
745    TableDescriptor htd = TableDescriptorBuilder.copy(tableName, snapshotTableDesc);
746    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
747    if (cpHost != null) {
748      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
749      cpHost.preCloneSnapshot(snapshotPOJO, htd);
750    }
751    long procId;
752    try {
753      procId = cloneSnapshot(snapshot, htd, nonceKey, restoreAcl);
754    } catch (IOException e) {
755      LOG.error("Exception occurred while cloning the snapshot " + snapshot.getName()
756        + " as table " + tableName.getNameAsString(), e);
757      throw e;
758    }
759    LOG.info("Clone snapshot=" + snapshot.getName() + " as table=" + tableName);
760
761    if (cpHost != null) {
762      cpHost.postCloneSnapshot(snapshotPOJO, htd);
763    }
764    return procId;
765  }
766
767  /**
768   * Clone the specified snapshot into a new table.
769   * The operation will fail if the destination table has a snapshot or restore in progress.
770   *
771   * @param snapshot Snapshot Descriptor
772   * @param tableDescriptor Table Descriptor of the table to create
773   * @param nonceKey unique identifier to prevent duplicated RPC
774   * @return procId the ID of the clone snapshot procedure
775   */
776  synchronized long cloneSnapshot(final SnapshotDescription snapshot,
777      final TableDescriptor tableDescriptor, final NonceKey nonceKey, final boolean restoreAcl)
778      throws HBaseSnapshotException {
779    TableName tableName = tableDescriptor.getTableName();
780
781    // make sure we aren't running a snapshot on the same table
782    if (isTakingSnapshot(tableName)) {
783      throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
784    }
785
786    // make sure we aren't running a restore on the same table
787    if (isRestoringTable(tableName)) {
788      throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
789    }
790
791    try {
792      long procId = master.getMasterProcedureExecutor().submitProcedure(
793        new CloneSnapshotProcedure(master.getMasterProcedureExecutor().getEnvironment(),
794                tableDescriptor, snapshot, restoreAcl),
795        nonceKey);
796      this.restoreTableToProcIdMap.put(tableName, procId);
797      return procId;
798    } catch (Exception e) {
799      String msg = "Couldn't clone the snapshot="
800        + ClientSnapshotDescriptionUtils.toString(snapshot) + " on table=" + tableName;
801      LOG.error(msg, e);
802      throw new RestoreSnapshotException(msg, e);
803    }
804  }
805
806  /**
807   * Restore or Clone the specified snapshot
808   * @param reqSnapshot
809   * @param nonceKey unique identifier to prevent duplicated RPC
810   * @throws IOException
811   */
812  public long restoreOrCloneSnapshot(final SnapshotDescription reqSnapshot, final NonceKey nonceKey,
813      final boolean restoreAcl) throws IOException {
814    FileSystem fs = master.getMasterFileSystem().getFileSystem();
815    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir);
816
817    // check if the snapshot exists
818    if (!fs.exists(snapshotDir)) {
819      LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist.");
820      throw new SnapshotDoesNotExistException(
821        ProtobufUtil.createSnapshotDesc(reqSnapshot));
822    }
823
824    // Get snapshot info from file system. The reqSnapshot is a "fake" snapshotInfo with
825    // just the snapshot "name" and table name to restore. It does not contains the "real" snapshot
826    // information.
827    SnapshotDescription snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
828    SnapshotManifest manifest = SnapshotManifest.open(master.getConfiguration(), fs,
829        snapshotDir, snapshot);
830    TableDescriptor snapshotTableDesc = manifest.getTableDescriptor();
831    TableName tableName = TableName.valueOf(reqSnapshot.getTable());
832
833    // sanity check the new table descriptor
834    TableDescriptorChecker.sanityCheck(master.getConfiguration(), snapshotTableDesc);
835
836    // stop tracking "abandoned" handlers
837    cleanupSentinels();
838
839    // Verify snapshot validity
840    SnapshotReferenceUtil.verifySnapshot(master.getConfiguration(), fs, manifest);
841
842    // Execute the restore/clone operation
843    long procId;
844    if (MetaTableAccessor.tableExists(master.getConnection(), tableName)) {
845      procId = restoreSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceKey,
846        restoreAcl);
847    } else {
848      procId =
849          cloneSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceKey, restoreAcl);
850    }
851    return procId;
852  }
853
854  /**
855   * Restore the specified snapshot. The restore will fail if the destination table has a snapshot
856   * or restore in progress.
857   * @param reqSnapshot Snapshot Descriptor from request
858   * @param tableName table to restore
859   * @param snapshot Snapshot Descriptor
860   * @param snapshotTableDesc Table Descriptor
861   * @param nonceKey unique identifier to prevent duplicated RPC
862   * @param restoreAcl true to restore acl of snapshot
863   * @return procId the ID of the restore snapshot procedure
864   * @throws IOException
865   */
866  private long restoreSnapshot(final SnapshotDescription reqSnapshot, final TableName tableName,
867      final SnapshotDescription snapshot, final TableDescriptor snapshotTableDesc,
868      final NonceKey nonceKey, final boolean restoreAcl) throws IOException {
869    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
870
871    if (master.getTableStateManager().isTableState(
872      TableName.valueOf(snapshot.getTable()), TableState.State.ENABLED)) {
873      throw new UnsupportedOperationException("Table '" +
874        TableName.valueOf(snapshot.getTable()) + "' must be disabled in order to " +
875        "perform a restore operation.");
876    }
877
878    // call Coprocessor pre hook
879    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
880    if (cpHost != null) {
881      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
882      cpHost.preRestoreSnapshot(snapshotPOJO, snapshotTableDesc);
883    }
884
885    long procId;
886    try {
887      procId = restoreSnapshot(snapshot, snapshotTableDesc, nonceKey, restoreAcl);
888    } catch (IOException e) {
889      LOG.error("Exception occurred while restoring the snapshot " + snapshot.getName()
890        + " as table " + tableName.getNameAsString(), e);
891      throw e;
892    }
893    LOG.info("Restore snapshot=" + snapshot.getName() + " as table=" + tableName);
894
895    if (cpHost != null) {
896      cpHost.postRestoreSnapshot(snapshotPOJO, snapshotTableDesc);
897    }
898
899    return procId;
900  }
901
902  /**
903   * Restore the specified snapshot. The restore will fail if the destination table has a snapshot
904   * or restore in progress.
905   * @param snapshot Snapshot Descriptor
906   * @param tableDescriptor Table Descriptor
907   * @param nonceKey unique identifier to prevent duplicated RPC
908   * @param restoreAcl true to restore acl of snapshot
909   * @return procId the ID of the restore snapshot procedure
910   */
911  private synchronized long restoreSnapshot(final SnapshotDescription snapshot,
912      final TableDescriptor tableDescriptor, final NonceKey nonceKey, final boolean restoreAcl)
913      throws HBaseSnapshotException {
914    final TableName tableName = tableDescriptor.getTableName();
915
916    // make sure we aren't running a snapshot on the same table
917    if (isTakingSnapshot(tableName)) {
918      throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
919    }
920
921    // make sure we aren't running a restore on the same table
922    if (isRestoringTable(tableName)) {
923      throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
924    }
925
926    try {
927      long procId = master.getMasterProcedureExecutor().submitProcedure(
928        new RestoreSnapshotProcedure(master.getMasterProcedureExecutor().getEnvironment(),
929                tableDescriptor, snapshot, restoreAcl),
930        nonceKey);
931      this.restoreTableToProcIdMap.put(tableName, procId);
932      return procId;
933    } catch (Exception e) {
934      String msg = "Couldn't restore the snapshot=" + ClientSnapshotDescriptionUtils.toString(
935          snapshot)  +
936          " on table=" + tableName;
937      LOG.error(msg, e);
938      throw new RestoreSnapshotException(msg, e);
939    }
940  }
941
942  /**
943   * Verify if the restore of the specified table is in progress.
944   *
945   * @param tableName table under restore
946   * @return <tt>true</tt> if there is a restore in progress of the specified table.
947   */
948  private synchronized boolean isRestoringTable(final TableName tableName) {
949    Long procId = this.restoreTableToProcIdMap.get(tableName);
950    if (procId == null) {
951      return false;
952    }
953    ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
954    if (procExec.isRunning() && !procExec.isFinished(procId)) {
955      return true;
956    } else {
957      this.restoreTableToProcIdMap.remove(tableName);
958      return false;
959    }
960  }
961
962  /**
963   * Return the handler if it is currently live and has the same snapshot target name.
964   * The handler is removed from the sentinels map if completed.
965   * @param sentinels live handlers
966   * @param snapshot snapshot description
967   * @return null if doesn't match, else a live handler.
968   */
969  private synchronized SnapshotSentinel removeSentinelIfFinished(
970      final Map<TableName, SnapshotSentinel> sentinels,
971      final SnapshotDescription snapshot) {
972    if (!snapshot.hasTable()) {
973      return null;
974    }
975
976    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
977    SnapshotSentinel h = sentinels.get(snapshotTable);
978    if (h == null) {
979      return null;
980    }
981
982    if (!h.getSnapshot().getName().equals(snapshot.getName())) {
983      // specified snapshot is to the one currently running
984      return null;
985    }
986
987    // Remove from the "in-progress" list once completed
988    if (h.isFinished()) {
989      sentinels.remove(snapshotTable);
990    }
991
992    return h;
993  }
994
995  /**
996   * Removes "abandoned" snapshot/restore requests.
997   * As part of the HBaseAdmin snapshot/restore API the operation status is checked until completed,
998   * and the in-progress maps are cleaned up when the status of a completed task is requested.
999   * To avoid having sentinels staying around for long time if something client side is failed,
1000   * each operation tries to clean up the in-progress maps sentinels finished from a long time.
1001   */
1002  private void cleanupSentinels() {
1003    cleanupSentinels(this.snapshotHandlers);
1004    cleanupCompletedRestoreInMap();
1005  }
1006
1007  /**
1008   * Remove the sentinels that are marked as finished and the completion time
1009   * has exceeded the removal timeout.
1010   * @param sentinels map of sentinels to clean
1011   */
1012  private synchronized void cleanupSentinels(final Map<TableName, SnapshotSentinel> sentinels) {
1013    long currentTime = EnvironmentEdgeManager.currentTime();
1014    long sentinelsCleanupTimeoutMillis =
1015        master.getConfiguration().getLong(HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS,
1016          SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT);
1017    Iterator<Map.Entry<TableName, SnapshotSentinel>> it = sentinels.entrySet().iterator();
1018    while (it.hasNext()) {
1019      Map.Entry<TableName, SnapshotSentinel> entry = it.next();
1020      SnapshotSentinel sentinel = entry.getValue();
1021      if (sentinel.isFinished()
1022          && (currentTime - sentinel.getCompletionTimestamp()) > sentinelsCleanupTimeoutMillis) {
1023        it.remove();
1024      }
1025    }
1026  }
1027
1028  /**
1029   * Remove the procedures that are marked as finished
1030   */
1031  private synchronized void cleanupCompletedRestoreInMap() {
1032    ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
1033    Iterator<Map.Entry<TableName, Long>> it = restoreTableToProcIdMap.entrySet().iterator();
1034    while (it.hasNext()) {
1035      Map.Entry<TableName, Long> entry = it.next();
1036      Long procId = entry.getValue();
1037      if (procExec.isRunning() && procExec.isFinished(procId)) {
1038        it.remove();
1039      }
1040    }
1041  }
1042
1043  //
1044  // Implementing Stoppable interface
1045  //
1046
1047  @Override
1048  public void stop(String why) {
1049    // short circuit
1050    if (this.stopped) return;
1051    // make sure we get stop
1052    this.stopped = true;
1053    // pass the stop onto take snapshot handlers
1054    for (SnapshotSentinel snapshotHandler: this.snapshotHandlers.values()) {
1055      snapshotHandler.cancel(why);
1056    }
1057    if (snapshotHandlerChoreCleanerTask != null) {
1058      snapshotHandlerChoreCleanerTask.cancel(true);
1059    }
1060    try {
1061      if (coordinator != null) {
1062        coordinator.close();
1063      }
1064    } catch (IOException e) {
1065      LOG.error("stop ProcedureCoordinator error", e);
1066    }
1067  }
1068
1069  @Override
1070  public boolean isStopped() {
1071    return this.stopped;
1072  }
1073
1074  /**
1075   * Throws an exception if snapshot operations (take a snapshot, restore, clone) are not supported.
1076   * Called at the beginning of snapshot() and restoreSnapshot() methods.
1077   * @throws UnsupportedOperationException if snapshot are not supported
1078   */
1079  public void checkSnapshotSupport() throws UnsupportedOperationException {
1080    if (!this.isSnapshotSupported) {
1081      throw new UnsupportedOperationException(
1082        "To use snapshots, You must add to the hbase-site.xml of the HBase Master: '" +
1083          HBASE_SNAPSHOT_ENABLED + "' property with value 'true'.");
1084    }
1085  }
1086
1087  /**
1088   * Called at startup, to verify if snapshot operation is supported, and to avoid
1089   * starting the master if there're snapshots present but the cleaners needed are missing.
1090   * Otherwise we can end up with snapshot data loss.
1091   * @param conf The {@link Configuration} object to use
1092   * @param mfs The MasterFileSystem to use
1093   * @throws IOException in case of file-system operation failure
1094   * @throws UnsupportedOperationException in case cleaners are missing and
1095   *         there're snapshot in the system
1096   */
1097  private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs)
1098      throws IOException, UnsupportedOperationException {
1099    // Verify if snapshot is disabled by the user
1100    String enabled = conf.get(HBASE_SNAPSHOT_ENABLED);
1101    boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false);
1102    boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled);
1103
1104    // Extract cleaners from conf
1105    Set<String> hfileCleaners = new HashSet<>();
1106    String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
1107    if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
1108
1109    Set<String> logCleaners = new HashSet<>();
1110    cleaners = conf.getStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
1111    if (cleaners != null) Collections.addAll(logCleaners, cleaners);
1112
1113    // check if an older version of snapshot directory was present
1114    Path oldSnapshotDir = new Path(mfs.getRootDir(), HConstants.OLD_SNAPSHOT_DIR_NAME);
1115    FileSystem fs = mfs.getFileSystem();
1116    List<SnapshotDescription> ss = getCompletedSnapshots(new Path(rootDir, oldSnapshotDir), false);
1117    if (ss != null && !ss.isEmpty()) {
1118      LOG.error("Snapshots from an earlier release were found under: " + oldSnapshotDir);
1119      LOG.error("Please rename the directory as " + HConstants.SNAPSHOT_DIR_NAME);
1120    }
1121
1122    // If the user has enabled the snapshot, we force the cleaners to be present
1123    // otherwise we still need to check if cleaners are enabled or not and verify
1124    // that there're no snapshot in the .snapshot folder.
1125    if (snapshotEnabled) {
1126      // Inject snapshot cleaners, if snapshot.enable is true
1127      hfileCleaners.add(SnapshotHFileCleaner.class.getName());
1128      hfileCleaners.add(HFileLinkCleaner.class.getName());
1129      // If sync acl to HDFS feature is enabled, then inject the cleaner
1130      if (SnapshotScannerHDFSAclHelper.isAclSyncToHdfsEnabled(conf)) {
1131        hfileCleaners.add(SnapshotScannerHDFSAclCleaner.class.getName());
1132      }
1133
1134      // Set cleaners conf
1135      conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
1136        hfileCleaners.toArray(new String[hfileCleaners.size()]));
1137      conf.setStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS,
1138        logCleaners.toArray(new String[logCleaners.size()]));
1139    } else {
1140      // Verify if cleaners are present
1141      snapshotEnabled =
1142        hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) &&
1143        hfileCleaners.contains(HFileLinkCleaner.class.getName());
1144
1145      // Warn if the cleaners are enabled but the snapshot.enabled property is false/not set.
1146      if (snapshotEnabled) {
1147        LOG.warn("Snapshot log and hfile cleaners are present in the configuration, " +
1148          "but the '" + HBASE_SNAPSHOT_ENABLED + "' property " +
1149          (userDisabled ? "is set to 'false'." : "is not set."));
1150      }
1151    }
1152
1153    // Mark snapshot feature as enabled if cleaners are present and user has not disabled it.
1154    this.isSnapshotSupported = snapshotEnabled && !userDisabled;
1155
1156    // If cleaners are not enabled, verify that there're no snapshot in the .snapshot folder
1157    // otherwise we end up with snapshot data loss.
1158    if (!snapshotEnabled) {
1159      LOG.info("Snapshot feature is not enabled, missing log and hfile cleaners.");
1160      Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(mfs.getRootDir());
1161      if (fs.exists(snapshotDir)) {
1162        FileStatus[] snapshots = CommonFSUtils.listStatus(fs, snapshotDir,
1163          new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
1164        if (snapshots != null) {
1165          LOG.error("Snapshots are present, but cleaners are not enabled.");
1166          checkSnapshotSupport();
1167        }
1168      }
1169    }
1170  }
1171
1172  @Override
1173  public void initialize(MasterServices master, MetricsMaster metricsMaster) throws KeeperException,
1174      IOException, UnsupportedOperationException {
1175    this.master = master;
1176
1177    this.rootDir = master.getMasterFileSystem().getRootDir();
1178    checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem());
1179
1180    // get the configuration for the coordinator
1181    Configuration conf = master.getConfiguration();
1182    long wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
1183    long timeoutMillis = Math.max(conf.getLong(SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_KEY,
1184                    SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT),
1185            conf.getLong(SnapshotDescriptionUtils.MASTER_SNAPSHOT_TIMEOUT_MILLIS,
1186                    SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME));
1187    int opThreads = conf.getInt(SNAPSHOT_POOL_THREADS_KEY, SNAPSHOT_POOL_THREADS_DEFAULT);
1188
1189    // setup the default procedure coordinator
1190    String name = master.getServerName().toString();
1191    ThreadPoolExecutor tpool = ProcedureCoordinator.defaultPool(name, opThreads);
1192    ProcedureCoordinatorRpcs comms = new ZKProcedureCoordinator(
1193        master.getZooKeeper(), SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, name);
1194
1195    this.coordinator = new ProcedureCoordinator(comms, tpool, timeoutMillis, wakeFrequency);
1196    this.executorService = master.getExecutorService();
1197    resetTempDir();
1198    snapshotHandlerChoreCleanerTask =
1199        scheduleThreadPool.scheduleAtFixedRate(this::cleanupSentinels, 10, 10, TimeUnit.SECONDS);
1200  }
1201
1202  @Override
1203  public String getProcedureSignature() {
1204    return ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION;
1205  }
1206
1207  @Override
1208  public void execProcedure(ProcedureDescription desc) throws IOException {
1209    takeSnapshot(toSnapshotDescription(desc));
1210  }
1211
  /**
   * Intentionally performs no permission check; see the comment in the body.
   * @param desc procedure description (unused)
   * @param accessChecker access checker (unused)
   * @param user requesting user (unused)
   * @throws IOException declared by the signature; never thrown by this implementation
   */
  @Override
  public void checkPermissions(ProcedureDescription desc, AccessChecker accessChecker, User user)
      throws IOException {
    // Done by AccessController as part of preSnapshot coprocessor hook (legacy code path).
    // In future, when AC is removed for good, that check should be moved here.
  }
1218
1219  @Override
1220  public boolean isProcedureDone(ProcedureDescription desc) throws IOException {
1221    return isSnapshotDone(toSnapshotDescription(desc));
1222  }
1223
1224  private SnapshotDescription toSnapshotDescription(ProcedureDescription desc)
1225      throws IOException {
1226    SnapshotDescription.Builder builder = SnapshotDescription.newBuilder();
1227    if (!desc.hasInstance()) {
1228      throw new IOException("Snapshot name is not defined: " + desc.toString());
1229    }
1230    String snapshotName = desc.getInstance();
1231    List<NameStringPair> props = desc.getConfigurationList();
1232    String table = null;
1233    for (NameStringPair prop : props) {
1234      if ("table".equalsIgnoreCase(prop.getName())) {
1235        table = prop.getValue();
1236      }
1237    }
1238    if (table == null) {
1239      throw new IOException("Snapshot table is not defined: " + desc.toString());
1240    }
1241    TableName tableName = TableName.valueOf(table);
1242    builder.setTable(tableName.getNameAsString());
1243    builder.setName(snapshotName);
1244    builder.setType(SnapshotDescription.Type.FLUSH);
1245    return builder.build();
1246  }
1247}