001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.snapshot;
019
020import java.io.FileNotFoundException;
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.Collections;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.Iterator;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030import java.util.concurrent.ConcurrentHashMap;
031import java.util.concurrent.Executors;
032import java.util.concurrent.ScheduledExecutorService;
033import java.util.concurrent.ScheduledFuture;
034import java.util.concurrent.ThreadPoolExecutor;
035import java.util.concurrent.TimeUnit;
036import java.util.concurrent.locks.ReadWriteLock;
037import java.util.concurrent.locks.ReentrantReadWriteLock;
038import org.apache.hadoop.conf.Configuration;
039import org.apache.hadoop.fs.FSDataInputStream;
040import org.apache.hadoop.fs.FileStatus;
041import org.apache.hadoop.fs.FileSystem;
042import org.apache.hadoop.fs.Path;
043import org.apache.hadoop.hbase.HBaseInterfaceAudience;
044import org.apache.hadoop.hbase.HConstants;
045import org.apache.hadoop.hbase.Stoppable;
046import org.apache.hadoop.hbase.TableName;
047import org.apache.hadoop.hbase.client.TableDescriptor;
048import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
049import org.apache.hadoop.hbase.client.TableState;
050import org.apache.hadoop.hbase.errorhandling.ForeignException;
051import org.apache.hadoop.hbase.executor.ExecutorService;
052import org.apache.hadoop.hbase.ipc.RpcServer;
053import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
054import org.apache.hadoop.hbase.master.MasterFileSystem;
055import org.apache.hadoop.hbase.master.MasterServices;
056import org.apache.hadoop.hbase.master.MetricsMaster;
057import org.apache.hadoop.hbase.master.SnapshotSentinel;
058import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
059import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner;
060import org.apache.hadoop.hbase.master.procedure.CloneSnapshotProcedure;
061import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
062import org.apache.hadoop.hbase.master.procedure.RestoreSnapshotProcedure;
063import org.apache.hadoop.hbase.procedure.MasterProcedureManager;
064import org.apache.hadoop.hbase.procedure.Procedure;
065import org.apache.hadoop.hbase.procedure.ProcedureCoordinator;
066import org.apache.hadoop.hbase.procedure.ProcedureCoordinatorRpcs;
067import org.apache.hadoop.hbase.procedure.ZKProcedureCoordinator;
068import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
069import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerValidationUtils;
070import org.apache.hadoop.hbase.security.AccessDeniedException;
071import org.apache.hadoop.hbase.security.User;
072import org.apache.hadoop.hbase.security.access.AccessChecker;
073import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclCleaner;
074import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclHelper;
075import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
076import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException;
077import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
078import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
079import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
080import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException;
081import org.apache.hadoop.hbase.snapshot.SnapshotExistsException;
082import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
083import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
084import org.apache.hadoop.hbase.snapshot.TablePartiallyOpenException;
085import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
086import org.apache.hadoop.hbase.util.CommonFSUtils;
087import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
088import org.apache.hadoop.hbase.util.NonceKey;
089import org.apache.hadoop.hbase.util.TableDescriptorChecker;
090import org.apache.yetus.audience.InterfaceAudience;
091import org.apache.yetus.audience.InterfaceStability;
092import org.apache.zookeeper.KeeperException;
093import org.slf4j.Logger;
094import org.slf4j.LoggerFactory;
095
096import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
097
098import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
099import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.NameStringPair;
100import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ProcedureDescription;
101import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
102import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription.Type;
103
104/**
105 * This class manages the procedure of taking and restoring snapshots. There is only one
106 * SnapshotManager for the master.
107 * <p>
108 * The class provides methods for monitoring in-progress snapshot actions.
109 * <p>
110 * Note: Currently there can only be one snapshot being taken at a time over the cluster. This is a
111 * simplification in the current implementation.
112 */
113@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
114@InterfaceStability.Unstable
115public class SnapshotManager extends MasterProcedureManager implements Stoppable {
116  private static final Logger LOG = LoggerFactory.getLogger(SnapshotManager.class);
117
118  /** By default, check to see if the snapshot is complete every WAKE MILLIS (ms) */
119  private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500;
120
  /**
   * Wait time before removing a finished sentinel from the in-progress map.
   * <p>
   * NOTE: This is used as a safety auto cleanup. The snapshot and restore handler map entries are
   * removed when a user asks if a snapshot or restore is completed. This operation is part of the
   * HBaseAdmin snapshot/restore API flow. In case something fails on the client side and the
   * snapshot/restore state is not reclaimed after a default timeout, the entry is removed from the
   * in-progress map. At this point, if the user asks for the snapshot/restore status, the result
   * will be "done" if the snapshot exists, or "failed" if it doesn't exist.
   */
130  public static final String HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS =
131    "hbase.snapshot.sentinels.cleanup.timeoutMillis";
132  public static final long SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT = 60 * 1000L;
133
134  /** Enable or disable snapshot support */
135  public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled";
136
137  /**
138   * Conf key for # of ms elapsed between checks for snapshot errors while waiting for completion.
139   */
140  private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis";
141
142  /** Name of the operation to use in the controller */
143  public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot";
144
145  /** Conf key for # of threads used by the SnapshotManager thread pool */
146  public static final String SNAPSHOT_POOL_THREADS_KEY = "hbase.snapshot.master.threads";
147
  /** Default number of threads for the SnapshotManager thread pool */
149  public static final int SNAPSHOT_POOL_THREADS_DEFAULT = 1;
150
151  /** Conf key for preserving original max file size configs */
152  public static final String SNAPSHOT_MAX_FILE_SIZE_PRESERVE =
153    "hbase.snapshot.max.filesize.preserve";
154
155  private boolean stopped;
156  private MasterServices master; // Needed by TableEventHandlers
157  private ProcedureCoordinator coordinator;
158
159  // Is snapshot feature enabled?
160  private boolean isSnapshotSupported = false;
161
  // Snapshot handlers map, with table name as key.
  // Writers such as snapshotTable() hold the object lock (synchronized), but some readers, e.g.
  // isTakingSnapshot(TableName), are not synchronized; the map is a ConcurrentHashMap so those
  // unsynchronized reads are safe.
  // snapshotTable() will insert a handler into the map.
  // isSnapshotDone() will remove the requested handler if the operation is finished.
166  private final Map<TableName, SnapshotSentinel> snapshotHandlers = new ConcurrentHashMap<>();
167  private final ScheduledExecutorService scheduleThreadPool =
168    Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder()
169      .setNameFormat("SnapshotHandlerChoreCleaner").setDaemon(true).build());
170  private ScheduledFuture<?> snapshotHandlerChoreCleanerTask;
171
172  // Restore map, with table name as key, procedure ID as value.
173  // The map is always accessed and modified under the object lock using synchronized.
174  // restoreSnapshot()/cloneSnapshot() will insert a procedure ID in the map.
175  //
176  // TODO: just as the Apache HBase 1.x implementation, this map would not survive master
177  // restart/failover. This is just a stopgap implementation until implementation of taking
178  // snapshot using Procedure-V2.
179  private Map<TableName, Long> restoreTableToProcIdMap = new HashMap<>();
180
181  private Path rootDir;
182  private ExecutorService executorService;
183
  /**
   * Read write lock between taking snapshot and snapshot HFile cleaner. The cleaner should skip
   * checking the HFiles if any snapshot is in progress, otherwise it may clean an HFile which
   * belongs to the snapshot being created. So the cleaner should grab the write lock first when
   * it starts to work. (See HBASE-21387)
   */
190  private ReentrantReadWriteLock takingSnapshotLock = new ReentrantReadWriteLock(true);
191
192  public SnapshotManager() {
193  }
194
195  /**
196   * Fully specify all necessary components of a snapshot manager. Exposed for testing.
197   * @param master      services for the master where the manager is running
198   * @param coordinator procedure coordinator instance. exposed for testing.
199   * @param pool        HBase ExecutorServcie instance, exposed for testing.
200   */
201  @InterfaceAudience.Private
202  SnapshotManager(final MasterServices master, ProcedureCoordinator coordinator,
203    ExecutorService pool, int sentinelCleanInterval)
204    throws IOException, UnsupportedOperationException {
205    this.master = master;
206
207    this.rootDir = master.getMasterFileSystem().getRootDir();
208    Configuration conf = master.getConfiguration();
209    checkSnapshotSupport(conf, master.getMasterFileSystem());
210
211    this.coordinator = coordinator;
212    this.executorService = pool;
213    resetTempDir();
214    snapshotHandlerChoreCleanerTask = this.scheduleThreadPool.scheduleAtFixedRate(
215      this::cleanupSentinels, sentinelCleanInterval, sentinelCleanInterval, TimeUnit.SECONDS);
216  }
217
218  /**
219   * Gets the list of all completed snapshots.
220   * @return list of SnapshotDescriptions
221   * @throws IOException File system exception
222   */
223  public List<SnapshotDescription> getCompletedSnapshots() throws IOException {
224    return getCompletedSnapshots(SnapshotDescriptionUtils.getSnapshotsDir(rootDir), true);
225  }
226
227  /**
228   * Gets the list of all completed snapshots.
229   * @param snapshotDir snapshot directory
230   * @param withCpCall  Whether to call CP hooks
231   * @return list of SnapshotDescriptions
232   * @throws IOException File system exception
233   */
234  private List<SnapshotDescription> getCompletedSnapshots(Path snapshotDir, boolean withCpCall)
235    throws IOException {
236    List<SnapshotDescription> snapshotDescs = new ArrayList<>();
237    // first create the snapshot root path and check to see if it exists
238    FileSystem fs = master.getMasterFileSystem().getFileSystem();
239    if (snapshotDir == null) snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
240
241    // if there are no snapshots, return an empty list
242    if (!fs.exists(snapshotDir)) {
243      return snapshotDescs;
244    }
245
246    // ignore all the snapshots in progress
247    FileStatus[] snapshots = fs.listStatus(snapshotDir,
248      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
249    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
250    withCpCall = withCpCall && cpHost != null;
251    // loop through all the completed snapshots
252    for (FileStatus snapshot : snapshots) {
253      Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
254      // if the snapshot is bad
255      if (!fs.exists(info)) {
256        LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist");
257        continue;
258      }
259      FSDataInputStream in = null;
260      try {
261        in = fs.open(info);
262        SnapshotDescription desc = SnapshotDescription.parseFrom(in);
263        org.apache.hadoop.hbase.client.SnapshotDescription descPOJO =
264          (withCpCall) ? ProtobufUtil.createSnapshotDesc(desc) : null;
265        if (withCpCall) {
266          try {
267            cpHost.preListSnapshot(descPOJO);
268          } catch (AccessDeniedException e) {
269            LOG.warn("Current user does not have access to " + desc.getName() + " snapshot. "
270              + "Either you should be owner of this snapshot or admin user.");
271            // Skip this and try for next snapshot
272            continue;
273          }
274        }
275        snapshotDescs.add(desc);
276
277        // call coproc post hook
278        if (withCpCall) {
279          cpHost.postListSnapshot(descPOJO);
280        }
281      } catch (IOException e) {
282        LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e);
283      } finally {
284        if (in != null) {
285          in.close();
286        }
287      }
288    }
289    return snapshotDescs;
290  }
291
292  /**
293   * Cleans up any snapshots in the snapshot/.tmp directory that were left from failed snapshot
294   * attempts.
295   * @throws IOException if we can't reach the filesystem
296   */
297  private void resetTempDir() throws IOException {
298    // cleanup any existing snapshots.
299    Path tmpdir =
300      SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir, master.getConfiguration());
301    FileSystem tmpFs = tmpdir.getFileSystem(master.getConfiguration());
302    if (!tmpFs.delete(tmpdir, true)) {
303      LOG.warn("Couldn't delete working snapshot directory: " + tmpdir);
304    }
305  }
306
307  /**
308   * Delete the specified snapshot n * @throws SnapshotDoesNotExistException If the specified
309   * snapshot does not exist.
310   * @throws IOException For filesystem IOExceptions
311   */
312  public void deleteSnapshot(SnapshotDescription snapshot) throws IOException {
313    // check to see if it is completed
314    if (!isSnapshotCompleted(snapshot)) {
315      throw new SnapshotDoesNotExistException(ProtobufUtil.createSnapshotDesc(snapshot));
316    }
317
318    String snapshotName = snapshot.getName();
319    // first create the snapshot description and check to see if it exists
320    FileSystem fs = master.getMasterFileSystem().getFileSystem();
321    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
322    // Get snapshot info from file system. The one passed as parameter is a "fake" snapshotInfo with
323    // just the "name" and it does not contains the "real" snapshot information
324    snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
325
326    // call coproc pre hook
327    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
328    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
329    if (cpHost != null) {
330      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
331      cpHost.preDeleteSnapshot(snapshotPOJO);
332    }
333
334    LOG.debug("Deleting snapshot: " + snapshotName);
335    // delete the existing snapshot
336    if (!fs.delete(snapshotDir, true)) {
337      throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir);
338    }
339
340    // call coproc post hook
341    if (cpHost != null) {
342      cpHost.postDeleteSnapshot(snapshotPOJO);
343    }
344
345  }
346
347  /**
348   * Check if the specified snapshot is done n * @return true if snapshot is ready to be restored,
349   * false if it is still being taken.
350   * @throws IOException              IOException if error from HDFS or RPC
351   * @throws UnknownSnapshotException if snapshot is invalid or does not exist.
352   */
353  public boolean isSnapshotDone(SnapshotDescription expected) throws IOException {
354    // check the request to make sure it has a snapshot
355    if (expected == null) {
356      throw new UnknownSnapshotException(
357        "No snapshot name passed in request, can't figure out which snapshot you want to check.");
358    }
359
360    String ssString = ClientSnapshotDescriptionUtils.toString(expected);
361
362    // check to see if the sentinel exists,
363    // and if the task is complete removes it from the in-progress snapshots map.
364    SnapshotSentinel handler = removeSentinelIfFinished(this.snapshotHandlers, expected);
365
366    // stop tracking "abandoned" handlers
367    cleanupSentinels();
368
369    if (handler == null) {
370      // If there's no handler in the in-progress map, it means one of the following:
371      // - someone has already requested the snapshot state
372      // - the requested snapshot was completed long time ago (cleanupSentinels() timeout)
373      // - the snapshot was never requested
374      // In those cases returns to the user the "done state" if the snapshots exists on disk,
375      // otherwise raise an exception saying that the snapshot is not running and doesn't exist.
376      if (!isSnapshotCompleted(expected)) {
377        throw new UnknownSnapshotException("Snapshot " + ssString
378          + " is not currently running or one of the known completed snapshots.");
379      }
380      // was done, return true;
381      return true;
382    }
383
384    // pass on any failure we find in the sentinel
385    try {
386      handler.rethrowExceptionIfFailed();
387    } catch (ForeignException e) {
388      // Give some procedure info on an exception.
389      String status;
390      Procedure p = coordinator.getProcedure(expected.getName());
391      if (p != null) {
392        status = p.getStatus();
393      } else {
394        status = expected.getName() + " not found in proclist " + coordinator.getProcedureNames();
395      }
396      throw new HBaseSnapshotException("Snapshot " + ssString + " had an error.  " + status, e,
397        ProtobufUtil.createSnapshotDesc(expected));
398    }
399
400    // check to see if we are done
401    if (handler.isFinished()) {
402      LOG.debug("Snapshot '" + ssString + "' has completed, notifying client.");
403      return true;
404    } else if (LOG.isDebugEnabled()) {
405      LOG.debug("Snapshoting '" + ssString + "' is still in progress!");
406    }
407    return false;
408  }
409
410  /**
411   * Check to see if there is a snapshot in progress with the same name or on the same table.
412   * Currently we have a limitation only allowing a single snapshot per table at a time. Also we
413   * don't allow snapshot with the same name.
414   * @param snapshot description of the snapshot being checked.
415   * @return <tt>true</tt> if there is a snapshot in progress with the same name or on the same
416   *         table.
417   */
418  synchronized boolean isTakingSnapshot(final SnapshotDescription snapshot) {
419    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
420    if (isTakingSnapshot(snapshotTable)) {
421      return true;
422    }
423    Iterator<Map.Entry<TableName, SnapshotSentinel>> it =
424      this.snapshotHandlers.entrySet().iterator();
425    while (it.hasNext()) {
426      Map.Entry<TableName, SnapshotSentinel> entry = it.next();
427      SnapshotSentinel sentinel = entry.getValue();
428      if (snapshot.getName().equals(sentinel.getSnapshot().getName()) && !sentinel.isFinished()) {
429        return true;
430      }
431    }
432    return false;
433  }
434
435  /**
436   * Check to see if the specified table has a snapshot in progress. Currently we have a limitation
437   * only allowing a single snapshot per table at a time.
438   * @param tableName name of the table being snapshotted.
439   * @return <tt>true</tt> if there is a snapshot in progress on the specified table.
440   */
441  public boolean isTakingSnapshot(final TableName tableName) {
442    SnapshotSentinel handler = this.snapshotHandlers.get(tableName);
443    return handler != null && !handler.isFinished();
444  }
445
446  /**
447   * Check to make sure that we are OK to run the passed snapshot. Checks to make sure that we
448   * aren't already running a snapshot or restore on the requested table.
449   * @param snapshot description of the snapshot we want to start
450   * @throws HBaseSnapshotException if the filesystem could not be prepared to start the snapshot
451   */
452  private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot)
453    throws HBaseSnapshotException {
454    Path workingDir =
455      SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir, master.getConfiguration());
456    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
457
458    // make sure we aren't already running a snapshot
459    if (isTakingSnapshot(snapshot)) {
460      SnapshotSentinel handler = this.snapshotHandlers.get(snapshotTable);
461      throw new SnapshotCreationException("Rejected taking "
462        + ClientSnapshotDescriptionUtils.toString(snapshot)
463        + " because we are already running another snapshot "
464        + (handler != null
465          ? ("on the same table " + ClientSnapshotDescriptionUtils.toString(handler.getSnapshot()))
466          : "with the same name"),
467        ProtobufUtil.createSnapshotDesc(snapshot));
468    }
469
470    // make sure we aren't running a restore on the same table
471    if (isRestoringTable(snapshotTable)) {
472      throw new SnapshotCreationException(
473        "Rejected taking " + ClientSnapshotDescriptionUtils.toString(snapshot)
474          + " because we are already have a restore in progress on the same snapshot.");
475    }
476
477    try {
478      FileSystem workingDirFS = workingDir.getFileSystem(master.getConfiguration());
479      // delete the working directory, since we aren't running the snapshot. Likely leftovers
480      // from a failed attempt.
481      workingDirFS.delete(workingDir, true);
482
483      // recreate the working directory for the snapshot
484      if (!workingDirFS.mkdirs(workingDir)) {
485        throw new SnapshotCreationException(
486          "Couldn't create working directory (" + workingDir + ") for snapshot",
487          ProtobufUtil.createSnapshotDesc(snapshot));
488      }
489    } catch (HBaseSnapshotException e) {
490      throw e;
491    } catch (IOException e) {
492      throw new SnapshotCreationException(
493        "Exception while checking to see if snapshot could be started.", e,
494        ProtobufUtil.createSnapshotDesc(snapshot));
495    }
496  }
497
498  /**
499   * Take a snapshot of a disabled table.
500   * @param snapshot description of the snapshot to take. Modified to be {@link Type#DISABLED}.
501   * @throws IOException if the snapshot could not be started or filesystem for snapshot temporary
502   *                     directory could not be determined
503   */
504  private synchronized void snapshotDisabledTable(SnapshotDescription snapshot) throws IOException {
505    // setup the snapshot
506    prepareToTakeSnapshot(snapshot);
507
508    // set the snapshot to be a disabled snapshot, since the client doesn't know about that
509    snapshot = snapshot.toBuilder().setType(Type.DISABLED).build();
510
511    // Take the snapshot of the disabled table
512    DisabledTableSnapshotHandler handler = new DisabledTableSnapshotHandler(snapshot, master, this);
513    snapshotTable(snapshot, handler);
514  }
515
516  /**
517   * Take a snapshot of an enabled table.
518   * @param snapshot description of the snapshot to take.
519   * @throws IOException if the snapshot could not be started or filesystem for snapshot temporary
520   *                     directory could not be determined
521   */
522  private synchronized void snapshotEnabledTable(SnapshotDescription snapshot) throws IOException {
523    // setup the snapshot
524    prepareToTakeSnapshot(snapshot);
525
526    // Take the snapshot of the enabled table
527    EnabledTableSnapshotHandler handler = new EnabledTableSnapshotHandler(snapshot, master, this);
528    snapshotTable(snapshot, handler);
529  }
530
531  /**
532   * Take a snapshot using the specified handler. On failure the snapshot temporary working
533   * directory is removed. NOTE: prepareToTakeSnapshot() called before this one takes care of the
534   * rejecting the snapshot request if the table is busy with another snapshot/restore operation.
535   * @param snapshot the snapshot description
536   * @param handler  the snapshot handler
537   */
538  private synchronized void snapshotTable(SnapshotDescription snapshot,
539    final TakeSnapshotHandler handler) throws IOException {
540    try {
541      handler.prepare();
542      this.executorService.submit(handler);
543      this.snapshotHandlers.put(TableName.valueOf(snapshot.getTable()), handler);
544    } catch (Exception e) {
545      // cleanup the working directory by trying to delete it from the fs.
546      Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir,
547        master.getConfiguration());
548      FileSystem workingDirFs = workingDir.getFileSystem(master.getConfiguration());
549      try {
550        if (!workingDirFs.delete(workingDir, true)) {
551          LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:"
552            + ClientSnapshotDescriptionUtils.toString(snapshot));
553        }
554      } catch (IOException e1) {
555        LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:"
556          + ClientSnapshotDescriptionUtils.toString(snapshot));
557      }
558      // fail the snapshot
559      throw new SnapshotCreationException("Could not build snapshot handler", e,
560        ProtobufUtil.createSnapshotDesc(snapshot));
561    }
562  }
563
564  public ReadWriteLock getTakingSnapshotLock() {
565    return this.takingSnapshotLock;
566  }
567
568  /**
569   * The snapshot operation processing as following: <br>
570   * 1. Create a Snapshot Handler, and do some initialization; <br>
571   * 2. Put the handler into snapshotHandlers <br>
572   * So when we consider if any snapshot is taking, we should consider both the takingSnapshotLock
573   * and snapshotHandlers;
574   * @return true to indicate that there're some running snapshots.
575   */
576  public synchronized boolean isTakingAnySnapshot() {
577    return this.takingSnapshotLock.getReadHoldCount() > 0 || this.snapshotHandlers.size() > 0;
578  }
579
580  /**
581   * Take a snapshot based on the enabled/disabled state of the table. n * @throws
582   * HBaseSnapshotException when a snapshot specific exception occurs.
583   * @throws IOException when some sort of generic IO exception occurs.
584   */
585  public void takeSnapshot(SnapshotDescription snapshot) throws IOException {
586    this.takingSnapshotLock.readLock().lock();
587    try {
588      takeSnapshotInternal(snapshot);
589    } finally {
590      this.takingSnapshotLock.readLock().unlock();
591    }
592  }
593
594  private void takeSnapshotInternal(SnapshotDescription snapshot) throws IOException {
595    // check to see if we already completed the snapshot
596    if (isSnapshotCompleted(snapshot)) {
597      throw new SnapshotExistsException(
598        "Snapshot '" + snapshot.getName() + "' already stored on the filesystem.",
599        ProtobufUtil.createSnapshotDesc(snapshot));
600    }
601
602    LOG.debug("No existing snapshot, attempting snapshot...");
603
604    // stop tracking "abandoned" handlers
605    cleanupSentinels();
606
607    // check to see if the table exists
608    TableDescriptor desc = null;
609    try {
610      desc = master.getTableDescriptors().get(TableName.valueOf(snapshot.getTable()));
611    } catch (FileNotFoundException e) {
612      String msg = "Table:" + snapshot.getTable() + " info doesn't exist!";
613      LOG.error(msg);
614      throw new SnapshotCreationException(msg, e, ProtobufUtil.createSnapshotDesc(snapshot));
615    } catch (IOException e) {
616      throw new SnapshotCreationException(
617        "Error while geting table description for table " + snapshot.getTable(), e,
618        ProtobufUtil.createSnapshotDesc(snapshot));
619    }
620    if (desc == null) {
621      throw new SnapshotCreationException(
622        "Table '" + snapshot.getTable() + "' doesn't exist, can't take snapshot.",
623        ProtobufUtil.createSnapshotDesc(snapshot));
624    }
625    SnapshotDescription.Builder builder = snapshot.toBuilder();
626    // if not specified, set the snapshot format
627    if (!snapshot.hasVersion()) {
628      builder.setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION);
629    }
630    RpcServer.getRequestUser().ifPresent(user -> {
631      if (AccessChecker.isAuthorizationSupported(master.getConfiguration())) {
632        builder.setOwner(user.getShortName());
633      }
634    });
635    snapshot = builder.build();
636
637    // call pre coproc hook
638    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
639    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
640    if (cpHost != null) {
641      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
642      cpHost.preSnapshot(snapshotPOJO, desc);
643    }
644
645    // if the table is enabled, then have the RS run actually the snapshot work
646    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
647    if (master.getTableStateManager().isTableState(snapshotTable, TableState.State.ENABLED)) {
648      if (LOG.isDebugEnabled()) {
649        LOG.debug("Table enabled, starting distributed snapshots for {}",
650          ClientSnapshotDescriptionUtils.toString(snapshot));
651      }
652      snapshotEnabledTable(snapshot);
653      if (LOG.isDebugEnabled()) {
654        LOG.debug("Started snapshot: {}", ClientSnapshotDescriptionUtils.toString(snapshot));
655      }
656    }
657    // For disabled table, snapshot is created by the master
658    else if (master.getTableStateManager().isTableState(snapshotTable, TableState.State.DISABLED)) {
659      if (LOG.isDebugEnabled()) {
660        LOG.debug("Table is disabled, running snapshot entirely on master for {}",
661          ClientSnapshotDescriptionUtils.toString(snapshot));
662      }
663      snapshotDisabledTable(snapshot);
664      if (LOG.isDebugEnabled()) {
665        LOG.debug("Started snapshot: {}", ClientSnapshotDescriptionUtils.toString(snapshot));
666      }
667    } else {
668      LOG.error("Can't snapshot table '" + snapshot.getTable()
669        + "', isn't open or closed, we don't know what to do!");
670      TablePartiallyOpenException tpoe =
671        new TablePartiallyOpenException(snapshot.getTable() + " isn't fully open.");
672      throw new SnapshotCreationException("Table is not entirely open or closed", tpoe,
673        ProtobufUtil.createSnapshotDesc(snapshot));
674    }
675
676    // call post coproc hook
677    if (cpHost != null) {
678      cpHost.postSnapshot(snapshotPOJO, desc);
679    }
680  }
681
682  /**
683   * Set the handler for the current snapshot
684   * <p>
685   * Exposed for TESTING n * @param handler handler the master should use TODO get rid of this if
686   * possible, repackaging, modify tests.
687   */
688  public synchronized void setSnapshotHandlerForTesting(final TableName tableName,
689    final SnapshotSentinel handler) {
690    if (handler != null) {
691      this.snapshotHandlers.put(tableName, handler);
692    } else {
693      this.snapshotHandlers.remove(tableName);
694    }
695  }
696
697  /** Returns distributed commit coordinator for all running snapshots */
698  ProcedureCoordinator getCoordinator() {
699    return coordinator;
700  }
701
702  /**
703   * Check to see if the snapshot is one of the currently completed snapshots Returns true if the
704   * snapshot exists in the "completed snapshots folder".
705   * @param snapshot expected snapshot to check
706   * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is
707   *         not stored
708   * @throws IOException              if the filesystem throws an unexpected exception,
709   * @throws IllegalArgumentException if snapshot name is invalid.
710   */
711  private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException {
712    try {
713      final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
714      FileSystem fs = master.getMasterFileSystem().getFileSystem();
715      // check to see if the snapshot already exists
716      return fs.exists(snapshotDir);
717    } catch (IllegalArgumentException iae) {
718      throw new UnknownSnapshotException("Unexpected exception thrown", iae);
719    }
720  }
721
722  /**
723   * Clone the specified snapshot. The clone will fail if the destination table has a snapshot or
724   * restore in progress.
725   * @param reqSnapshot       Snapshot Descriptor from request
726   * @param tableName         table to clone
727   * @param snapshot          Snapshot Descriptor
728   * @param snapshotTableDesc Table Descriptor
729   * @param nonceKey          unique identifier to prevent duplicated RPC
730   * @return procId the ID of the clone snapshot procedure n
731   */
732  private long cloneSnapshot(final SnapshotDescription reqSnapshot, final TableName tableName,
733    final SnapshotDescription snapshot, final TableDescriptor snapshotTableDesc,
734    final NonceKey nonceKey, final boolean restoreAcl, final String customSFT) throws IOException {
735    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
736    TableDescriptor htd = TableDescriptorBuilder.copy(tableName, snapshotTableDesc);
737    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
738    if (cpHost != null) {
739      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
740      cpHost.preCloneSnapshot(snapshotPOJO, htd);
741    }
742    long procId;
743    try {
744      procId = cloneSnapshot(snapshot, htd, nonceKey, restoreAcl, customSFT);
745    } catch (IOException e) {
746      LOG.error("Exception occurred while cloning the snapshot " + snapshot.getName() + " as table "
747        + tableName.getNameAsString(), e);
748      throw e;
749    }
750    LOG.info("Clone snapshot=" + snapshot.getName() + " as table=" + tableName);
751
752    if (cpHost != null) {
753      cpHost.postCloneSnapshot(snapshotPOJO, htd);
754    }
755    return procId;
756  }
757
758  /**
759   * Clone the specified snapshot into a new table. The operation will fail if the destination table
760   * has a snapshot or restore in progress.
761   * @param snapshot        Snapshot Descriptor
762   * @param tableDescriptor Table Descriptor of the table to create
763   * @param nonceKey        unique identifier to prevent duplicated RPC
764   * @return procId the ID of the clone snapshot procedure
765   */
766  synchronized long cloneSnapshot(final SnapshotDescription snapshot,
767    final TableDescriptor tableDescriptor, final NonceKey nonceKey, final boolean restoreAcl,
768    final String customSFT) throws HBaseSnapshotException {
769    TableName tableName = tableDescriptor.getTableName();
770
771    // make sure we aren't running a snapshot on the same table
772    if (isTakingSnapshot(tableName)) {
773      throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
774    }
775
776    // make sure we aren't running a restore on the same table
777    if (isRestoringTable(tableName)) {
778      throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
779    }
780
781    try {
782      long procId = master.getMasterProcedureExecutor().submitProcedure(
783        new CloneSnapshotProcedure(master.getMasterProcedureExecutor().getEnvironment(),
784          tableDescriptor, snapshot, restoreAcl, customSFT),
785        nonceKey);
786      this.restoreTableToProcIdMap.put(tableName, procId);
787      return procId;
788    } catch (Exception e) {
789      String msg = "Couldn't clone the snapshot="
790        + ClientSnapshotDescriptionUtils.toString(snapshot) + " on table=" + tableName;
791      LOG.error(msg, e);
792      throw new RestoreSnapshotException(msg, e);
793    }
794  }
795
796  /**
797   * Restore or Clone the specified snapshot n * @param nonceKey unique identifier to prevent
798   * duplicated RPC n
799   */
800  public long restoreOrCloneSnapshot(final SnapshotDescription reqSnapshot, final NonceKey nonceKey,
801    final boolean restoreAcl, String customSFT) throws IOException {
802    FileSystem fs = master.getMasterFileSystem().getFileSystem();
803    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir);
804
805    // check if the snapshot exists
806    if (!fs.exists(snapshotDir)) {
807      LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist.");
808      throw new SnapshotDoesNotExistException(ProtobufUtil.createSnapshotDesc(reqSnapshot));
809    }
810
811    // Get snapshot info from file system. The reqSnapshot is a "fake" snapshotInfo with
812    // just the snapshot "name" and table name to restore. It does not contains the "real" snapshot
813    // information.
814    SnapshotDescription snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
815    SnapshotManifest manifest =
816      SnapshotManifest.open(master.getConfiguration(), fs, snapshotDir, snapshot);
817    TableDescriptor snapshotTableDesc = manifest.getTableDescriptor();
818    TableName tableName = TableName.valueOf(reqSnapshot.getTable());
819
820    // sanity check the new table descriptor
821    TableDescriptorChecker.sanityCheck(master.getConfiguration(), snapshotTableDesc);
822
823    // stop tracking "abandoned" handlers
824    cleanupSentinels();
825
826    // Verify snapshot validity
827    SnapshotReferenceUtil.verifySnapshot(master.getConfiguration(), fs, manifest);
828
829    // Execute the restore/clone operation
830    long procId;
831    if (master.getTableDescriptors().exists(tableName)) {
832      procId =
833        restoreSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceKey, restoreAcl);
834    } else {
835      procId = cloneSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceKey,
836        restoreAcl, customSFT);
837    }
838    return procId;
839  }
840
841  /**
842   * Restore the specified snapshot. The restore will fail if the destination table has a snapshot
843   * or restore in progress.
844   * @param reqSnapshot       Snapshot Descriptor from request
845   * @param tableName         table to restore
846   * @param snapshot          Snapshot Descriptor
847   * @param snapshotTableDesc Table Descriptor
848   * @param nonceKey          unique identifier to prevent duplicated RPC
849   * @param restoreAcl        true to restore acl of snapshot
850   * @return procId the ID of the restore snapshot procedure n
851   */
852  private long restoreSnapshot(final SnapshotDescription reqSnapshot, final TableName tableName,
853    final SnapshotDescription snapshot, final TableDescriptor snapshotTableDesc,
854    final NonceKey nonceKey, final boolean restoreAcl) throws IOException {
855    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
856
857    // have to check first if restoring the snapshot would break current SFT setup
858    StoreFileTrackerValidationUtils.validatePreRestoreSnapshot(
859      master.getTableDescriptors().get(tableName), snapshotTableDesc, master.getConfiguration());
860
861    if (
862      master.getTableStateManager().isTableState(TableName.valueOf(snapshot.getTable()),
863        TableState.State.ENABLED)
864    ) {
865      throw new UnsupportedOperationException("Table '" + TableName.valueOf(snapshot.getTable())
866        + "' must be disabled in order to " + "perform a restore operation.");
867    }
868
869    // call Coprocessor pre hook
870    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
871    if (cpHost != null) {
872      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
873      cpHost.preRestoreSnapshot(snapshotPOJO, snapshotTableDesc);
874    }
875
876    long procId;
877    try {
878      procId = restoreSnapshot(snapshot, snapshotTableDesc, nonceKey, restoreAcl);
879    } catch (IOException e) {
880      LOG.error("Exception occurred while restoring the snapshot " + snapshot.getName()
881        + " as table " + tableName.getNameAsString(), e);
882      throw e;
883    }
884    LOG.info("Restore snapshot=" + snapshot.getName() + " as table=" + tableName);
885
886    if (cpHost != null) {
887      cpHost.postRestoreSnapshot(snapshotPOJO, snapshotTableDesc);
888    }
889
890    return procId;
891  }
892
893  /**
894   * Restore the specified snapshot. The restore will fail if the destination table has a snapshot
895   * or restore in progress.
896   * @param snapshot        Snapshot Descriptor
897   * @param tableDescriptor Table Descriptor
898   * @param nonceKey        unique identifier to prevent duplicated RPC
899   * @param restoreAcl      true to restore acl of snapshot
900   * @return procId the ID of the restore snapshot procedure
901   */
902  private synchronized long restoreSnapshot(final SnapshotDescription snapshot,
903    final TableDescriptor tableDescriptor, final NonceKey nonceKey, final boolean restoreAcl)
904    throws HBaseSnapshotException {
905    final TableName tableName = tableDescriptor.getTableName();
906
907    // make sure we aren't running a snapshot on the same table
908    if (isTakingSnapshot(tableName)) {
909      throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
910    }
911
912    // make sure we aren't running a restore on the same table
913    if (isRestoringTable(tableName)) {
914      throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
915    }
916
917    try {
918      long procId = master.getMasterProcedureExecutor().submitProcedure(
919        new RestoreSnapshotProcedure(master.getMasterProcedureExecutor().getEnvironment(),
920          tableDescriptor, snapshot, restoreAcl),
921        nonceKey);
922      this.restoreTableToProcIdMap.put(tableName, procId);
923      return procId;
924    } catch (Exception e) {
925      String msg = "Couldn't restore the snapshot="
926        + ClientSnapshotDescriptionUtils.toString(snapshot) + " on table=" + tableName;
927      LOG.error(msg, e);
928      throw new RestoreSnapshotException(msg, e);
929    }
930  }
931
932  /**
933   * Verify if the restore of the specified table is in progress.
934   * @param tableName table under restore
935   * @return <tt>true</tt> if there is a restore in progress of the specified table.
936   */
937  private synchronized boolean isRestoringTable(final TableName tableName) {
938    Long procId = this.restoreTableToProcIdMap.get(tableName);
939    if (procId == null) {
940      return false;
941    }
942    ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
943    if (procExec.isRunning() && !procExec.isFinished(procId)) {
944      return true;
945    } else {
946      this.restoreTableToProcIdMap.remove(tableName);
947      return false;
948    }
949  }
950
951  /**
952   * Return the handler if it is currently live and has the same snapshot target name. The handler
953   * is removed from the sentinels map if completed.
954   * @param sentinels live handlers
955   * @param snapshot  snapshot description
956   * @return null if doesn't match, else a live handler.
957   */
958  private synchronized SnapshotSentinel removeSentinelIfFinished(
959    final Map<TableName, SnapshotSentinel> sentinels, final SnapshotDescription snapshot) {
960    if (!snapshot.hasTable()) {
961      return null;
962    }
963
964    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
965    SnapshotSentinel h = sentinels.get(snapshotTable);
966    if (h == null) {
967      return null;
968    }
969
970    if (!h.getSnapshot().getName().equals(snapshot.getName())) {
971      // specified snapshot is to the one currently running
972      return null;
973    }
974
975    // Remove from the "in-progress" list once completed
976    if (h.isFinished()) {
977      sentinels.remove(snapshotTable);
978    }
979
980    return h;
981  }
982
983  /**
984   * Removes "abandoned" snapshot/restore requests. As part of the HBaseAdmin snapshot/restore API
985   * the operation status is checked until completed, and the in-progress maps are cleaned up when
986   * the status of a completed task is requested. To avoid having sentinels staying around for long
987   * time if something client side is failed, each operation tries to clean up the in-progress maps
988   * sentinels finished from a long time.
989   */
990  private void cleanupSentinels() {
991    cleanupSentinels(this.snapshotHandlers);
992    cleanupCompletedRestoreInMap();
993  }
994
995  /**
996   * Remove the sentinels that are marked as finished and the completion time has exceeded the
997   * removal timeout.
998   * @param sentinels map of sentinels to clean
999   */
1000  private synchronized void cleanupSentinels(final Map<TableName, SnapshotSentinel> sentinels) {
1001    long currentTime = EnvironmentEdgeManager.currentTime();
1002    long sentinelsCleanupTimeoutMillis =
1003      master.getConfiguration().getLong(HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS,
1004        SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT);
1005    Iterator<Map.Entry<TableName, SnapshotSentinel>> it = sentinels.entrySet().iterator();
1006    while (it.hasNext()) {
1007      Map.Entry<TableName, SnapshotSentinel> entry = it.next();
1008      SnapshotSentinel sentinel = entry.getValue();
1009      if (
1010        sentinel.isFinished()
1011          && (currentTime - sentinel.getCompletionTimestamp()) > sentinelsCleanupTimeoutMillis
1012      ) {
1013        it.remove();
1014      }
1015    }
1016  }
1017
1018  /**
1019   * Remove the procedures that are marked as finished
1020   */
1021  private synchronized void cleanupCompletedRestoreInMap() {
1022    ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
1023    Iterator<Map.Entry<TableName, Long>> it = restoreTableToProcIdMap.entrySet().iterator();
1024    while (it.hasNext()) {
1025      Map.Entry<TableName, Long> entry = it.next();
1026      Long procId = entry.getValue();
1027      if (procExec.isRunning() && procExec.isFinished(procId)) {
1028        it.remove();
1029      }
1030    }
1031  }
1032
1033  //
1034  // Implementing Stoppable interface
1035  //
1036
1037  @Override
1038  public void stop(String why) {
1039    // short circuit
1040    if (this.stopped) return;
1041    // make sure we get stop
1042    this.stopped = true;
1043    // pass the stop onto take snapshot handlers
1044    for (SnapshotSentinel snapshotHandler : this.snapshotHandlers.values()) {
1045      snapshotHandler.cancel(why);
1046    }
1047    if (snapshotHandlerChoreCleanerTask != null) {
1048      snapshotHandlerChoreCleanerTask.cancel(true);
1049    }
1050    try {
1051      if (coordinator != null) {
1052        coordinator.close();
1053      }
1054    } catch (IOException e) {
1055      LOG.error("stop ProcedureCoordinator error", e);
1056    }
1057  }
1058
1059  @Override
1060  public boolean isStopped() {
1061    return this.stopped;
1062  }
1063
1064  /**
1065   * Throws an exception if snapshot operations (take a snapshot, restore, clone) are not supported.
1066   * Called at the beginning of snapshot() and restoreSnapshot() methods.
1067   * @throws UnsupportedOperationException if snapshot are not supported
1068   */
1069  public void checkSnapshotSupport() throws UnsupportedOperationException {
1070    if (!this.isSnapshotSupported) {
1071      throw new UnsupportedOperationException(
1072        "To use snapshots, You must add to the hbase-site.xml of the HBase Master: '"
1073          + HBASE_SNAPSHOT_ENABLED + "' property with value 'true'.");
1074    }
1075  }
1076
1077  /**
1078   * Called at startup, to verify if snapshot operation is supported, and to avoid starting the
1079   * master if there're snapshots present but the cleaners needed are missing. Otherwise we can end
1080   * up with snapshot data loss.
1081   * @param conf The {@link Configuration} object to use
1082   * @param mfs  The MasterFileSystem to use
1083   * @throws IOException                   in case of file-system operation failure
1084   * @throws UnsupportedOperationException in case cleaners are missing and there're snapshot in the
1085   *                                       system
1086   */
1087  private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs)
1088    throws IOException, UnsupportedOperationException {
1089    // Verify if snapshot is disabled by the user
1090    String enabled = conf.get(HBASE_SNAPSHOT_ENABLED);
1091    boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false);
1092    boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled);
1093
1094    // Extract cleaners from conf
1095    Set<String> hfileCleaners = new HashSet<>();
1096    String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
1097    if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
1098
1099    Set<String> logCleaners = new HashSet<>();
1100    cleaners = conf.getStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
1101    if (cleaners != null) Collections.addAll(logCleaners, cleaners);
1102
1103    // check if an older version of snapshot directory was present
1104    Path oldSnapshotDir = new Path(mfs.getRootDir(), HConstants.OLD_SNAPSHOT_DIR_NAME);
1105    FileSystem fs = mfs.getFileSystem();
1106    List<SnapshotDescription> ss = getCompletedSnapshots(new Path(rootDir, oldSnapshotDir), false);
1107    if (ss != null && !ss.isEmpty()) {
1108      LOG.error("Snapshots from an earlier release were found under: " + oldSnapshotDir);
1109      LOG.error("Please rename the directory as " + HConstants.SNAPSHOT_DIR_NAME);
1110    }
1111
1112    // If the user has enabled the snapshot, we force the cleaners to be present
1113    // otherwise we still need to check if cleaners are enabled or not and verify
1114    // that there're no snapshot in the .snapshot folder.
1115    if (snapshotEnabled) {
1116      // Inject snapshot cleaners, if snapshot.enable is true
1117      hfileCleaners.add(SnapshotHFileCleaner.class.getName());
1118      hfileCleaners.add(HFileLinkCleaner.class.getName());
1119      // If sync acl to HDFS feature is enabled, then inject the cleaner
1120      if (SnapshotScannerHDFSAclHelper.isAclSyncToHdfsEnabled(conf)) {
1121        hfileCleaners.add(SnapshotScannerHDFSAclCleaner.class.getName());
1122      }
1123
1124      // Set cleaners conf
1125      conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
1126        hfileCleaners.toArray(new String[hfileCleaners.size()]));
1127      conf.setStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS,
1128        logCleaners.toArray(new String[logCleaners.size()]));
1129    } else {
1130      // There may be restore tables if snapshot is enabled and then disabled, so add
1131      // HFileLinkCleaner, see HBASE-26670 for more details.
1132      hfileCleaners.add(HFileLinkCleaner.class.getName());
1133      conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
1134        hfileCleaners.toArray(new String[hfileCleaners.size()]));
1135      // Verify if SnapshotHFileCleaner are present
1136      snapshotEnabled = hfileCleaners.contains(SnapshotHFileCleaner.class.getName());
1137
1138      // Warn if the cleaners are enabled but the snapshot.enabled property is false/not set.
1139      if (snapshotEnabled) {
1140        LOG.warn("Snapshot log and hfile cleaners are present in the configuration, " + "but the '"
1141          + HBASE_SNAPSHOT_ENABLED + "' property "
1142          + (userDisabled ? "is set to 'false'." : "is not set."));
1143      }
1144    }
1145
1146    // Mark snapshot feature as enabled if cleaners are present and user has not disabled it.
1147    this.isSnapshotSupported = snapshotEnabled && !userDisabled;
1148
1149    // If cleaners are not enabled, verify that there're no snapshot in the .snapshot folder
1150    // otherwise we end up with snapshot data loss.
1151    if (!snapshotEnabled) {
1152      LOG.info("Snapshot feature is not enabled, missing log and hfile cleaners.");
1153      Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(mfs.getRootDir());
1154      if (fs.exists(snapshotDir)) {
1155        FileStatus[] snapshots = CommonFSUtils.listStatus(fs, snapshotDir,
1156          new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
1157        if (snapshots != null) {
1158          LOG.error("Snapshots are present, but cleaners are not enabled.");
1159          checkSnapshotSupport();
1160        }
1161      }
1162    }
1163  }
1164
1165  @Override
1166  public void initialize(MasterServices master, MetricsMaster metricsMaster)
1167    throws KeeperException, IOException, UnsupportedOperationException {
1168    this.master = master;
1169
1170    this.rootDir = master.getMasterFileSystem().getRootDir();
1171    checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem());
1172
1173    // get the configuration for the coordinator
1174    Configuration conf = master.getConfiguration();
1175    long wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
1176    long timeoutMillis = Math.max(
1177      conf.getLong(SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_KEY,
1178        SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT),
1179      conf.getLong(SnapshotDescriptionUtils.MASTER_SNAPSHOT_TIMEOUT_MILLIS,
1180        SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME));
1181    int opThreads = conf.getInt(SNAPSHOT_POOL_THREADS_KEY, SNAPSHOT_POOL_THREADS_DEFAULT);
1182
1183    // setup the default procedure coordinator
1184    String name = master.getServerName().toString();
1185    ThreadPoolExecutor tpool = ProcedureCoordinator.defaultPool(name, opThreads);
1186    ProcedureCoordinatorRpcs comms = new ZKProcedureCoordinator(master.getZooKeeper(),
1187      SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, name);
1188
1189    this.coordinator = new ProcedureCoordinator(comms, tpool, timeoutMillis, wakeFrequency);
1190    this.executorService = master.getExecutorService();
1191    resetTempDir();
1192    snapshotHandlerChoreCleanerTask =
1193      scheduleThreadPool.scheduleAtFixedRate(this::cleanupSentinels, 10, 10, TimeUnit.SECONDS);
1194  }
1195
1196  @Override
1197  public String getProcedureSignature() {
1198    return ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION;
1199  }
1200
1201  @Override
1202  public void execProcedure(ProcedureDescription desc) throws IOException {
1203    takeSnapshot(toSnapshotDescription(desc));
1204  }
1205
  /**
   * Intentionally performs no permission check; see the comment in the body for where the check
   * currently happens.
   * @param desc          procedure description (unused here)
   * @param accessChecker access checker (unused here)
   * @param user          requesting user (unused here)
   * @throws IOException declared by the overridden method; never thrown by this implementation
   */
  @Override
  public void checkPermissions(ProcedureDescription desc, AccessChecker accessChecker, User user)
    throws IOException {
    // Done by AccessController as part of preSnapshot coprocessor hook (legacy code path).
    // In the future, when the AccessController is removed for good, that check should be moved
    // here.
  }
1212
1213  @Override
1214  public boolean isProcedureDone(ProcedureDescription desc) throws IOException {
1215    return isSnapshotDone(toSnapshotDescription(desc));
1216  }
1217
1218  private SnapshotDescription toSnapshotDescription(ProcedureDescription desc) throws IOException {
1219    SnapshotDescription.Builder builder = SnapshotDescription.newBuilder();
1220    if (!desc.hasInstance()) {
1221      throw new IOException("Snapshot name is not defined: " + desc.toString());
1222    }
1223    String snapshotName = desc.getInstance();
1224    List<NameStringPair> props = desc.getConfigurationList();
1225    String table = null;
1226    for (NameStringPair prop : props) {
1227      if ("table".equalsIgnoreCase(prop.getName())) {
1228        table = prop.getValue();
1229      }
1230    }
1231    if (table == null) {
1232      throw new IOException("Snapshot table is not defined: " + desc.toString());
1233    }
1234    TableName tableName = TableName.valueOf(table);
1235    builder.setTable(tableName.getNameAsString());
1236    builder.setName(snapshotName);
1237    builder.setType(SnapshotDescription.Type.FLUSH);
1238    return builder.build();
1239  }
1240}