001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.snapshot; 019 020import java.io.FileNotFoundException; 021import java.io.IOException; 022import java.util.ArrayList; 023import java.util.Collections; 024import java.util.HashMap; 025import java.util.HashSet; 026import java.util.Iterator; 027import java.util.List; 028import java.util.Map; 029import java.util.Set; 030import java.util.concurrent.ConcurrentHashMap; 031import java.util.concurrent.Executors; 032import java.util.concurrent.ScheduledExecutorService; 033import java.util.concurrent.ScheduledFuture; 034import java.util.concurrent.ThreadPoolExecutor; 035import java.util.concurrent.TimeUnit; 036import java.util.concurrent.locks.ReadWriteLock; 037import java.util.concurrent.locks.ReentrantReadWriteLock; 038import org.apache.hadoop.conf.Configuration; 039import org.apache.hadoop.fs.FSDataInputStream; 040import org.apache.hadoop.fs.FileStatus; 041import org.apache.hadoop.fs.FileSystem; 042import org.apache.hadoop.fs.Path; 043import org.apache.hadoop.hbase.HBaseInterfaceAudience; 044import org.apache.hadoop.hbase.HConstants; 045import org.apache.hadoop.hbase.Stoppable; 046import 
org.apache.hadoop.hbase.TableName; 047import org.apache.hadoop.hbase.client.TableDescriptor; 048import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 049import org.apache.hadoop.hbase.client.TableState; 050import org.apache.hadoop.hbase.errorhandling.ForeignException; 051import org.apache.hadoop.hbase.executor.ExecutorService; 052import org.apache.hadoop.hbase.ipc.RpcServer; 053import org.apache.hadoop.hbase.master.MasterCoprocessorHost; 054import org.apache.hadoop.hbase.master.MasterFileSystem; 055import org.apache.hadoop.hbase.master.MasterServices; 056import org.apache.hadoop.hbase.master.MetricsMaster; 057import org.apache.hadoop.hbase.master.SnapshotSentinel; 058import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; 059import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner; 060import org.apache.hadoop.hbase.master.procedure.CloneSnapshotProcedure; 061import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 062import org.apache.hadoop.hbase.master.procedure.RestoreSnapshotProcedure; 063import org.apache.hadoop.hbase.procedure.MasterProcedureManager; 064import org.apache.hadoop.hbase.procedure.Procedure; 065import org.apache.hadoop.hbase.procedure.ProcedureCoordinator; 066import org.apache.hadoop.hbase.procedure.ProcedureCoordinatorRpcs; 067import org.apache.hadoop.hbase.procedure.ZKProcedureCoordinator; 068import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 069import org.apache.hadoop.hbase.security.AccessDeniedException; 070import org.apache.hadoop.hbase.security.User; 071import org.apache.hadoop.hbase.security.access.AccessChecker; 072import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclCleaner; 073import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclHelper; 074import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; 075import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException; 076import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; 077import 
org.apache.hadoop.hbase.snapshot.SnapshotCreationException; 078import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 079import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException; 080import org.apache.hadoop.hbase.snapshot.SnapshotExistsException; 081import org.apache.hadoop.hbase.snapshot.SnapshotManifest; 082import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; 083import org.apache.hadoop.hbase.snapshot.TablePartiallyOpenException; 084import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException; 085import org.apache.hadoop.hbase.util.CommonFSUtils; 086import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 087import org.apache.hadoop.hbase.util.NonceKey; 088import org.apache.hadoop.hbase.util.TableDescriptorChecker; 089import org.apache.yetus.audience.InterfaceAudience; 090import org.apache.yetus.audience.InterfaceStability; 091import org.apache.zookeeper.KeeperException; 092import org.slf4j.Logger; 093import org.slf4j.LoggerFactory; 094 095import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; 096 097import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 098import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.NameStringPair; 099import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ProcedureDescription; 100import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 101import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription.Type; 102 103/** 104 * This class manages the procedure of taking and restoring snapshots. There is only one 105 * SnapshotManager for the master. 106 * <p> 107 * The class provides methods for monitoring in-progress snapshot actions. 108 * <p> 109 * Note: Currently there can only be one snapshot being taken at a time over the cluster. This is a 110 * simplification in the current implementation. 
111 */ 112@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG) 113@InterfaceStability.Unstable 114public class SnapshotManager extends MasterProcedureManager implements Stoppable { 115 private static final Logger LOG = LoggerFactory.getLogger(SnapshotManager.class); 116 117 /** By default, check to see if the snapshot is complete every WAKE MILLIS (ms) */ 118 private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500; 119 120 /** 121 * Wait time before removing a finished sentinel from the in-progress map 122 * 123 * NOTE: This is used as a safety auto cleanup. 124 * The snapshot and restore handlers map entries are removed when a user asks if a snapshot or 125 * restore is completed. This operation is part of the HBaseAdmin snapshot/restore API flow. 126 * In case something fails on the client side and the snapshot/restore state is not reclaimed 127 * after a default timeout, the entry is removed from the in-progress map. 128 * At this point, if the user asks for the snapshot/restore status, the result will be 129 * snapshot done if exists or failed if it doesn't exists. 130 */ 131 public static final String HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS = 132 "hbase.snapshot.sentinels.cleanup.timeoutMillis"; 133 public static final long SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT = 60 * 1000L; 134 135 /** Enable or disable snapshot support */ 136 public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled"; 137 138 /** 139 * Conf key for # of ms elapsed between checks for snapshot errors while waiting for 140 * completion. 
141 */ 142 private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis"; 143 144 /** Name of the operation to use in the controller */ 145 public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot"; 146 147 /** Conf key for # of threads used by the SnapshotManager thread pool */ 148 public static final String SNAPSHOT_POOL_THREADS_KEY = "hbase.snapshot.master.threads"; 149 150 /** number of current operations running on the master */ 151 public static final int SNAPSHOT_POOL_THREADS_DEFAULT = 1; 152 153 private boolean stopped; 154 private MasterServices master; // Needed by TableEventHandlers 155 private ProcedureCoordinator coordinator; 156 157 // Is snapshot feature enabled? 158 private boolean isSnapshotSupported = false; 159 160 // Snapshot handlers map, with table name as key. 161 // The map is always accessed and modified under the object lock using synchronized. 162 // snapshotTable() will insert an Handler in the table. 163 // isSnapshotDone() will remove the handler requested if the operation is finished. 164 private final Map<TableName, SnapshotSentinel> snapshotHandlers = new ConcurrentHashMap<>(); 165 private final ScheduledExecutorService scheduleThreadPool = 166 Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder() 167 .setNameFormat("SnapshotHandlerChoreCleaner").setDaemon(true).build()); 168 private ScheduledFuture<?> snapshotHandlerChoreCleanerTask; 169 170 // Restore map, with table name as key, procedure ID as value. 171 // The map is always accessed and modified under the object lock using synchronized. 172 // restoreSnapshot()/cloneSnapshot() will insert a procedure ID in the map. 173 // 174 // TODO: just as the Apache HBase 1.x implementation, this map would not survive master 175 // restart/failover. This is just a stopgap implementation until implementation of taking 176 // snapshot using Procedure-V2. 
  private Map<TableName, Long> restoreTableToProcIdMap = new HashMap<>();

  // Root directory, taken from the master file system in the constructor below.
  private Path rootDir;
  // Executor the snapshot handlers are submitted to.
  private ExecutorService executorService;

  /**
   * Read write lock between taking snapshot and snapshot HFile cleaner. The cleaner should skip to
   * check the HFiles if any snapshot is in progress, otherwise it may clean a HFile which would
   * belongs to the newly creating snapshot. So we should grab the write lock first when cleaner
   * start to work. (See HBASE-21387)
   * <p>
   * The lock is created in fair mode (constructor argument {@code true}).
   */
  private ReentrantReadWriteLock takingSnapshotLock = new ReentrantReadWriteLock(true);

  /**
   * Creates a manager with no collaborators set.
   * NOTE(review): presumably meant for reflective instantiation followed by a separate
   * initialization step - confirm against the callers.
   */
  public SnapshotManager() {}

  /**
   * Fully specify all necessary components of a snapshot manager. Exposed for testing.
   * <p>
   * Besides storing the collaborators, this checks snapshot support, clears the working snapshot
   * directory and schedules the periodic sentinel cleanup task.
   * @param master services for the master where the manager is running
   * @param coordinator procedure coordinator instance. exposed for testing.
   * @param pool HBase ExecutorService instance, exposed for testing.
   * @param sentinelCleanInterval initial delay and period, in seconds, of the scheduled
   *          sentinel cleanup task
   * @throws IOException if the working snapshot directory cannot be reset or another filesystem
   *           operation fails
   * @throws UnsupportedOperationException presumably raised by the snapshot support check when
   *           snapshots are not supported - confirm
   */
  @InterfaceAudience.Private
  SnapshotManager(final MasterServices master, ProcedureCoordinator coordinator,
      ExecutorService pool, int sentinelCleanInterval)
      throws IOException, UnsupportedOperationException {
    this.master = master;

    this.rootDir = master.getMasterFileSystem().getRootDir();
    Configuration conf = master.getConfiguration();
    checkSnapshotSupport(conf, master.getMasterFileSystem());

    this.coordinator = coordinator;
    this.executorService = pool;
    // drop leftovers of earlier failed snapshot attempts before accepting new requests
    resetTempDir();
    // the same value is used as initial delay and as period
    snapshotHandlerChoreCleanerTask = this.scheduleThreadPool.scheduleAtFixedRate(
      this::cleanupSentinels, sentinelCleanInterval, sentinelCleanInterval, TimeUnit.SECONDS);
  }

  /**
   * Gets the list of all completed snapshots.
217 * @return list of SnapshotDescriptions 218 * @throws IOException File system exception 219 */ 220 public List<SnapshotDescription> getCompletedSnapshots() throws IOException { 221 return getCompletedSnapshots(SnapshotDescriptionUtils.getSnapshotsDir(rootDir), true); 222 } 223 224 /** 225 * Gets the list of all completed snapshots. 226 * @param snapshotDir snapshot directory 227 * @param withCpCall Whether to call CP hooks 228 * @return list of SnapshotDescriptions 229 * @throws IOException File system exception 230 */ 231 private List<SnapshotDescription> getCompletedSnapshots(Path snapshotDir, boolean withCpCall) 232 throws IOException { 233 List<SnapshotDescription> snapshotDescs = new ArrayList<>(); 234 // first create the snapshot root path and check to see if it exists 235 FileSystem fs = master.getMasterFileSystem().getFileSystem(); 236 if (snapshotDir == null) snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir); 237 238 // if there are no snapshots, return an empty list 239 if (!fs.exists(snapshotDir)) { 240 return snapshotDescs; 241 } 242 243 // ignore all the snapshots in progress 244 FileStatus[] snapshots = fs.listStatus(snapshotDir, 245 new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs)); 246 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost(); 247 withCpCall = withCpCall && cpHost != null; 248 // loop through all the completed snapshots 249 for (FileStatus snapshot : snapshots) { 250 Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE); 251 // if the snapshot is bad 252 if (!fs.exists(info)) { 253 LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist"); 254 continue; 255 } 256 FSDataInputStream in = null; 257 try { 258 in = fs.open(info); 259 SnapshotDescription desc = SnapshotDescription.parseFrom(in); 260 org.apache.hadoop.hbase.client.SnapshotDescription descPOJO = (withCpCall) 261 ? 
ProtobufUtil.createSnapshotDesc(desc) : null; 262 if (withCpCall) { 263 try { 264 cpHost.preListSnapshot(descPOJO); 265 } catch (AccessDeniedException e) { 266 LOG.warn("Current user does not have access to " + desc.getName() + " snapshot. " 267 + "Either you should be owner of this snapshot or admin user."); 268 // Skip this and try for next snapshot 269 continue; 270 } 271 } 272 snapshotDescs.add(desc); 273 274 // call coproc post hook 275 if (withCpCall) { 276 cpHost.postListSnapshot(descPOJO); 277 } 278 } catch (IOException e) { 279 LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e); 280 } finally { 281 if (in != null) { 282 in.close(); 283 } 284 } 285 } 286 return snapshotDescs; 287 } 288 289 /** 290 * Cleans up any snapshots in the snapshot/.tmp directory that were left from failed 291 * snapshot attempts. 292 * 293 * @throws IOException if we can't reach the filesystem 294 */ 295 private void resetTempDir() throws IOException { 296 // cleanup any existing snapshots. 297 Path tmpdir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir, 298 master.getConfiguration()); 299 FileSystem tmpFs = tmpdir.getFileSystem(master.getConfiguration()); 300 if (!tmpFs.delete(tmpdir, true)) { 301 LOG.warn("Couldn't delete working snapshot directory: " + tmpdir); 302 } 303 } 304 305 /** 306 * Delete the specified snapshot 307 * @param snapshot 308 * @throws SnapshotDoesNotExistException If the specified snapshot does not exist. 
309 * @throws IOException For filesystem IOExceptions 310 */ 311 public void deleteSnapshot(SnapshotDescription snapshot) throws IOException { 312 // check to see if it is completed 313 if (!isSnapshotCompleted(snapshot)) { 314 throw new SnapshotDoesNotExistException(ProtobufUtil.createSnapshotDesc(snapshot)); 315 } 316 317 String snapshotName = snapshot.getName(); 318 // first create the snapshot description and check to see if it exists 319 FileSystem fs = master.getMasterFileSystem().getFileSystem(); 320 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); 321 // Get snapshot info from file system. The one passed as parameter is a "fake" snapshotInfo with 322 // just the "name" and it does not contains the "real" snapshot information 323 snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); 324 325 // call coproc pre hook 326 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost(); 327 org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null; 328 if (cpHost != null) { 329 snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot); 330 cpHost.preDeleteSnapshot(snapshotPOJO); 331 } 332 333 LOG.debug("Deleting snapshot: " + snapshotName); 334 // delete the existing snapshot 335 if (!fs.delete(snapshotDir, true)) { 336 throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir); 337 } 338 339 // call coproc post hook 340 if (cpHost != null) { 341 cpHost.postDeleteSnapshot(snapshotPOJO); 342 } 343 344 } 345 346 /** 347 * Check if the specified snapshot is done 348 * 349 * @param expected 350 * @return true if snapshot is ready to be restored, false if it is still being taken. 351 * @throws IOException IOException if error from HDFS or RPC 352 * @throws UnknownSnapshotException if snapshot is invalid or does not exist. 
353 */ 354 public boolean isSnapshotDone(SnapshotDescription expected) throws IOException { 355 // check the request to make sure it has a snapshot 356 if (expected == null) { 357 throw new UnknownSnapshotException( 358 "No snapshot name passed in request, can't figure out which snapshot you want to check."); 359 } 360 361 String ssString = ClientSnapshotDescriptionUtils.toString(expected); 362 363 // check to see if the sentinel exists, 364 // and if the task is complete removes it from the in-progress snapshots map. 365 SnapshotSentinel handler = removeSentinelIfFinished(this.snapshotHandlers, expected); 366 367 // stop tracking "abandoned" handlers 368 cleanupSentinels(); 369 370 if (handler == null) { 371 // If there's no handler in the in-progress map, it means one of the following: 372 // - someone has already requested the snapshot state 373 // - the requested snapshot was completed long time ago (cleanupSentinels() timeout) 374 // - the snapshot was never requested 375 // In those cases returns to the user the "done state" if the snapshots exists on disk, 376 // otherwise raise an exception saying that the snapshot is not running and doesn't exist. 377 if (!isSnapshotCompleted(expected)) { 378 throw new UnknownSnapshotException("Snapshot " + ssString 379 + " is not currently running or one of the known completed snapshots."); 380 } 381 // was done, return true; 382 return true; 383 } 384 385 // pass on any failure we find in the sentinel 386 try { 387 handler.rethrowExceptionIfFailed(); 388 } catch (ForeignException e) { 389 // Give some procedure info on an exception. 390 String status; 391 Procedure p = coordinator.getProcedure(expected.getName()); 392 if (p != null) { 393 status = p.getStatus(); 394 } else { 395 status = expected.getName() + " not found in proclist " + coordinator.getProcedureNames(); 396 } 397 throw new HBaseSnapshotException("Snapshot " + ssString + " had an error. 
" + status, e, 398 ProtobufUtil.createSnapshotDesc(expected)); 399 } 400 401 // check to see if we are done 402 if (handler.isFinished()) { 403 LOG.debug("Snapshot '" + ssString + "' has completed, notifying client."); 404 return true; 405 } else if (LOG.isDebugEnabled()) { 406 LOG.debug("Snapshoting '" + ssString + "' is still in progress!"); 407 } 408 return false; 409 } 410 411 /** 412 * Check to see if there is a snapshot in progress with the same name or on the same table. 413 * Currently we have a limitation only allowing a single snapshot per table at a time. Also we 414 * don't allow snapshot with the same name. 415 * @param snapshot description of the snapshot being checked. 416 * @return <tt>true</tt> if there is a snapshot in progress with the same name or on the same 417 * table. 418 */ 419 synchronized boolean isTakingSnapshot(final SnapshotDescription snapshot) { 420 TableName snapshotTable = TableName.valueOf(snapshot.getTable()); 421 if (isTakingSnapshot(snapshotTable)) { 422 return true; 423 } 424 Iterator<Map.Entry<TableName, SnapshotSentinel>> it = this.snapshotHandlers.entrySet().iterator(); 425 while (it.hasNext()) { 426 Map.Entry<TableName, SnapshotSentinel> entry = it.next(); 427 SnapshotSentinel sentinel = entry.getValue(); 428 if (snapshot.getName().equals(sentinel.getSnapshot().getName()) && !sentinel.isFinished()) { 429 return true; 430 } 431 } 432 return false; 433 } 434 435 /** 436 * Check to see if the specified table has a snapshot in progress. Currently we have a 437 * limitation only allowing a single snapshot per table at a time. 438 * @param tableName name of the table being snapshotted. 439 * @return <tt>true</tt> if there is a snapshot in progress on the specified table. 
440 */ 441 public boolean isTakingSnapshot(final TableName tableName) { 442 SnapshotSentinel handler = this.snapshotHandlers.get(tableName); 443 return handler != null && !handler.isFinished(); 444 } 445 446 /** 447 * Check to make sure that we are OK to run the passed snapshot. Checks to make sure that we 448 * aren't already running a snapshot or restore on the requested table. 449 * @param snapshot description of the snapshot we want to start 450 * @throws HBaseSnapshotException if the filesystem could not be prepared to start the snapshot 451 */ 452 private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot) 453 throws HBaseSnapshotException { 454 Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir, 455 master.getConfiguration()); 456 TableName snapshotTable = 457 TableName.valueOf(snapshot.getTable()); 458 459 // make sure we aren't already running a snapshot 460 if (isTakingSnapshot(snapshot)) { 461 SnapshotSentinel handler = this.snapshotHandlers.get(snapshotTable); 462 throw new SnapshotCreationException("Rejected taking " 463 + ClientSnapshotDescriptionUtils.toString(snapshot) 464 + " because we are already running another snapshot " 465 + (handler != null ? ("on the same table " + 466 ClientSnapshotDescriptionUtils.toString(handler.getSnapshot())) 467 : "with the same name"), ProtobufUtil.createSnapshotDesc(snapshot)); 468 } 469 470 // make sure we aren't running a restore on the same table 471 if (isRestoringTable(snapshotTable)) { 472 throw new SnapshotCreationException("Rejected taking " 473 + ClientSnapshotDescriptionUtils.toString(snapshot) 474 + " because we are already have a restore in progress on the same snapshot."); 475 } 476 477 try { 478 FileSystem workingDirFS = workingDir.getFileSystem(master.getConfiguration()); 479 // delete the working directory, since we aren't running the snapshot. Likely leftovers 480 // from a failed attempt. 
481 workingDirFS.delete(workingDir, true); 482 483 // recreate the working directory for the snapshot 484 if (!workingDirFS.mkdirs(workingDir)) { 485 throw new SnapshotCreationException("Couldn't create working directory (" + workingDir 486 + ") for snapshot" , ProtobufUtil.createSnapshotDesc(snapshot)); 487 } 488 } catch (HBaseSnapshotException e) { 489 throw e; 490 } catch (IOException e) { 491 throw new SnapshotCreationException( 492 "Exception while checking to see if snapshot could be started.", e, 493 ProtobufUtil.createSnapshotDesc(snapshot)); 494 } 495 } 496 497 /** 498 * Take a snapshot of a disabled table. 499 * @param snapshot description of the snapshot to take. Modified to be {@link Type#DISABLED}. 500 * @throws IOException if the snapshot could not be started or filesystem for snapshot 501 * temporary directory could not be determined 502 */ 503 private synchronized void snapshotDisabledTable(SnapshotDescription snapshot) 504 throws IOException { 505 // setup the snapshot 506 prepareToTakeSnapshot(snapshot); 507 508 // set the snapshot to be a disabled snapshot, since the client doesn't know about that 509 snapshot = snapshot.toBuilder().setType(Type.DISABLED).build(); 510 511 // Take the snapshot of the disabled table 512 DisabledTableSnapshotHandler handler = 513 new DisabledTableSnapshotHandler(snapshot, master, this); 514 snapshotTable(snapshot, handler); 515 } 516 517 /** 518 * Take a snapshot of an enabled table. 519 * @param snapshot description of the snapshot to take. 
520 * @throws IOException if the snapshot could not be started or filesystem for snapshot 521 * temporary directory could not be determined 522 */ 523 private synchronized void snapshotEnabledTable(SnapshotDescription snapshot) 524 throws IOException { 525 // setup the snapshot 526 prepareToTakeSnapshot(snapshot); 527 528 // Take the snapshot of the enabled table 529 EnabledTableSnapshotHandler handler = 530 new EnabledTableSnapshotHandler(snapshot, master, this); 531 snapshotTable(snapshot, handler); 532 } 533 534 /** 535 * Take a snapshot using the specified handler. 536 * On failure the snapshot temporary working directory is removed. 537 * NOTE: prepareToTakeSnapshot() called before this one takes care of the rejecting the 538 * snapshot request if the table is busy with another snapshot/restore operation. 539 * @param snapshot the snapshot description 540 * @param handler the snapshot handler 541 */ 542 private synchronized void snapshotTable(SnapshotDescription snapshot, 543 final TakeSnapshotHandler handler) throws IOException { 544 try { 545 handler.prepare(); 546 this.executorService.submit(handler); 547 this.snapshotHandlers.put(TableName.valueOf(snapshot.getTable()), handler); 548 } catch (Exception e) { 549 // cleanup the working directory by trying to delete it from the fs. 
550 Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir, 551 master.getConfiguration()); 552 FileSystem workingDirFs = workingDir.getFileSystem(master.getConfiguration()); 553 try { 554 if (!workingDirFs.delete(workingDir, true)) { 555 LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" + 556 ClientSnapshotDescriptionUtils.toString(snapshot)); 557 } 558 } catch (IOException e1) { 559 LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" + 560 ClientSnapshotDescriptionUtils.toString(snapshot)); 561 } 562 // fail the snapshot 563 throw new SnapshotCreationException("Could not build snapshot handler", e, 564 ProtobufUtil.createSnapshotDesc(snapshot)); 565 } 566 } 567 568 public ReadWriteLock getTakingSnapshotLock() { 569 return this.takingSnapshotLock; 570 } 571 572 /** 573 * The snapshot operation processing as following: <br> 574 * 1. Create a Snapshot Handler, and do some initialization; <br> 575 * 2. Put the handler into snapshotHandlers <br> 576 * So when we consider if any snapshot is taking, we should consider both the takingSnapshotLock 577 * and snapshotHandlers; 578 * @return true to indicate that there're some running snapshots. 579 */ 580 public synchronized boolean isTakingAnySnapshot() { 581 return this.takingSnapshotLock.getReadHoldCount() > 0 || this.snapshotHandlers.size() > 0; 582 } 583 584 /** 585 * Take a snapshot based on the enabled/disabled state of the table. 586 * @param snapshot 587 * @throws HBaseSnapshotException when a snapshot specific exception occurs. 588 * @throws IOException when some sort of generic IO exception occurs. 
   */
  public void takeSnapshot(SnapshotDescription snapshot) throws IOException {
    // Hold the read side of the taking-snapshot lock while the snapshot is being set up.
    this.takingSnapshotLock.readLock().lock();
    try {
      takeSnapshotInternal(snapshot);
    } finally {
      this.takingSnapshotLock.readLock().unlock();
    }
  }

  /**
   * Validates the request, completes the snapshot description and starts the snapshot with the
   * handler matching the table state. The pre and post snapshot coprocessor hooks are invoked
   * around starting the handler when a coprocessor host is present.
   * @param snapshot description of the snapshot to take
   * @throws SnapshotExistsException if a completed snapshot with this description already exists
   * @throws SnapshotCreationException if the table descriptor cannot be read, the table does not
   *           exist, or the table is neither enabled nor disabled
   * @throws IOException when some sort of generic IO exception occurs
   */
  private void takeSnapshotInternal(SnapshotDescription snapshot) throws IOException {
    // check to see if we already completed the snapshot
    if (isSnapshotCompleted(snapshot)) {
      throw new SnapshotExistsException(
          "Snapshot '" + snapshot.getName() + "' already stored on the filesystem.",
          ProtobufUtil.createSnapshotDesc(snapshot));
    }

    LOG.debug("No existing snapshot, attempting snapshot...");

    // stop tracking "abandoned" handlers
    cleanupSentinels();

    // check to see if the table exists
    TableDescriptor desc = null;
    try {
      desc = master.getTableDescriptors().get(
          TableName.valueOf(snapshot.getTable()));
    } catch (FileNotFoundException e) {
      String msg = "Table:" + snapshot.getTable() + " info doesn't exist!";
      LOG.error(msg);
      throw new SnapshotCreationException(msg, e, ProtobufUtil.createSnapshotDesc(snapshot));
    } catch (IOException e) {
      throw new SnapshotCreationException(
          "Error while geting table description for table " + snapshot.getTable(), e,
          ProtobufUtil.createSnapshotDesc(snapshot));
    }
    if (desc == null) {
      throw new SnapshotCreationException(
          "Table '" + snapshot.getTable() + "' doesn't exist, can't take snapshot.",
          ProtobufUtil.createSnapshotDesc(snapshot));
    }
    SnapshotDescription.Builder builder = snapshot.toBuilder();
    // if not specified, set the snapshot format
    if (!snapshot.hasVersion()) {
      builder.setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION);
    }
    // record the requesting user as owner, but only when authorization is supported
    RpcServer.getRequestUser().ifPresent(user -> {
      if (AccessChecker.isAuthorizationSupported(master.getConfiguration())) {
        builder.setOwner(user.getShortName());
      }
    });
    // from here on 'snapshot' is the completed description (version and owner filled in)
    snapshot = builder.build();

    // call pre coproc hook
    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
    org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null;
    if (cpHost != null) {
      snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot);
      cpHost.preSnapshot(snapshotPOJO, desc);
    }

    // if the table is enabled, then have the RS run actually the snapshot work
    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
    if (master.getTableStateManager().isTableState(snapshotTable,
        TableState.State.ENABLED)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Table enabled, starting distributed snapshots for {}",
          ClientSnapshotDescriptionUtils.toString(snapshot));
      }
      snapshotEnabledTable(snapshot);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Started snapshot: {}", ClientSnapshotDescriptionUtils.toString(snapshot));
      }
    }
    // For disabled table, snapshot is created by the master
    else if (master.getTableStateManager().isTableState(snapshotTable,
        TableState.State.DISABLED)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Table is disabled, running snapshot entirely on master for {}",
          ClientSnapshotDescriptionUtils.toString(snapshot));
      }
      snapshotDisabledTable(snapshot);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Started snapshot: {}", ClientSnapshotDescriptionUtils.toString(snapshot));
      }
    } else {
      // neither ENABLED nor DISABLED: refuse to snapshot
      LOG.error("Can't snapshot table '" + snapshot.getTable()
          + "', isn't open or closed, we don't know what to do!");
      TablePartiallyOpenException tpoe = new TablePartiallyOpenException(snapshot.getTable()
          + " isn't fully open.");
      throw new SnapshotCreationException("Table is not entirely open or closed", tpoe,
        ProtobufUtil.createSnapshotDesc(snapshot));
    }

    // call post coproc hook
    if (cpHost != null) {
      cpHost.postSnapshot(snapshotPOJO, desc);
    }
  }

  /**
   * Set the handler for the current snapshot
   *
<p> 693 * Exposed for TESTING 694 * @param tableName 695 * @param handler handler the master should use 696 * 697 * TODO get rid of this if possible, repackaging, modify tests. 698 */ 699 public synchronized void setSnapshotHandlerForTesting( 700 final TableName tableName, 701 final SnapshotSentinel handler) { 702 if (handler != null) { 703 this.snapshotHandlers.put(tableName, handler); 704 } else { 705 this.snapshotHandlers.remove(tableName); 706 } 707 } 708 709 /** 710 * @return distributed commit coordinator for all running snapshots 711 */ 712 ProcedureCoordinator getCoordinator() { 713 return coordinator; 714 } 715 716 /** 717 * Check to see if the snapshot is one of the currently completed snapshots 718 * Returns true if the snapshot exists in the "completed snapshots folder". 719 * 720 * @param snapshot expected snapshot to check 721 * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is 722 * not stored 723 * @throws IOException if the filesystem throws an unexpected exception, 724 * @throws IllegalArgumentException if snapshot name is invalid. 725 */ 726 private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException { 727 try { 728 final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); 729 FileSystem fs = master.getMasterFileSystem().getFileSystem(); 730 // check to see if the snapshot already exists 731 return fs.exists(snapshotDir); 732 } catch (IllegalArgumentException iae) { 733 throw new UnknownSnapshotException("Unexpected exception thrown", iae); 734 } 735 } 736 737 /** 738 * Clone the specified snapshot. 739 * The clone will fail if the destination table has a snapshot or restore in progress. 
740 * 741 * @param reqSnapshot Snapshot Descriptor from request 742 * @param tableName table to clone 743 * @param snapshot Snapshot Descriptor 744 * @param snapshotTableDesc Table Descriptor 745 * @param nonceKey unique identifier to prevent duplicated RPC 746 * @return procId the ID of the clone snapshot procedure 747 * @throws IOException 748 */ 749 private long cloneSnapshot(final SnapshotDescription reqSnapshot, final TableName tableName, 750 final SnapshotDescription snapshot, final TableDescriptor snapshotTableDesc, 751 final NonceKey nonceKey, final boolean restoreAcl) throws IOException { 752 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost(); 753 TableDescriptor htd = TableDescriptorBuilder.copy(tableName, snapshotTableDesc); 754 org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null; 755 if (cpHost != null) { 756 snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot); 757 cpHost.preCloneSnapshot(snapshotPOJO, htd); 758 } 759 long procId; 760 try { 761 procId = cloneSnapshot(snapshot, htd, nonceKey, restoreAcl); 762 } catch (IOException e) { 763 LOG.error("Exception occurred while cloning the snapshot " + snapshot.getName() 764 + " as table " + tableName.getNameAsString(), e); 765 throw e; 766 } 767 LOG.info("Clone snapshot=" + snapshot.getName() + " as table=" + tableName); 768 769 if (cpHost != null) { 770 cpHost.postCloneSnapshot(snapshotPOJO, htd); 771 } 772 return procId; 773 } 774 775 /** 776 * Clone the specified snapshot into a new table. 777 * The operation will fail if the destination table has a snapshot or restore in progress. 
778 * 779 * @param snapshot Snapshot Descriptor 780 * @param tableDescriptor Table Descriptor of the table to create 781 * @param nonceKey unique identifier to prevent duplicated RPC 782 * @return procId the ID of the clone snapshot procedure 783 */ 784 synchronized long cloneSnapshot(final SnapshotDescription snapshot, 785 final TableDescriptor tableDescriptor, final NonceKey nonceKey, final boolean restoreAcl) 786 throws HBaseSnapshotException { 787 TableName tableName = tableDescriptor.getTableName(); 788 789 // make sure we aren't running a snapshot on the same table 790 if (isTakingSnapshot(tableName)) { 791 throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName); 792 } 793 794 // make sure we aren't running a restore on the same table 795 if (isRestoringTable(tableName)) { 796 throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName); 797 } 798 799 try { 800 long procId = master.getMasterProcedureExecutor().submitProcedure( 801 new CloneSnapshotProcedure(master.getMasterProcedureExecutor().getEnvironment(), 802 tableDescriptor, snapshot, restoreAcl), 803 nonceKey); 804 this.restoreTableToProcIdMap.put(tableName, procId); 805 return procId; 806 } catch (Exception e) { 807 String msg = "Couldn't clone the snapshot=" 808 + ClientSnapshotDescriptionUtils.toString(snapshot) + " on table=" + tableName; 809 LOG.error(msg, e); 810 throw new RestoreSnapshotException(msg, e); 811 } 812 } 813 814 /** 815 * Restore or Clone the specified snapshot 816 * @param reqSnapshot 817 * @param nonceKey unique identifier to prevent duplicated RPC 818 * @throws IOException 819 */ 820 public long restoreOrCloneSnapshot(final SnapshotDescription reqSnapshot, final NonceKey nonceKey, 821 final boolean restoreAcl) throws IOException { 822 FileSystem fs = master.getMasterFileSystem().getFileSystem(); 823 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir); 824 825 // check if the 
snapshot exists 826 if (!fs.exists(snapshotDir)) { 827 LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist."); 828 throw new SnapshotDoesNotExistException( 829 ProtobufUtil.createSnapshotDesc(reqSnapshot)); 830 } 831 832 // Get snapshot info from file system. The reqSnapshot is a "fake" snapshotInfo with 833 // just the snapshot "name" and table name to restore. It does not contains the "real" snapshot 834 // information. 835 SnapshotDescription snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); 836 SnapshotManifest manifest = SnapshotManifest.open(master.getConfiguration(), fs, 837 snapshotDir, snapshot); 838 TableDescriptor snapshotTableDesc = manifest.getTableDescriptor(); 839 TableName tableName = TableName.valueOf(reqSnapshot.getTable()); 840 841 // sanity check the new table descriptor 842 TableDescriptorChecker.sanityCheck(master.getConfiguration(), snapshotTableDesc); 843 844 // stop tracking "abandoned" handlers 845 cleanupSentinels(); 846 847 // Verify snapshot validity 848 SnapshotReferenceUtil.verifySnapshot(master.getConfiguration(), fs, manifest); 849 850 // Execute the restore/clone operation 851 long procId; 852 if (master.getTableDescriptors().exists(tableName)) { 853 procId = restoreSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceKey, 854 restoreAcl); 855 } else { 856 procId = 857 cloneSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceKey, restoreAcl); 858 } 859 return procId; 860 } 861 862 /** 863 * Restore the specified snapshot. The restore will fail if the destination table has a snapshot 864 * or restore in progress. 
865 * @param reqSnapshot Snapshot Descriptor from request 866 * @param tableName table to restore 867 * @param snapshot Snapshot Descriptor 868 * @param snapshotTableDesc Table Descriptor 869 * @param nonceKey unique identifier to prevent duplicated RPC 870 * @param restoreAcl true to restore acl of snapshot 871 * @return procId the ID of the restore snapshot procedure 872 * @throws IOException 873 */ 874 private long restoreSnapshot(final SnapshotDescription reqSnapshot, final TableName tableName, 875 final SnapshotDescription snapshot, final TableDescriptor snapshotTableDesc, 876 final NonceKey nonceKey, final boolean restoreAcl) throws IOException { 877 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost(); 878 879 if (master.getTableStateManager().isTableState( 880 TableName.valueOf(snapshot.getTable()), TableState.State.ENABLED)) { 881 throw new UnsupportedOperationException("Table '" + 882 TableName.valueOf(snapshot.getTable()) + "' must be disabled in order to " + 883 "perform a restore operation."); 884 } 885 886 // call Coprocessor pre hook 887 org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null; 888 if (cpHost != null) { 889 snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot); 890 cpHost.preRestoreSnapshot(snapshotPOJO, snapshotTableDesc); 891 } 892 893 long procId; 894 try { 895 procId = restoreSnapshot(snapshot, snapshotTableDesc, nonceKey, restoreAcl); 896 } catch (IOException e) { 897 LOG.error("Exception occurred while restoring the snapshot " + snapshot.getName() 898 + " as table " + tableName.getNameAsString(), e); 899 throw e; 900 } 901 LOG.info("Restore snapshot=" + snapshot.getName() + " as table=" + tableName); 902 903 if (cpHost != null) { 904 cpHost.postRestoreSnapshot(snapshotPOJO, snapshotTableDesc); 905 } 906 907 return procId; 908 } 909 910 /** 911 * Restore the specified snapshot. The restore will fail if the destination table has a snapshot 912 * or restore in progress. 
913 * @param snapshot Snapshot Descriptor 914 * @param tableDescriptor Table Descriptor 915 * @param nonceKey unique identifier to prevent duplicated RPC 916 * @param restoreAcl true to restore acl of snapshot 917 * @return procId the ID of the restore snapshot procedure 918 */ 919 private synchronized long restoreSnapshot(final SnapshotDescription snapshot, 920 final TableDescriptor tableDescriptor, final NonceKey nonceKey, final boolean restoreAcl) 921 throws HBaseSnapshotException { 922 final TableName tableName = tableDescriptor.getTableName(); 923 924 // make sure we aren't running a snapshot on the same table 925 if (isTakingSnapshot(tableName)) { 926 throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName); 927 } 928 929 // make sure we aren't running a restore on the same table 930 if (isRestoringTable(tableName)) { 931 throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName); 932 } 933 934 try { 935 long procId = master.getMasterProcedureExecutor().submitProcedure( 936 new RestoreSnapshotProcedure(master.getMasterProcedureExecutor().getEnvironment(), 937 tableDescriptor, snapshot, restoreAcl), 938 nonceKey); 939 this.restoreTableToProcIdMap.put(tableName, procId); 940 return procId; 941 } catch (Exception e) { 942 String msg = "Couldn't restore the snapshot=" + ClientSnapshotDescriptionUtils.toString( 943 snapshot) + 944 " on table=" + tableName; 945 LOG.error(msg, e); 946 throw new RestoreSnapshotException(msg, e); 947 } 948 } 949 950 /** 951 * Verify if the restore of the specified table is in progress. 952 * 953 * @param tableName table under restore 954 * @return <tt>true</tt> if there is a restore in progress of the specified table. 
955 */ 956 private synchronized boolean isRestoringTable(final TableName tableName) { 957 Long procId = this.restoreTableToProcIdMap.get(tableName); 958 if (procId == null) { 959 return false; 960 } 961 ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor(); 962 if (procExec.isRunning() && !procExec.isFinished(procId)) { 963 return true; 964 } else { 965 this.restoreTableToProcIdMap.remove(tableName); 966 return false; 967 } 968 } 969 970 /** 971 * Return the handler if it is currently live and has the same snapshot target name. 972 * The handler is removed from the sentinels map if completed. 973 * @param sentinels live handlers 974 * @param snapshot snapshot description 975 * @return null if doesn't match, else a live handler. 976 */ 977 private synchronized SnapshotSentinel removeSentinelIfFinished( 978 final Map<TableName, SnapshotSentinel> sentinels, 979 final SnapshotDescription snapshot) { 980 if (!snapshot.hasTable()) { 981 return null; 982 } 983 984 TableName snapshotTable = TableName.valueOf(snapshot.getTable()); 985 SnapshotSentinel h = sentinels.get(snapshotTable); 986 if (h == null) { 987 return null; 988 } 989 990 if (!h.getSnapshot().getName().equals(snapshot.getName())) { 991 // specified snapshot is to the one currently running 992 return null; 993 } 994 995 // Remove from the "in-progress" list once completed 996 if (h.isFinished()) { 997 sentinels.remove(snapshotTable); 998 } 999 1000 return h; 1001 } 1002 1003 /** 1004 * Removes "abandoned" snapshot/restore requests. 1005 * As part of the HBaseAdmin snapshot/restore API the operation status is checked until completed, 1006 * and the in-progress maps are cleaned up when the status of a completed task is requested. 1007 * To avoid having sentinels staying around for long time if something client side is failed, 1008 * each operation tries to clean up the in-progress maps sentinels finished from a long time. 
1009 */ 1010 private void cleanupSentinels() { 1011 cleanupSentinels(this.snapshotHandlers); 1012 cleanupCompletedRestoreInMap(); 1013 } 1014 1015 /** 1016 * Remove the sentinels that are marked as finished and the completion time 1017 * has exceeded the removal timeout. 1018 * @param sentinels map of sentinels to clean 1019 */ 1020 private synchronized void cleanupSentinels(final Map<TableName, SnapshotSentinel> sentinels) { 1021 long currentTime = EnvironmentEdgeManager.currentTime(); 1022 long sentinelsCleanupTimeoutMillis = 1023 master.getConfiguration().getLong(HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS, 1024 SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT); 1025 Iterator<Map.Entry<TableName, SnapshotSentinel>> it = sentinels.entrySet().iterator(); 1026 while (it.hasNext()) { 1027 Map.Entry<TableName, SnapshotSentinel> entry = it.next(); 1028 SnapshotSentinel sentinel = entry.getValue(); 1029 if (sentinel.isFinished() 1030 && (currentTime - sentinel.getCompletionTimestamp()) > sentinelsCleanupTimeoutMillis) { 1031 it.remove(); 1032 } 1033 } 1034 } 1035 1036 /** 1037 * Remove the procedures that are marked as finished 1038 */ 1039 private synchronized void cleanupCompletedRestoreInMap() { 1040 ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor(); 1041 Iterator<Map.Entry<TableName, Long>> it = restoreTableToProcIdMap.entrySet().iterator(); 1042 while (it.hasNext()) { 1043 Map.Entry<TableName, Long> entry = it.next(); 1044 Long procId = entry.getValue(); 1045 if (procExec.isRunning() && procExec.isFinished(procId)) { 1046 it.remove(); 1047 } 1048 } 1049 } 1050 1051 // 1052 // Implementing Stoppable interface 1053 // 1054 1055 @Override 1056 public void stop(String why) { 1057 // short circuit 1058 if (this.stopped) return; 1059 // make sure we get stop 1060 this.stopped = true; 1061 // pass the stop onto take snapshot handlers 1062 for (SnapshotSentinel snapshotHandler: this.snapshotHandlers.values()) { 1063 
snapshotHandler.cancel(why); 1064 } 1065 if (snapshotHandlerChoreCleanerTask != null) { 1066 snapshotHandlerChoreCleanerTask.cancel(true); 1067 } 1068 try { 1069 if (coordinator != null) { 1070 coordinator.close(); 1071 } 1072 } catch (IOException e) { 1073 LOG.error("stop ProcedureCoordinator error", e); 1074 } 1075 } 1076 1077 @Override 1078 public boolean isStopped() { 1079 return this.stopped; 1080 } 1081 1082 /** 1083 * Throws an exception if snapshot operations (take a snapshot, restore, clone) are not supported. 1084 * Called at the beginning of snapshot() and restoreSnapshot() methods. 1085 * @throws UnsupportedOperationException if snapshot are not supported 1086 */ 1087 public void checkSnapshotSupport() throws UnsupportedOperationException { 1088 if (!this.isSnapshotSupported) { 1089 throw new UnsupportedOperationException( 1090 "To use snapshots, You must add to the hbase-site.xml of the HBase Master: '" + 1091 HBASE_SNAPSHOT_ENABLED + "' property with value 'true'."); 1092 } 1093 } 1094 1095 /** 1096 * Called at startup, to verify if snapshot operation is supported, and to avoid 1097 * starting the master if there're snapshots present but the cleaners needed are missing. 1098 * Otherwise we can end up with snapshot data loss. 
1099 * @param conf The {@link Configuration} object to use 1100 * @param mfs The MasterFileSystem to use 1101 * @throws IOException in case of file-system operation failure 1102 * @throws UnsupportedOperationException in case cleaners are missing and 1103 * there're snapshot in the system 1104 */ 1105 private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs) 1106 throws IOException, UnsupportedOperationException { 1107 // Verify if snapshot is disabled by the user 1108 String enabled = conf.get(HBASE_SNAPSHOT_ENABLED); 1109 boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false); 1110 boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled); 1111 1112 // Extract cleaners from conf 1113 Set<String> hfileCleaners = new HashSet<>(); 1114 String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS); 1115 if (cleaners != null) Collections.addAll(hfileCleaners, cleaners); 1116 1117 Set<String> logCleaners = new HashSet<>(); 1118 cleaners = conf.getStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS); 1119 if (cleaners != null) Collections.addAll(logCleaners, cleaners); 1120 1121 // check if an older version of snapshot directory was present 1122 Path oldSnapshotDir = new Path(mfs.getRootDir(), HConstants.OLD_SNAPSHOT_DIR_NAME); 1123 FileSystem fs = mfs.getFileSystem(); 1124 List<SnapshotDescription> ss = getCompletedSnapshots(new Path(rootDir, oldSnapshotDir), false); 1125 if (ss != null && !ss.isEmpty()) { 1126 LOG.error("Snapshots from an earlier release were found under: " + oldSnapshotDir); 1127 LOG.error("Please rename the directory as " + HConstants.SNAPSHOT_DIR_NAME); 1128 } 1129 1130 // If the user has enabled the snapshot, we force the cleaners to be present 1131 // otherwise we still need to check if cleaners are enabled or not and verify 1132 // that there're no snapshot in the .snapshot folder. 
1133 if (snapshotEnabled) { 1134 // Inject snapshot cleaners, if snapshot.enable is true 1135 hfileCleaners.add(SnapshotHFileCleaner.class.getName()); 1136 hfileCleaners.add(HFileLinkCleaner.class.getName()); 1137 // If sync acl to HDFS feature is enabled, then inject the cleaner 1138 if (SnapshotScannerHDFSAclHelper.isAclSyncToHdfsEnabled(conf)) { 1139 hfileCleaners.add(SnapshotScannerHDFSAclCleaner.class.getName()); 1140 } 1141 1142 // Set cleaners conf 1143 conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, 1144 hfileCleaners.toArray(new String[hfileCleaners.size()])); 1145 conf.setStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, 1146 logCleaners.toArray(new String[logCleaners.size()])); 1147 } else { 1148 // Verify if cleaners are present 1149 snapshotEnabled = 1150 hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) && 1151 hfileCleaners.contains(HFileLinkCleaner.class.getName()); 1152 1153 // Warn if the cleaners are enabled but the snapshot.enabled property is false/not set. 1154 if (snapshotEnabled) { 1155 LOG.warn("Snapshot log and hfile cleaners are present in the configuration, " + 1156 "but the '" + HBASE_SNAPSHOT_ENABLED + "' property " + 1157 (userDisabled ? "is set to 'false'." : "is not set.")); 1158 } 1159 } 1160 1161 // Mark snapshot feature as enabled if cleaners are present and user has not disabled it. 1162 this.isSnapshotSupported = snapshotEnabled && !userDisabled; 1163 1164 // If cleaners are not enabled, verify that there're no snapshot in the .snapshot folder 1165 // otherwise we end up with snapshot data loss. 
1166 if (!snapshotEnabled) { 1167 LOG.info("Snapshot feature is not enabled, missing log and hfile cleaners."); 1168 Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(mfs.getRootDir()); 1169 if (fs.exists(snapshotDir)) { 1170 FileStatus[] snapshots = CommonFSUtils.listStatus(fs, snapshotDir, 1171 new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs)); 1172 if (snapshots != null) { 1173 LOG.error("Snapshots are present, but cleaners are not enabled."); 1174 checkSnapshotSupport(); 1175 } 1176 } 1177 } 1178 } 1179 1180 @Override 1181 public void initialize(MasterServices master, MetricsMaster metricsMaster) throws KeeperException, 1182 IOException, UnsupportedOperationException { 1183 this.master = master; 1184 1185 this.rootDir = master.getMasterFileSystem().getRootDir(); 1186 checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem()); 1187 1188 // get the configuration for the coordinator 1189 Configuration conf = master.getConfiguration(); 1190 long wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT); 1191 long timeoutMillis = Math.max(conf.getLong(SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_KEY, 1192 SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT), 1193 conf.getLong(SnapshotDescriptionUtils.MASTER_SNAPSHOT_TIMEOUT_MILLIS, 1194 SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME)); 1195 int opThreads = conf.getInt(SNAPSHOT_POOL_THREADS_KEY, SNAPSHOT_POOL_THREADS_DEFAULT); 1196 1197 // setup the default procedure coordinator 1198 String name = master.getServerName().toString(); 1199 ThreadPoolExecutor tpool = ProcedureCoordinator.defaultPool(name, opThreads); 1200 ProcedureCoordinatorRpcs comms = new ZKProcedureCoordinator( 1201 master.getZooKeeper(), SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, name); 1202 1203 this.coordinator = new ProcedureCoordinator(comms, tpool, timeoutMillis, wakeFrequency); 1204 this.executorService = master.getExecutorService(); 1205 
resetTempDir(); 1206 snapshotHandlerChoreCleanerTask = 1207 scheduleThreadPool.scheduleAtFixedRate(this::cleanupSentinels, 10, 10, TimeUnit.SECONDS); 1208 } 1209 1210 @Override 1211 public String getProcedureSignature() { 1212 return ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION; 1213 } 1214 1215 @Override 1216 public void execProcedure(ProcedureDescription desc) throws IOException { 1217 takeSnapshot(toSnapshotDescription(desc)); 1218 } 1219 1220 @Override 1221 public void checkPermissions(ProcedureDescription desc, AccessChecker accessChecker, User user) 1222 throws IOException { 1223 // Done by AccessController as part of preSnapshot coprocessor hook (legacy code path). 1224 // In future, when we AC is removed for good, that check should be moved here. 1225 } 1226 1227 @Override 1228 public boolean isProcedureDone(ProcedureDescription desc) throws IOException { 1229 return isSnapshotDone(toSnapshotDescription(desc)); 1230 } 1231 1232 private SnapshotDescription toSnapshotDescription(ProcedureDescription desc) 1233 throws IOException { 1234 SnapshotDescription.Builder builder = SnapshotDescription.newBuilder(); 1235 if (!desc.hasInstance()) { 1236 throw new IOException("Snapshot name is not defined: " + desc.toString()); 1237 } 1238 String snapshotName = desc.getInstance(); 1239 List<NameStringPair> props = desc.getConfigurationList(); 1240 String table = null; 1241 for (NameStringPair prop : props) { 1242 if ("table".equalsIgnoreCase(prop.getName())) { 1243 table = prop.getValue(); 1244 } 1245 } 1246 if (table == null) { 1247 throw new IOException("Snapshot table is not defined: " + desc.toString()); 1248 } 1249 TableName tableName = TableName.valueOf(table); 1250 builder.setTable(tableName.getNameAsString()); 1251 builder.setName(snapshotName); 1252 builder.setType(SnapshotDescription.Type.FLUSH); 1253 return builder.build(); 1254 } 1255}