001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.snapshot; 019 020import java.io.FileNotFoundException; 021import java.io.IOException; 022import java.util.ArrayList; 023import java.util.Collections; 024import java.util.HashMap; 025import java.util.HashSet; 026import java.util.Iterator; 027import java.util.List; 028import java.util.Map; 029import java.util.Set; 030import java.util.concurrent.ConcurrentHashMap; 031import java.util.concurrent.Executors; 032import java.util.concurrent.ScheduledExecutorService; 033import java.util.concurrent.ScheduledFuture; 034import java.util.concurrent.ThreadPoolExecutor; 035import java.util.concurrent.TimeUnit; 036import java.util.concurrent.locks.ReadWriteLock; 037import java.util.concurrent.locks.ReentrantReadWriteLock; 038import org.apache.hadoop.conf.Configuration; 039import org.apache.hadoop.fs.FSDataInputStream; 040import org.apache.hadoop.fs.FileStatus; 041import org.apache.hadoop.fs.FileSystem; 042import org.apache.hadoop.fs.Path; 043import org.apache.hadoop.hbase.HBaseInterfaceAudience; 044import org.apache.hadoop.hbase.HConstants; 045import org.apache.hadoop.hbase.MetaTableAccessor; 046import org.apache.hadoop.hbase.Stoppable; 047import org.apache.hadoop.hbase.TableName; 048import org.apache.hadoop.hbase.client.TableDescriptor; 049import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 050import org.apache.hadoop.hbase.client.TableState; 051import org.apache.hadoop.hbase.errorhandling.ForeignException; 052import org.apache.hadoop.hbase.executor.ExecutorService; 053import org.apache.hadoop.hbase.ipc.RpcServer; 054import org.apache.hadoop.hbase.master.MasterCoprocessorHost; 055import org.apache.hadoop.hbase.master.MasterFileSystem; 056import org.apache.hadoop.hbase.master.MasterServices; 057import org.apache.hadoop.hbase.master.MetricsMaster; 058import org.apache.hadoop.hbase.master.SnapshotSentinel; 059import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; 060import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner; 061import org.apache.hadoop.hbase.master.procedure.CloneSnapshotProcedure; 062import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 063import org.apache.hadoop.hbase.master.procedure.RestoreSnapshotProcedure; 064import org.apache.hadoop.hbase.procedure.MasterProcedureManager; 065import org.apache.hadoop.hbase.procedure.Procedure; 066import org.apache.hadoop.hbase.procedure.ProcedureCoordinator; 067import org.apache.hadoop.hbase.procedure.ProcedureCoordinatorRpcs; 068import org.apache.hadoop.hbase.procedure.ZKProcedureCoordinator; 069import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 070import org.apache.hadoop.hbase.security.AccessDeniedException; 071import org.apache.hadoop.hbase.security.User; 072import org.apache.hadoop.hbase.security.access.AccessChecker; 073import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclCleaner; 074import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclHelper; 075import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; 076import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException; 077import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; 078import org.apache.hadoop.hbase.snapshot.SnapshotCreationException; 079import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 080import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException; 081import org.apache.hadoop.hbase.snapshot.SnapshotExistsException; 082import org.apache.hadoop.hbase.snapshot.SnapshotManifest; 083import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; 084import org.apache.hadoop.hbase.snapshot.TablePartiallyOpenException; 085import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException; 086import org.apache.hadoop.hbase.util.CommonFSUtils; 087import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 088import org.apache.hadoop.hbase.util.NonceKey; 089import org.apache.hadoop.hbase.util.TableDescriptorChecker; 090import org.apache.yetus.audience.InterfaceAudience; 091import org.apache.yetus.audience.InterfaceStability; 092import org.apache.zookeeper.KeeperException; 093import org.slf4j.Logger; 094import org.slf4j.LoggerFactory; 095 096import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 097import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; 098 099import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 100import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.NameStringPair; 101import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ProcedureDescription; 102import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 103import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription.Type; 104 105/** 106 * This class manages the procedure of taking and restoring snapshots. There is only one 107 * SnapshotManager for the master. 108 * <p> 109 * The class provides methods for monitoring in-progress snapshot actions. 110 * <p> 111 * Note: Currently there can only be one snapshot being taken at a time over the cluster. This is a 112 * simplification in the current implementation. 113 */ 114@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG) 115@InterfaceStability.Unstable 116public class SnapshotManager extends MasterProcedureManager implements Stoppable { 117 private static final Logger LOG = LoggerFactory.getLogger(SnapshotManager.class); 118 119 /** By default, check to see if the snapshot is complete every WAKE MILLIS (ms) */ 120 private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500; 121 122 /** 123 * Wait time before removing a finished sentinel from the in-progress map 124 * 125 * NOTE: This is used as a safety auto cleanup. 126 * The snapshot and restore handlers map entries are removed when a user asks if a snapshot or 127 * restore is completed. This operation is part of the HBaseAdmin snapshot/restore API flow. 128 * In case something fails on the client side and the snapshot/restore state is not reclaimed 129 * after a default timeout, the entry is removed from the in-progress map. 130 * At this point, if the user asks for the snapshot/restore status, the result will be 131 * snapshot done if exists or failed if it doesn't exists. 132 */ 133 public static final String HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS = 134 "hbase.snapshot.sentinels.cleanup.timeoutMillis"; 135 public static final long SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT = 60 * 1000L; 136 137 /** Enable or disable snapshot support */ 138 public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled"; 139 140 /** 141 * Conf key for # of ms elapsed between checks for snapshot errors while waiting for 142 * completion. 143 */ 144 private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis"; 145 146 /** Name of the operation to use in the controller */ 147 public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot"; 148 149 /** Conf key for # of threads used by the SnapshotManager thread pool */ 150 public static final String SNAPSHOT_POOL_THREADS_KEY = "hbase.snapshot.master.threads"; 151 152 /** number of current operations running on the master */ 153 public static final int SNAPSHOT_POOL_THREADS_DEFAULT = 1; 154 155 private boolean stopped; 156 private MasterServices master; // Needed by TableEventHandlers 157 private ProcedureCoordinator coordinator; 158 159 // Is snapshot feature enabled? 160 private boolean isSnapshotSupported = false; 161 162 // Snapshot handlers map, with table name as key. 163 // The map is always accessed and modified under the object lock using synchronized. 164 // snapshotTable() will insert an Handler in the table. 165 // isSnapshotDone() will remove the handler requested if the operation is finished. 166 private final Map<TableName, SnapshotSentinel> snapshotHandlers = new ConcurrentHashMap<>(); 167 private final ScheduledExecutorService scheduleThreadPool = 168 Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder() 169 .setNameFormat("SnapshotHandlerChoreCleaner").setDaemon(true).build()); 170 private ScheduledFuture<?> snapshotHandlerChoreCleanerTask; 171 172 // Restore map, with table name as key, procedure ID as value. 173 // The map is always accessed and modified under the object lock using synchronized. 174 // restoreSnapshot()/cloneSnapshot() will insert a procedure ID in the map. 175 // 176 // TODO: just as the Apache HBase 1.x implementation, this map would not survive master 177 // restart/failover. This is just a stopgap implementation until implementation of taking 178 // snapshot using Procedure-V2. 179 private Map<TableName, Long> restoreTableToProcIdMap = new HashMap<>(); 180 181 private Path rootDir; 182 private ExecutorService executorService; 183 184 /** 185 * Read write lock between taking snapshot and snapshot HFile cleaner. The cleaner should skip to 186 * check the HFiles if any snapshot is in progress, otherwise it may clean a HFile which would 187 * belongs to the newly creating snapshot. So we should grab the write lock first when cleaner 188 * start to work. (See HBASE-21387) 189 */ 190 private ReentrantReadWriteLock takingSnapshotLock = new ReentrantReadWriteLock(true); 191 192 public SnapshotManager() {} 193 194 /** 195 * Fully specify all necessary components of a snapshot manager. Exposed for testing. 196 * @param master services for the master where the manager is running 197 * @param coordinator procedure coordinator instance. exposed for testing. 198 * @param pool HBase ExecutorServcie instance, exposed for testing. 199 */ 200 @VisibleForTesting 201 SnapshotManager(final MasterServices master, ProcedureCoordinator coordinator, 202 ExecutorService pool, int sentinelCleanInterval) 203 throws IOException, UnsupportedOperationException { 204 this.master = master; 205 206 this.rootDir = master.getMasterFileSystem().getRootDir(); 207 Configuration conf = master.getConfiguration(); 208 checkSnapshotSupport(conf, master.getMasterFileSystem()); 209 210 this.coordinator = coordinator; 211 this.executorService = pool; 212 resetTempDir(); 213 snapshotHandlerChoreCleanerTask = this.scheduleThreadPool.scheduleAtFixedRate( 214 this::cleanupSentinels, sentinelCleanInterval, sentinelCleanInterval, TimeUnit.SECONDS); 215 } 216 217 /** 218 * Gets the list of all completed snapshots. 219 * @return list of SnapshotDescriptions 220 * @throws IOException File system exception 221 */ 222 public List<SnapshotDescription> getCompletedSnapshots() throws IOException { 223 return getCompletedSnapshots(SnapshotDescriptionUtils.getSnapshotsDir(rootDir), true); 224 } 225 226 /** 227 * Gets the list of all completed snapshots. 228 * @param snapshotDir snapshot directory 229 * @param withCpCall Whether to call CP hooks 230 * @return list of SnapshotDescriptions 231 * @throws IOException File system exception 232 */ 233 private List<SnapshotDescription> getCompletedSnapshots(Path snapshotDir, boolean withCpCall) 234 throws IOException { 235 List<SnapshotDescription> snapshotDescs = new ArrayList<>(); 236 // first create the snapshot root path and check to see if it exists 237 FileSystem fs = master.getMasterFileSystem().getFileSystem(); 238 if (snapshotDir == null) snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir); 239 240 // if there are no snapshots, return an empty list 241 if (!fs.exists(snapshotDir)) { 242 return snapshotDescs; 243 } 244 245 // ignore all the snapshots in progress 246 FileStatus[] snapshots = fs.listStatus(snapshotDir, 247 new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs)); 248 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost(); 249 withCpCall = withCpCall && cpHost != null; 250 // loop through all the completed snapshots 251 for (FileStatus snapshot : snapshots) { 252 Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE); 253 // if the snapshot is bad 254 if (!fs.exists(info)) { 255 LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist"); 256 continue; 257 } 258 FSDataInputStream in = null; 259 try { 260 in = fs.open(info); 261 SnapshotDescription desc = SnapshotDescription.parseFrom(in); 262 org.apache.hadoop.hbase.client.SnapshotDescription descPOJO = (withCpCall) 263 ? ProtobufUtil.createSnapshotDesc(desc) : null; 264 if (withCpCall) { 265 try { 266 cpHost.preListSnapshot(descPOJO); 267 } catch (AccessDeniedException e) { 268 LOG.warn("Current user does not have access to " + desc.getName() + " snapshot. " 269 + "Either you should be owner of this snapshot or admin user."); 270 // Skip this and try for next snapshot 271 continue; 272 } 273 } 274 snapshotDescs.add(desc); 275 276 // call coproc post hook 277 if (withCpCall) { 278 cpHost.postListSnapshot(descPOJO); 279 } 280 } catch (IOException e) { 281 LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e); 282 } finally { 283 if (in != null) { 284 in.close(); 285 } 286 } 287 } 288 return snapshotDescs; 289 } 290 291 /** 292 * Cleans up any snapshots in the snapshot/.tmp directory that were left from failed 293 * snapshot attempts. 294 * 295 * @throws IOException if we can't reach the filesystem 296 */ 297 private void resetTempDir() throws IOException { 298 // cleanup any existing snapshots. 299 Path tmpdir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir, 300 master.getConfiguration()); 301 FileSystem tmpFs = tmpdir.getFileSystem(master.getConfiguration()); 302 if (!tmpFs.delete(tmpdir, true)) { 303 LOG.warn("Couldn't delete working snapshot directory: " + tmpdir); 304 } 305 } 306 307 /** 308 * Delete the specified snapshot 309 * @param snapshot 310 * @throws SnapshotDoesNotExistException If the specified snapshot does not exist. 311 * @throws IOException For filesystem IOExceptions 312 */ 313 public void deleteSnapshot(SnapshotDescription snapshot) throws IOException { 314 // check to see if it is completed 315 if (!isSnapshotCompleted(snapshot)) { 316 throw new SnapshotDoesNotExistException(ProtobufUtil.createSnapshotDesc(snapshot)); 317 } 318 319 String snapshotName = snapshot.getName(); 320 // first create the snapshot description and check to see if it exists 321 FileSystem fs = master.getMasterFileSystem().getFileSystem(); 322 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); 323 // Get snapshot info from file system. The one passed as parameter is a "fake" snapshotInfo with 324 // just the "name" and it does not contains the "real" snapshot information 325 snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); 326 327 // call coproc pre hook 328 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost(); 329 org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null; 330 if (cpHost != null) { 331 snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot); 332 cpHost.preDeleteSnapshot(snapshotPOJO); 333 } 334 335 LOG.debug("Deleting snapshot: " + snapshotName); 336 // delete the existing snapshot 337 if (!fs.delete(snapshotDir, true)) { 338 throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir); 339 } 340 341 // call coproc post hook 342 if (cpHost != null) { 343 cpHost.postDeleteSnapshot(snapshotPOJO); 344 } 345 346 } 347 348 /** 349 * Check if the specified snapshot is done 350 * 351 * @param expected 352 * @return true if snapshot is ready to be restored, false if it is still being taken. 353 * @throws IOException IOException if error from HDFS or RPC 354 * @throws UnknownSnapshotException if snapshot is invalid or does not exist. 355 */ 356 public boolean isSnapshotDone(SnapshotDescription expected) throws IOException { 357 // check the request to make sure it has a snapshot 358 if (expected == null) { 359 throw new UnknownSnapshotException( 360 "No snapshot name passed in request, can't figure out which snapshot you want to check."); 361 } 362 363 String ssString = ClientSnapshotDescriptionUtils.toString(expected); 364 365 // check to see if the sentinel exists, 366 // and if the task is complete removes it from the in-progress snapshots map. 367 SnapshotSentinel handler = removeSentinelIfFinished(this.snapshotHandlers, expected); 368 369 // stop tracking "abandoned" handlers 370 cleanupSentinels(); 371 372 if (handler == null) { 373 // If there's no handler in the in-progress map, it means one of the following: 374 // - someone has already requested the snapshot state 375 // - the requested snapshot was completed long time ago (cleanupSentinels() timeout) 376 // - the snapshot was never requested 377 // In those cases returns to the user the "done state" if the snapshots exists on disk, 378 // otherwise raise an exception saying that the snapshot is not running and doesn't exist. 379 if (!isSnapshotCompleted(expected)) { 380 throw new UnknownSnapshotException("Snapshot " + ssString 381 + " is not currently running or one of the known completed snapshots."); 382 } 383 // was done, return true; 384 return true; 385 } 386 387 // pass on any failure we find in the sentinel 388 try { 389 handler.rethrowExceptionIfFailed(); 390 } catch (ForeignException e) { 391 // Give some procedure info on an exception. 392 String status; 393 Procedure p = coordinator.getProcedure(expected.getName()); 394 if (p != null) { 395 status = p.getStatus(); 396 } else { 397 status = expected.getName() + " not found in proclist " + coordinator.getProcedureNames(); 398 } 399 throw new HBaseSnapshotException("Snapshot " + ssString + " had an error. " + status, e, 400 ProtobufUtil.createSnapshotDesc(expected)); 401 } 402 403 // check to see if we are done 404 if (handler.isFinished()) { 405 LOG.debug("Snapshot '" + ssString + "' has completed, notifying client."); 406 return true; 407 } else if (LOG.isDebugEnabled()) { 408 LOG.debug("Snapshoting '" + ssString + "' is still in progress!"); 409 } 410 return false; 411 } 412 413 /** 414 * Check to see if there is a snapshot in progress with the same name or on the same table. 415 * Currently we have a limitation only allowing a single snapshot per table at a time. Also we 416 * don't allow snapshot with the same name. 417 * @param snapshot description of the snapshot being checked. 418 * @return <tt>true</tt> if there is a snapshot in progress with the same name or on the same 419 * table. 420 */ 421 synchronized boolean isTakingSnapshot(final SnapshotDescription snapshot) { 422 TableName snapshotTable = TableName.valueOf(snapshot.getTable()); 423 if (isTakingSnapshot(snapshotTable)) { 424 return true; 425 } 426 Iterator<Map.Entry<TableName, SnapshotSentinel>> it = this.snapshotHandlers.entrySet().iterator(); 427 while (it.hasNext()) { 428 Map.Entry<TableName, SnapshotSentinel> entry = it.next(); 429 SnapshotSentinel sentinel = entry.getValue(); 430 if (snapshot.getName().equals(sentinel.getSnapshot().getName()) && !sentinel.isFinished()) { 431 return true; 432 } 433 } 434 return false; 435 } 436 437 /** 438 * Check to see if the specified table has a snapshot in progress. Currently we have a 439 * limitation only allowing a single snapshot per table at a time. 440 * @param tableName name of the table being snapshotted. 441 * @return <tt>true</tt> if there is a snapshot in progress on the specified table. 442 */ 443 public boolean isTakingSnapshot(final TableName tableName) { 444 SnapshotSentinel handler = this.snapshotHandlers.get(tableName); 445 return handler != null && !handler.isFinished(); 446 } 447 448 /** 449 * Check to make sure that we are OK to run the passed snapshot. Checks to make sure that we 450 * aren't already running a snapshot or restore on the requested table. 451 * @param snapshot description of the snapshot we want to start 452 * @throws HBaseSnapshotException if the filesystem could not be prepared to start the snapshot 453 */ 454 private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot) 455 throws HBaseSnapshotException { 456 Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir, 457 master.getConfiguration()); 458 TableName snapshotTable = 459 TableName.valueOf(snapshot.getTable()); 460 461 // make sure we aren't already running a snapshot 462 if (isTakingSnapshot(snapshot)) { 463 SnapshotSentinel handler = this.snapshotHandlers.get(snapshotTable); 464 throw new SnapshotCreationException("Rejected taking " 465 + ClientSnapshotDescriptionUtils.toString(snapshot) 466 + " because we are already running another snapshot " 467 + (handler != null ? ("on the same table " + 468 ClientSnapshotDescriptionUtils.toString(handler.getSnapshot())) 469 : "with the same name"), ProtobufUtil.createSnapshotDesc(snapshot)); 470 } 471 472 // make sure we aren't running a restore on the same table 473 if (isRestoringTable(snapshotTable)) { 474 throw new SnapshotCreationException("Rejected taking " 475 + ClientSnapshotDescriptionUtils.toString(snapshot) 476 + " because we are already have a restore in progress on the same snapshot."); 477 } 478 479 try { 480 FileSystem workingDirFS = workingDir.getFileSystem(master.getConfiguration()); 481 // delete the working directory, since we aren't running the snapshot. Likely leftovers 482 // from a failed attempt. 483 workingDirFS.delete(workingDir, true); 484 485 // recreate the working directory for the snapshot 486 if (!workingDirFS.mkdirs(workingDir)) { 487 throw new SnapshotCreationException("Couldn't create working directory (" + workingDir 488 + ") for snapshot" , ProtobufUtil.createSnapshotDesc(snapshot)); 489 } 490 } catch (HBaseSnapshotException e) { 491 throw e; 492 } catch (IOException e) { 493 throw new SnapshotCreationException( 494 "Exception while checking to see if snapshot could be started.", e, 495 ProtobufUtil.createSnapshotDesc(snapshot)); 496 } 497 } 498 499 /** 500 * Take a snapshot of a disabled table. 501 * @param snapshot description of the snapshot to take. Modified to be {@link Type#DISABLED}. 502 * @throws IOException if the snapshot could not be started or filesystem for snapshot 503 * temporary directory could not be determined 504 */ 505 private synchronized void snapshotDisabledTable(SnapshotDescription snapshot) 506 throws IOException { 507 // setup the snapshot 508 prepareToTakeSnapshot(snapshot); 509 510 // set the snapshot to be a disabled snapshot, since the client doesn't know about that 511 snapshot = snapshot.toBuilder().setType(Type.DISABLED).build(); 512 513 // Take the snapshot of the disabled table 514 DisabledTableSnapshotHandler handler = 515 new DisabledTableSnapshotHandler(snapshot, master, this); 516 snapshotTable(snapshot, handler); 517 } 518 519 /** 520 * Take a snapshot of an enabled table. 521 * @param snapshot description of the snapshot to take. 522 * @throws IOException if the snapshot could not be started or filesystem for snapshot 523 * temporary directory could not be determined 524 */ 525 private synchronized void snapshotEnabledTable(SnapshotDescription snapshot) 526 throws IOException { 527 // setup the snapshot 528 prepareToTakeSnapshot(snapshot); 529 530 // Take the snapshot of the enabled table 531 EnabledTableSnapshotHandler handler = 532 new EnabledTableSnapshotHandler(snapshot, master, this); 533 snapshotTable(snapshot, handler); 534 } 535 536 /** 537 * Take a snapshot using the specified handler. 538 * On failure the snapshot temporary working directory is removed. 539 * NOTE: prepareToTakeSnapshot() called before this one takes care of the rejecting the 540 * snapshot request if the table is busy with another snapshot/restore operation. 541 * @param snapshot the snapshot description 542 * @param handler the snapshot handler 543 */ 544 private synchronized void snapshotTable(SnapshotDescription snapshot, 545 final TakeSnapshotHandler handler) throws IOException { 546 try { 547 handler.prepare(); 548 this.executorService.submit(handler); 549 this.snapshotHandlers.put(TableName.valueOf(snapshot.getTable()), handler); 550 } catch (Exception e) { 551 // cleanup the working directory by trying to delete it from the fs. 552 Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir, 553 master.getConfiguration()); 554 FileSystem workingDirFs = workingDir.getFileSystem(master.getConfiguration()); 555 try { 556 if (!workingDirFs.delete(workingDir, true)) { 557 LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" + 558 ClientSnapshotDescriptionUtils.toString(snapshot)); 559 } 560 } catch (IOException e1) { 561 LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" + 562 ClientSnapshotDescriptionUtils.toString(snapshot)); 563 } 564 // fail the snapshot 565 throw new SnapshotCreationException("Could not build snapshot handler", e, 566 ProtobufUtil.createSnapshotDesc(snapshot)); 567 } 568 } 569 570 public ReadWriteLock getTakingSnapshotLock() { 571 return this.takingSnapshotLock; 572 } 573 574 /** 575 * The snapshot operation processing as following: <br> 576 * 1. Create a Snapshot Handler, and do some initialization; <br> 577 * 2. Put the handler into snapshotHandlers <br> 578 * So when we consider if any snapshot is taking, we should consider both the takingSnapshotLock 579 * and snapshotHandlers; 580 * @return true to indicate that there're some running snapshots. 581 */ 582 public synchronized boolean isTakingAnySnapshot() { 583 return this.takingSnapshotLock.getReadHoldCount() > 0 || this.snapshotHandlers.size() > 0; 584 } 585 586 /** 587 * Take a snapshot based on the enabled/disabled state of the table. 588 * @param snapshot 589 * @throws HBaseSnapshotException when a snapshot specific exception occurs. 590 * @throws IOException when some sort of generic IO exception occurs. 591 */ 592 public void takeSnapshot(SnapshotDescription snapshot) throws IOException { 593 this.takingSnapshotLock.readLock().lock(); 594 try { 595 takeSnapshotInternal(snapshot); 596 } finally { 597 this.takingSnapshotLock.readLock().unlock(); 598 } 599 } 600 601 private void takeSnapshotInternal(SnapshotDescription snapshot) throws IOException { 602 // check to see if we already completed the snapshot 603 if (isSnapshotCompleted(snapshot)) { 604 throw new SnapshotExistsException( 605 "Snapshot '" + snapshot.getName() + "' already stored on the filesystem.", 606 ProtobufUtil.createSnapshotDesc(snapshot)); 607 } 608 609 LOG.debug("No existing snapshot, attempting snapshot..."); 610 611 // stop tracking "abandoned" handlers 612 cleanupSentinels(); 613 614 // check to see if the table exists 615 TableDescriptor desc = null; 616 try { 617 desc = master.getTableDescriptors().get( 618 TableName.valueOf(snapshot.getTable())); 619 } catch (FileNotFoundException e) { 620 String msg = "Table:" + snapshot.getTable() + " info doesn't exist!"; 621 LOG.error(msg); 622 throw new SnapshotCreationException(msg, e, ProtobufUtil.createSnapshotDesc(snapshot)); 623 } catch (IOException e) { 624 throw new SnapshotCreationException( 625 "Error while geting table description for table " + snapshot.getTable(), e, 626 ProtobufUtil.createSnapshotDesc(snapshot)); 627 } 628 if (desc == null) { 629 throw new SnapshotCreationException( 630 "Table '" + snapshot.getTable() + "' doesn't exist, can't take snapshot.", 631 ProtobufUtil.createSnapshotDesc(snapshot)); 632 } 633 SnapshotDescription.Builder builder = snapshot.toBuilder(); 634 // if not specified, set the snapshot format 635 if (!snapshot.hasVersion()) { 636 builder.setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION); 637 } 638 RpcServer.getRequestUser().ifPresent(user -> { 639 if (AccessChecker.isAuthorizationSupported(master.getConfiguration())) { 640 builder.setOwner(user.getShortName()); 641 } 642 }); 643 snapshot = builder.build(); 644 645 // call pre coproc hook 646 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost(); 647 org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null; 648 if (cpHost != null) { 649 snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot); 650 cpHost.preSnapshot(snapshotPOJO, desc); 651 } 652 653 // if the table is enabled, then have the RS run actually the snapshot work 654 TableName snapshotTable = TableName.valueOf(snapshot.getTable()); 655 if (master.getTableStateManager().isTableState(snapshotTable, 656 TableState.State.ENABLED)) { 657 LOG.debug("Table enabled, starting distributed snapshot."); 658 snapshotEnabledTable(snapshot); 659 LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot)); 660 } 661 // For disabled table, snapshot is created by the master 662 else if (master.getTableStateManager().isTableState(snapshotTable, 663 TableState.State.DISABLED)) { 664 LOG.debug("Table is disabled, running snapshot entirely on master."); 665 snapshotDisabledTable(snapshot); 666 LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot)); 667 } else { 668 LOG.error("Can't snapshot table '" + snapshot.getTable() 669 + "', isn't open or closed, we don't know what to do!"); 670 TablePartiallyOpenException tpoe = new TablePartiallyOpenException(snapshot.getTable() 671 + " isn't fully open."); 672 throw new SnapshotCreationException("Table is not entirely open or closed", tpoe, 673 ProtobufUtil.createSnapshotDesc(snapshot)); 674 } 675 676 // call post coproc hook 677 if (cpHost != null) { 678 cpHost.postSnapshot(snapshotPOJO, desc); 679 } 680 } 681 682 /** 683 * Set the handler for the current snapshot 684 * <p> 685 * Exposed for TESTING 686 * @param tableName 687 * @param handler handler the master should use 688 * 689 * TODO get rid of this if possible, repackaging, modify tests. 690 */ 691 public synchronized void setSnapshotHandlerForTesting( 692 final TableName tableName, 693 final SnapshotSentinel handler) { 694 if (handler != null) { 695 this.snapshotHandlers.put(tableName, handler); 696 } else { 697 this.snapshotHandlers.remove(tableName); 698 } 699 } 700 701 /** 702 * @return distributed commit coordinator for all running snapshots 703 */ 704 ProcedureCoordinator getCoordinator() { 705 return coordinator; 706 } 707 708 /** 709 * Check to see if the snapshot is one of the currently completed snapshots 710 * Returns true if the snapshot exists in the "completed snapshots folder". 711 * 712 * @param snapshot expected snapshot to check 713 * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is 714 * not stored 715 * @throws IOException if the filesystem throws an unexpected exception, 716 * @throws IllegalArgumentException if snapshot name is invalid. 717 */ 718 private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException { 719 try { 720 final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); 721 FileSystem fs = master.getMasterFileSystem().getFileSystem(); 722 // check to see if the snapshot already exists 723 return fs.exists(snapshotDir); 724 } catch (IllegalArgumentException iae) { 725 throw new UnknownSnapshotException("Unexpected exception thrown", iae); 726 } 727 } 728 729 /** 730 * Clone the specified snapshot. 731 * The clone will fail if the destination table has a snapshot or restore in progress. 732 * 733 * @param reqSnapshot Snapshot Descriptor from request 734 * @param tableName table to clone 735 * @param snapshot Snapshot Descriptor 736 * @param snapshotTableDesc Table Descriptor 737 * @param nonceKey unique identifier to prevent duplicated RPC 738 * @return procId the ID of the clone snapshot procedure 739 * @throws IOException 740 */ 741 private long cloneSnapshot(final SnapshotDescription reqSnapshot, final TableName tableName, 742 final SnapshotDescription snapshot, final TableDescriptor snapshotTableDesc, 743 final NonceKey nonceKey, final boolean restoreAcl) throws IOException { 744 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost(); 745 TableDescriptor htd = TableDescriptorBuilder.copy(tableName, snapshotTableDesc); 746 org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null; 747 if (cpHost != null) { 748 snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot); 749 cpHost.preCloneSnapshot(snapshotPOJO, htd); 750 } 751 long procId; 752 try { 753 procId = cloneSnapshot(snapshot, htd, nonceKey, restoreAcl); 754 } catch (IOException e) { 755 LOG.error("Exception occurred while cloning the snapshot " + snapshot.getName() 756 + " as table " + tableName.getNameAsString(), e); 757 throw e; 758 } 759 LOG.info("Clone snapshot=" + snapshot.getName() + " as table=" + tableName); 760 761 if (cpHost != null) { 762 cpHost.postCloneSnapshot(snapshotPOJO, htd); 763 } 764 return procId; 765 } 766 767 /** 768 * Clone the specified snapshot into a new table. 769 * The operation will fail if the destination table has a snapshot or restore in progress. 770 * 771 * @param snapshot Snapshot Descriptor 772 * @param tableDescriptor Table Descriptor of the table to create 773 * @param nonceKey unique identifier to prevent duplicated RPC 774 * @return procId the ID of the clone snapshot procedure 775 */ 776 synchronized long cloneSnapshot(final SnapshotDescription snapshot, 777 final TableDescriptor tableDescriptor, final NonceKey nonceKey, final boolean restoreAcl) 778 throws HBaseSnapshotException { 779 TableName tableName = tableDescriptor.getTableName(); 780 781 // make sure we aren't running a snapshot on the same table 782 if (isTakingSnapshot(tableName)) { 783 throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName); 784 } 785 786 // make sure we aren't running a restore on the same table 787 if (isRestoringTable(tableName)) { 788 throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName); 789 } 790 791 try { 792 long procId = master.getMasterProcedureExecutor().submitProcedure( 793 new CloneSnapshotProcedure(master.getMasterProcedureExecutor().getEnvironment(), 794 tableDescriptor, snapshot, restoreAcl), 795 nonceKey); 796 this.restoreTableToProcIdMap.put(tableName, procId); 797 return procId; 798 } catch (Exception e) { 799 String msg = "Couldn't clone the snapshot=" 800 + ClientSnapshotDescriptionUtils.toString(snapshot) + " on table=" + tableName; 801 LOG.error(msg, e); 802 throw new RestoreSnapshotException(msg, e); 803 } 804 } 805 806 /** 807 * Restore or Clone the specified snapshot 808 * @param reqSnapshot 809 * @param nonceKey unique identifier to prevent duplicated RPC 810 * @throws IOException 811 */ 812 public long restoreOrCloneSnapshot(final SnapshotDescription reqSnapshot, final NonceKey nonceKey, 813 final boolean restoreAcl) throws IOException { 814 FileSystem fs = master.getMasterFileSystem().getFileSystem(); 815 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir); 816 817 // check if the snapshot exists 818 if (!fs.exists(snapshotDir)) { 819 LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist."); 820 throw new SnapshotDoesNotExistException( 821 ProtobufUtil.createSnapshotDesc(reqSnapshot)); 822 } 823 824 // Get snapshot info from file system. The reqSnapshot is a "fake" snapshotInfo with 825 // just the snapshot "name" and table name to restore. It does not contains the "real" snapshot 826 // information. 827 SnapshotDescription snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); 828 SnapshotManifest manifest = SnapshotManifest.open(master.getConfiguration(), fs, 829 snapshotDir, snapshot); 830 TableDescriptor snapshotTableDesc = manifest.getTableDescriptor(); 831 TableName tableName = TableName.valueOf(reqSnapshot.getTable()); 832 833 // sanity check the new table descriptor 834 TableDescriptorChecker.sanityCheck(master.getConfiguration(), snapshotTableDesc); 835 836 // stop tracking "abandoned" handlers 837 cleanupSentinels(); 838 839 // Verify snapshot validity 840 SnapshotReferenceUtil.verifySnapshot(master.getConfiguration(), fs, manifest); 841 842 // Execute the restore/clone operation 843 long procId; 844 if (MetaTableAccessor.tableExists(master.getConnection(), tableName)) { 845 procId = restoreSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceKey, 846 restoreAcl); 847 } else { 848 procId = 849 cloneSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceKey, restoreAcl); 850 } 851 return procId; 852 } 853 854 /** 855 * Restore the specified snapshot. The restore will fail if the destination table has a snapshot 856 * or restore in progress. 857 * @param reqSnapshot Snapshot Descriptor from request 858 * @param tableName table to restore 859 * @param snapshot Snapshot Descriptor 860 * @param snapshotTableDesc Table Descriptor 861 * @param nonceKey unique identifier to prevent duplicated RPC 862 * @param restoreAcl true to restore acl of snapshot 863 * @return procId the ID of the restore snapshot procedure 864 * @throws IOException 865 */ 866 private long restoreSnapshot(final SnapshotDescription reqSnapshot, final TableName tableName, 867 final SnapshotDescription snapshot, final TableDescriptor snapshotTableDesc, 868 final NonceKey nonceKey, final boolean restoreAcl) throws IOException { 869 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost(); 870 871 if (master.getTableStateManager().isTableState( 872 TableName.valueOf(snapshot.getTable()), TableState.State.ENABLED)) { 873 throw new UnsupportedOperationException("Table '" + 874 TableName.valueOf(snapshot.getTable()) + "' must be disabled in order to " + 875 "perform a restore operation."); 876 } 877 878 // call Coprocessor pre hook 879 org.apache.hadoop.hbase.client.SnapshotDescription snapshotPOJO = null; 880 if (cpHost != null) { 881 snapshotPOJO = ProtobufUtil.createSnapshotDesc(snapshot); 882 cpHost.preRestoreSnapshot(snapshotPOJO, snapshotTableDesc); 883 } 884 885 long procId; 886 try { 887 procId = restoreSnapshot(snapshot, snapshotTableDesc, nonceKey, restoreAcl); 888 } catch (IOException e) { 889 LOG.error("Exception occurred while restoring the snapshot " + snapshot.getName() 890 + " as table " + tableName.getNameAsString(), e); 891 throw e; 892 } 893 LOG.info("Restore snapshot=" + snapshot.getName() + " as table=" + tableName); 894 895 if (cpHost != null) { 896 cpHost.postRestoreSnapshot(snapshotPOJO, snapshotTableDesc); 897 } 898 899 return procId; 900 } 901 902 /** 903 * Restore the specified snapshot. The restore will fail if the destination table has a snapshot 904 * or restore in progress. 905 * @param snapshot Snapshot Descriptor 906 * @param tableDescriptor Table Descriptor 907 * @param nonceKey unique identifier to prevent duplicated RPC 908 * @param restoreAcl true to restore acl of snapshot 909 * @return procId the ID of the restore snapshot procedure 910 */ 911 private synchronized long restoreSnapshot(final SnapshotDescription snapshot, 912 final TableDescriptor tableDescriptor, final NonceKey nonceKey, final boolean restoreAcl) 913 throws HBaseSnapshotException { 914 final TableName tableName = tableDescriptor.getTableName(); 915 916 // make sure we aren't running a snapshot on the same table 917 if (isTakingSnapshot(tableName)) { 918 throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName); 919 } 920 921 // make sure we aren't running a restore on the same table 922 if (isRestoringTable(tableName)) { 923 throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName); 924 } 925 926 try { 927 long procId = master.getMasterProcedureExecutor().submitProcedure( 928 new RestoreSnapshotProcedure(master.getMasterProcedureExecutor().getEnvironment(), 929 tableDescriptor, snapshot, restoreAcl), 930 nonceKey); 931 this.restoreTableToProcIdMap.put(tableName, procId); 932 return procId; 933 } catch (Exception e) { 934 String msg = "Couldn't restore the snapshot=" + ClientSnapshotDescriptionUtils.toString( 935 snapshot) + 936 " on table=" + tableName; 937 LOG.error(msg, e); 938 throw new RestoreSnapshotException(msg, e); 939 } 940 } 941 942 /** 943 * Verify if the restore of the specified table is in progress. 944 * 945 * @param tableName table under restore 946 * @return <tt>true</tt> if there is a restore in progress of the specified table. 947 */ 948 private synchronized boolean isRestoringTable(final TableName tableName) { 949 Long procId = this.restoreTableToProcIdMap.get(tableName); 950 if (procId == null) { 951 return false; 952 } 953 ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor(); 954 if (procExec.isRunning() && !procExec.isFinished(procId)) { 955 return true; 956 } else { 957 this.restoreTableToProcIdMap.remove(tableName); 958 return false; 959 } 960 } 961 962 /** 963 * Return the handler if it is currently live and has the same snapshot target name. 964 * The handler is removed from the sentinels map if completed. 965 * @param sentinels live handlers 966 * @param snapshot snapshot description 967 * @return null if doesn't match, else a live handler. 968 */ 969 private synchronized SnapshotSentinel removeSentinelIfFinished( 970 final Map<TableName, SnapshotSentinel> sentinels, 971 final SnapshotDescription snapshot) { 972 if (!snapshot.hasTable()) { 973 return null; 974 } 975 976 TableName snapshotTable = TableName.valueOf(snapshot.getTable()); 977 SnapshotSentinel h = sentinels.get(snapshotTable); 978 if (h == null) { 979 return null; 980 } 981 982 if (!h.getSnapshot().getName().equals(snapshot.getName())) { 983 // specified snapshot is to the one currently running 984 return null; 985 } 986 987 // Remove from the "in-progress" list once completed 988 if (h.isFinished()) { 989 sentinels.remove(snapshotTable); 990 } 991 992 return h; 993 } 994 995 /** 996 * Removes "abandoned" snapshot/restore requests. 997 * As part of the HBaseAdmin snapshot/restore API the operation status is checked until completed, 998 * and the in-progress maps are cleaned up when the status of a completed task is requested. 999 * To avoid having sentinels staying around for long time if something client side is failed, 1000 * each operation tries to clean up the in-progress maps sentinels finished from a long time. 1001 */ 1002 private void cleanupSentinels() { 1003 cleanupSentinels(this.snapshotHandlers); 1004 cleanupCompletedRestoreInMap(); 1005 } 1006 1007 /** 1008 * Remove the sentinels that are marked as finished and the completion time 1009 * has exceeded the removal timeout. 1010 * @param sentinels map of sentinels to clean 1011 */ 1012 private synchronized void cleanupSentinels(final Map<TableName, SnapshotSentinel> sentinels) { 1013 long currentTime = EnvironmentEdgeManager.currentTime(); 1014 long sentinelsCleanupTimeoutMillis = 1015 master.getConfiguration().getLong(HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS, 1016 SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT); 1017 Iterator<Map.Entry<TableName, SnapshotSentinel>> it = sentinels.entrySet().iterator(); 1018 while (it.hasNext()) { 1019 Map.Entry<TableName, SnapshotSentinel> entry = it.next(); 1020 SnapshotSentinel sentinel = entry.getValue(); 1021 if (sentinel.isFinished() 1022 && (currentTime - sentinel.getCompletionTimestamp()) > sentinelsCleanupTimeoutMillis) { 1023 it.remove(); 1024 } 1025 } 1026 } 1027 1028 /** 1029 * Remove the procedures that are marked as finished 1030 */ 1031 private synchronized void cleanupCompletedRestoreInMap() { 1032 ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor(); 1033 Iterator<Map.Entry<TableName, Long>> it = restoreTableToProcIdMap.entrySet().iterator(); 1034 while (it.hasNext()) { 1035 Map.Entry<TableName, Long> entry = it.next(); 1036 Long procId = entry.getValue(); 1037 if (procExec.isRunning() && procExec.isFinished(procId)) { 1038 it.remove(); 1039 } 1040 } 1041 } 1042 1043 // 1044 // Implementing Stoppable interface 1045 // 1046 1047 @Override 1048 public void stop(String why) { 1049 // short circuit 1050 if (this.stopped) return; 1051 // make sure we get stop 1052 this.stopped = true; 1053 // pass the stop onto take snapshot handlers 1054 for (SnapshotSentinel snapshotHandler: this.snapshotHandlers.values()) { 1055 snapshotHandler.cancel(why); 1056 } 1057 if (snapshotHandlerChoreCleanerTask != null) { 1058 snapshotHandlerChoreCleanerTask.cancel(true); 1059 } 1060 try { 1061 if (coordinator != null) { 1062 coordinator.close(); 1063 } 1064 } catch (IOException e) { 1065 LOG.error("stop ProcedureCoordinator error", e); 1066 } 1067 } 1068 1069 @Override 1070 public boolean isStopped() { 1071 return this.stopped; 1072 } 1073 1074 /** 1075 * Throws an exception if snapshot operations (take a snapshot, restore, clone) are not supported. 1076 * Called at the beginning of snapshot() and restoreSnapshot() methods. 1077 * @throws UnsupportedOperationException if snapshot are not supported 1078 */ 1079 public void checkSnapshotSupport() throws UnsupportedOperationException { 1080 if (!this.isSnapshotSupported) { 1081 throw new UnsupportedOperationException( 1082 "To use snapshots, You must add to the hbase-site.xml of the HBase Master: '" + 1083 HBASE_SNAPSHOT_ENABLED + "' property with value 'true'."); 1084 } 1085 } 1086 1087 /** 1088 * Called at startup, to verify if snapshot operation is supported, and to avoid 1089 * starting the master if there're snapshots present but the cleaners needed are missing. 1090 * Otherwise we can end up with snapshot data loss. 1091 * @param conf The {@link Configuration} object to use 1092 * @param mfs The MasterFileSystem to use 1093 * @throws IOException in case of file-system operation failure 1094 * @throws UnsupportedOperationException in case cleaners are missing and 1095 * there're snapshot in the system 1096 */ 1097 private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs) 1098 throws IOException, UnsupportedOperationException { 1099 // Verify if snapshot is disabled by the user 1100 String enabled = conf.get(HBASE_SNAPSHOT_ENABLED); 1101 boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false); 1102 boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled); 1103 1104 // Extract cleaners from conf 1105 Set<String> hfileCleaners = new HashSet<>(); 1106 String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS); 1107 if (cleaners != null) Collections.addAll(hfileCleaners, cleaners); 1108 1109 Set<String> logCleaners = new HashSet<>(); 1110 cleaners = conf.getStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS); 1111 if (cleaners != null) Collections.addAll(logCleaners, cleaners); 1112 1113 // check if an older version of snapshot directory was present 1114 Path oldSnapshotDir = new Path(mfs.getRootDir(), HConstants.OLD_SNAPSHOT_DIR_NAME); 1115 FileSystem fs = mfs.getFileSystem(); 1116 List<SnapshotDescription> ss = getCompletedSnapshots(new Path(rootDir, oldSnapshotDir), false); 1117 if (ss != null && !ss.isEmpty()) { 1118 LOG.error("Snapshots from an earlier release were found under: " + oldSnapshotDir); 1119 LOG.error("Please rename the directory as " + HConstants.SNAPSHOT_DIR_NAME); 1120 } 1121 1122 // If the user has enabled the snapshot, we force the cleaners to be present 1123 // otherwise we still need to check if cleaners are enabled or not and verify 1124 // that there're no snapshot in the .snapshot folder. 1125 if (snapshotEnabled) { 1126 // Inject snapshot cleaners, if snapshot.enable is true 1127 hfileCleaners.add(SnapshotHFileCleaner.class.getName()); 1128 hfileCleaners.add(HFileLinkCleaner.class.getName()); 1129 // If sync acl to HDFS feature is enabled, then inject the cleaner 1130 if (SnapshotScannerHDFSAclHelper.isAclSyncToHdfsEnabled(conf)) { 1131 hfileCleaners.add(SnapshotScannerHDFSAclCleaner.class.getName()); 1132 } 1133 1134 // Set cleaners conf 1135 conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, 1136 hfileCleaners.toArray(new String[hfileCleaners.size()])); 1137 conf.setStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, 1138 logCleaners.toArray(new String[logCleaners.size()])); 1139 } else { 1140 // Verify if cleaners are present 1141 snapshotEnabled = 1142 hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) && 1143 hfileCleaners.contains(HFileLinkCleaner.class.getName()); 1144 1145 // Warn if the cleaners are enabled but the snapshot.enabled property is false/not set. 1146 if (snapshotEnabled) { 1147 LOG.warn("Snapshot log and hfile cleaners are present in the configuration, " + 1148 "but the '" + HBASE_SNAPSHOT_ENABLED + "' property " + 1149 (userDisabled ? "is set to 'false'." : "is not set.")); 1150 } 1151 } 1152 1153 // Mark snapshot feature as enabled if cleaners are present and user has not disabled it. 1154 this.isSnapshotSupported = snapshotEnabled && !userDisabled; 1155 1156 // If cleaners are not enabled, verify that there're no snapshot in the .snapshot folder 1157 // otherwise we end up with snapshot data loss. 1158 if (!snapshotEnabled) { 1159 LOG.info("Snapshot feature is not enabled, missing log and hfile cleaners."); 1160 Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(mfs.getRootDir()); 1161 if (fs.exists(snapshotDir)) { 1162 FileStatus[] snapshots = CommonFSUtils.listStatus(fs, snapshotDir, 1163 new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs)); 1164 if (snapshots != null) { 1165 LOG.error("Snapshots are present, but cleaners are not enabled."); 1166 checkSnapshotSupport(); 1167 } 1168 } 1169 } 1170 } 1171 1172 @Override 1173 public void initialize(MasterServices master, MetricsMaster metricsMaster) throws KeeperException, 1174 IOException, UnsupportedOperationException { 1175 this.master = master; 1176 1177 this.rootDir = master.getMasterFileSystem().getRootDir(); 1178 checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem()); 1179 1180 // get the configuration for the coordinator 1181 Configuration conf = master.getConfiguration(); 1182 long wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT); 1183 long timeoutMillis = Math.max(conf.getLong(SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_KEY, 1184 SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT), 1185 conf.getLong(SnapshotDescriptionUtils.MASTER_SNAPSHOT_TIMEOUT_MILLIS, 1186 SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME)); 1187 int opThreads = conf.getInt(SNAPSHOT_POOL_THREADS_KEY, SNAPSHOT_POOL_THREADS_DEFAULT); 1188 1189 // setup the default procedure coordinator 1190 String name = master.getServerName().toString(); 1191 ThreadPoolExecutor tpool = ProcedureCoordinator.defaultPool(name, opThreads); 1192 ProcedureCoordinatorRpcs comms = new ZKProcedureCoordinator( 1193 master.getZooKeeper(), SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, name); 1194 1195 this.coordinator = new ProcedureCoordinator(comms, tpool, timeoutMillis, wakeFrequency); 1196 this.executorService = master.getExecutorService(); 1197 resetTempDir(); 1198 snapshotHandlerChoreCleanerTask = 1199 scheduleThreadPool.scheduleAtFixedRate(this::cleanupSentinels, 10, 10, TimeUnit.SECONDS); 1200 } 1201 1202 @Override 1203 public String getProcedureSignature() { 1204 return ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION; 1205 } 1206 1207 @Override 1208 public void execProcedure(ProcedureDescription desc) throws IOException { 1209 takeSnapshot(toSnapshotDescription(desc)); 1210 } 1211 1212 @Override 1213 public void checkPermissions(ProcedureDescription desc, AccessChecker accessChecker, User user) 1214 throws IOException { 1215 // Done by AccessController as part of preSnapshot coprocessor hook (legacy code path). 1216 // In future, when we AC is removed for good, that check should be moved here. 1217 } 1218 1219 @Override 1220 public boolean isProcedureDone(ProcedureDescription desc) throws IOException { 1221 return isSnapshotDone(toSnapshotDescription(desc)); 1222 } 1223 1224 private SnapshotDescription toSnapshotDescription(ProcedureDescription desc) 1225 throws IOException { 1226 SnapshotDescription.Builder builder = SnapshotDescription.newBuilder(); 1227 if (!desc.hasInstance()) { 1228 throw new IOException("Snapshot name is not defined: " + desc.toString()); 1229 } 1230 String snapshotName = desc.getInstance(); 1231 List<NameStringPair> props = desc.getConfigurationList(); 1232 String table = null; 1233 for (NameStringPair prop : props) { 1234 if ("table".equalsIgnoreCase(prop.getName())) { 1235 table = prop.getValue(); 1236 } 1237 } 1238 if (table == null) { 1239 throw new IOException("Snapshot table is not defined: " + desc.toString()); 1240 } 1241 TableName tableName = TableName.valueOf(table); 1242 builder.setTable(tableName.getNameAsString()); 1243 builder.setName(snapshotName); 1244 builder.setType(SnapshotDescription.Type.FLUSH); 1245 return builder.build(); 1246 } 1247}