001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.snapshot; 019 020import java.io.FileNotFoundException; 021import java.io.IOException; 022import java.net.URI; 023import java.util.HashSet; 024import java.util.List; 025import java.util.Set; 026import java.util.concurrent.CancellationException; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.FileSystem; 029import org.apache.hadoop.fs.FileUtil; 030import org.apache.hadoop.fs.Path; 031import org.apache.hadoop.hbase.MetaTableAccessor; 032import org.apache.hadoop.hbase.ServerName; 033import org.apache.hadoop.hbase.TableName; 034import org.apache.hadoop.hbase.client.RegionInfo; 035import org.apache.hadoop.hbase.client.TableDescriptor; 036import org.apache.hadoop.hbase.errorhandling.ForeignException; 037import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; 038import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare; 039import org.apache.hadoop.hbase.executor.EventHandler; 040import org.apache.hadoop.hbase.executor.EventType; 041import org.apache.hadoop.hbase.master.MasterServices; 042import org.apache.hadoop.hbase.master.MetricsSnapshot; 043import org.apache.hadoop.hbase.master.SnapshotSentinel; 044import org.apache.hadoop.hbase.master.locking.LockManager; 045import org.apache.hadoop.hbase.master.locking.LockManager.MasterLock; 046import org.apache.hadoop.hbase.monitoring.MonitoredTask; 047import org.apache.hadoop.hbase.monitoring.TaskMonitor; 048import org.apache.hadoop.hbase.procedure2.LockType; 049import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; 050import org.apache.hadoop.hbase.snapshot.SnapshotCreationException; 051import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 052import org.apache.hadoop.hbase.snapshot.SnapshotManifest; 053import org.apache.hadoop.hbase.util.CommonFSUtils; 054import org.apache.hadoop.hbase.util.Pair; 055import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 056import org.apache.yetus.audience.InterfaceAudience; 057import org.apache.zookeeper.KeeperException; 058import org.slf4j.Logger; 059import org.slf4j.LoggerFactory; 060 061import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 062 063import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 064import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 065 066/** 067 * A handler for taking snapshots from the master. 068 * 069 * This is not a subclass of TableEventHandler because using that would incur an extra hbase:meta scan. 070 * 071 * The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor. 072 */ 073@InterfaceAudience.Private 074public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel, 075 ForeignExceptionSnare { 076 private static final Logger LOG = LoggerFactory.getLogger(TakeSnapshotHandler.class); 077 078 private volatile boolean finished; 079 080 // none of these should ever be null 081 protected final MasterServices master; 082 protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot(); 083 protected final SnapshotDescription snapshot; 084 protected final Configuration conf; 085 protected final FileSystem rootFs; 086 protected final FileSystem workingDirFs; 087 protected final Path rootDir; 088 private final Path snapshotDir; 089 protected final Path workingDir; 090 private final MasterSnapshotVerifier verifier; 091 protected final ForeignExceptionDispatcher monitor; 092 private final LockManager.MasterLock tableLock; 093 protected final MonitoredTask status; 094 protected final TableName snapshotTable; 095 protected final SnapshotManifest snapshotManifest; 096 protected final SnapshotManager snapshotManager; 097 098 protected TableDescriptor htd; 099 100 /** 101 * @param snapshot descriptor of the snapshot to take 102 * @param masterServices master services provider 103 * @throws IllegalArgumentException if the working snapshot directory set from the 104 * configuration is the same as the completed snapshot directory 105 * @throws IOException if the file system of the working snapshot directory cannot be 106 * determined 107 */ 108 public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices, 109 final SnapshotManager snapshotManager) throws IOException { 110 super(masterServices, EventType.C_M_SNAPSHOT_TABLE); 111 assert snapshot != null : "SnapshotDescription must not be nul1"; 112 assert masterServices != null : "MasterServices must not be nul1"; 113 this.master = masterServices; 114 this.conf = this.master.getConfiguration(); 115 this.rootDir = this.master.getMasterFileSystem().getRootDir(); 116 this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir, conf); 117 Preconditions.checkArgument(!SnapshotDescriptionUtils.isSubDirectoryOf(workingDir, rootDir) || 118 SnapshotDescriptionUtils.isWithinDefaultWorkingDir(workingDir, conf), 119 "The working directory " + workingDir + " cannot be in the root directory unless it is " 120 + "within the default working directory"); 121 122 this.snapshot = snapshot; 123 this.snapshotManager = snapshotManager; 124 this.snapshotTable = TableName.valueOf(snapshot.getTable()); 125 this.rootFs = this.master.getMasterFileSystem().getFileSystem(); 126 this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); 127 this.workingDirFs = this.workingDir.getFileSystem(this.conf); 128 this.monitor = new ForeignExceptionDispatcher(snapshot.getName()); 129 this.snapshotManifest = SnapshotManifest.create(conf, rootFs, workingDir, snapshot, monitor); 130 131 this.tableLock = master.getLockManager().createMasterLock( 132 snapshotTable, LockType.EXCLUSIVE, 133 this.getClass().getName() + ": take snapshot " + snapshot.getName()); 134 135 // prepare the verify 136 this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, workingDirFs); 137 // update the running tasks 138 this.status = TaskMonitor.get().createStatus( 139 "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable); 140 } 141 142 private TableDescriptor loadTableDescriptor() 143 throws FileNotFoundException, IOException { 144 TableDescriptor htd = 145 this.master.getTableDescriptors().get(snapshotTable); 146 if (htd == null) { 147 throw new IOException("TableDescriptor missing for " + snapshotTable); 148 } 149 return htd; 150 } 151 152 @Override 153 public TakeSnapshotHandler prepare() throws Exception { 154 super.prepare(); 155 // after this, you should ensure to release this lock in case of exceptions 156 this.tableLock.acquire(); 157 try { 158 this.htd = loadTableDescriptor(); // check that .tableinfo is present 159 } catch (Exception e) { 160 this.tableLock.release(); 161 throw e; 162 } 163 return this; 164 } 165 166 /** 167 * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)} 168 * call should get implemented for each snapshot flavor. 169 */ 170 @Override 171 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION", 172 justification="Intentional") 173 public void process() { 174 String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " " 175 + eventType + " on table " + snapshotTable; 176 LOG.info(msg); 177 MasterLock tableLockToRelease = this.tableLock; 178 status.setStatus(msg); 179 try { 180 if (downgradeToSharedTableLock()) { 181 // release the exclusive lock and hold the shared lock instead 182 tableLockToRelease = master.getLockManager().createMasterLock(snapshotTable, 183 LockType.SHARED, this.getClass().getName() + ": take snapshot " + snapshot.getName()); 184 tableLock.release(); 185 tableLockToRelease.acquire(); 186 } 187 // If regions move after this meta scan, the region specific snapshot should fail, triggering 188 // an external exception that gets captured here. 189 190 // write down the snapshot info in the working directory 191 SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFs); 192 snapshotManifest.addTableDescriptor(this.htd); 193 monitor.rethrowException(); 194 195 List<Pair<RegionInfo, ServerName>> regionsAndLocations; 196 if (TableName.META_TABLE_NAME.equals(snapshotTable)) { 197 regionsAndLocations = MetaTableLocator.getMetaRegionsAndLocations( 198 server.getZooKeeper()); 199 } else { 200 regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations( 201 server.getConnection(), snapshotTable, false); 202 } 203 204 // run the snapshot 205 snapshotRegions(regionsAndLocations); 206 monitor.rethrowException(); 207 208 // extract each pair to separate lists 209 Set<String> serverNames = new HashSet<>(); 210 for (Pair<RegionInfo, ServerName> p : regionsAndLocations) { 211 if (p != null && p.getFirst() != null && p.getSecond() != null) { 212 RegionInfo hri = p.getFirst(); 213 if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue; 214 serverNames.add(p.getSecond().toString()); 215 } 216 } 217 218 // flush the in-memory state, and write the single manifest 219 status.setStatus("Consolidate snapshot: " + snapshot.getName()); 220 snapshotManifest.consolidate(); 221 222 // verify the snapshot is valid 223 status.setStatus("Verifying snapshot: " + snapshot.getName()); 224 verifier.verifySnapshot(this.workingDir, serverNames); 225 226 // complete the snapshot, atomically moving from tmp to .snapshot dir. 227 SnapshotDescriptionUtils.completeSnapshot(this.snapshotDir, this.workingDir, this.rootFs, 228 this.workingDirFs, this.conf); 229 finished = true; 230 msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed"; 231 status.markComplete(msg); 232 LOG.info(msg); 233 metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime()); 234 if (master.getMasterCoprocessorHost() != null) { 235 master.getMasterCoprocessorHost() 236 .postCompletedSnapshotAction(ProtobufUtil.createSnapshotDesc(snapshot), this.htd); 237 } 238 } catch (Exception e) { // FindBugs: REC_CATCH_EXCEPTION 239 status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " + 240 snapshotTable + " because " + e.getMessage()); 241 String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) 242 + " due to exception:" + e.getMessage(); 243 LOG.error(reason, e); 244 ForeignException ee = new ForeignException(reason, e); 245 monitor.receive(ee); 246 // need to mark this completed to close off and allow cleanup to happen. 247 cancel(reason); 248 } finally { 249 LOG.debug("Launching cleanup of working dir:" + workingDir); 250 try { 251 // if the working dir is still present, the snapshot has failed. it is present we delete 252 // it. 253 if (!workingDirFs.delete(workingDir, true)) { 254 LOG.error("Couldn't delete snapshot working directory:" + workingDir); 255 } 256 } catch (IOException e) { 257 LOG.error("Couldn't delete snapshot working directory:" + workingDir); 258 } 259 tableLockToRelease.release(); 260 } 261 } 262 263 /** 264 * When taking snapshot, first we must acquire the exclusive table lock to confirm that there are 265 * no ongoing merge/split procedures. But later, we should try our best to release the exclusive 266 * lock as this may hurt the availability, because we need to hold the shared lock when assigning 267 * regions. 268 * <p/> 269 * See HBASE-21480 for more details. 270 */ 271 protected abstract boolean downgradeToSharedTableLock(); 272 273 /** 274 * Snapshot the specified regions 275 */ 276 protected abstract void snapshotRegions(List<Pair<RegionInfo, ServerName>> regions) 277 throws IOException, KeeperException; 278 279 /** 280 * Take a snapshot of the specified disabled region 281 */ 282 protected void snapshotDisabledRegion(final RegionInfo regionInfo) 283 throws IOException { 284 snapshotManifest.addRegion(CommonFSUtils.getTableDir(rootDir, snapshotTable), regionInfo); 285 monitor.rethrowException(); 286 status.setStatus("Completed referencing HFiles for offline region " + regionInfo.toString() + 287 " of table: " + snapshotTable); 288 } 289 290 @Override 291 public void cancel(String why) { 292 if (finished) return; 293 294 this.finished = true; 295 LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + 296 " because: " + why); 297 CancellationException ce = new CancellationException(why); 298 monitor.receive(new ForeignException(master.getServerName().toString(), ce)); 299 } 300 301 @Override 302 public boolean isFinished() { 303 return finished; 304 } 305 306 @Override 307 public long getCompletionTimestamp() { 308 return this.status.getCompletionTimestamp(); 309 } 310 311 @Override 312 public SnapshotDescription getSnapshot() { 313 return snapshot; 314 } 315 316 @Override 317 public ForeignException getExceptionIfFailed() { 318 return monitor.getException(); 319 } 320 321 @Override 322 public void rethrowExceptionIfFailed() throws ForeignException { 323 monitor.rethrowException(); 324 } 325 326 @Override 327 public void rethrowException() throws ForeignException { 328 monitor.rethrowException(); 329 } 330 331 @Override 332 public boolean hasException() { 333 return monitor.hasException(); 334 } 335 336 @Override 337 public ForeignException getException() { 338 return monitor.getException(); 339 } 340}