View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.HashSet;
23  import java.util.List;
24  import java.util.Set;
25  import java.util.concurrent.CancellationException;
26  import java.util.concurrent.locks.ReentrantLock;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.TableName;
35  import org.apache.hadoop.hbase.HRegionInfo;
36  import org.apache.hadoop.hbase.HTableDescriptor;
37  import org.apache.hadoop.hbase.ServerName;
38  import org.apache.hadoop.hbase.MetaTableAccessor;
39  import org.apache.hadoop.hbase.errorhandling.ForeignException;
40  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
41  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
42  import org.apache.hadoop.hbase.executor.EventHandler;
43  import org.apache.hadoop.hbase.executor.EventType;
44  import org.apache.hadoop.hbase.master.MasterServices;
45  import org.apache.hadoop.hbase.master.MetricsSnapshot;
46  import org.apache.hadoop.hbase.master.SnapshotSentinel;
47  import org.apache.hadoop.hbase.master.TableLockManager;
48  import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
49  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
50  import org.apache.hadoop.hbase.monitoring.TaskMonitor;
51  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
52  import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
53  import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
54  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
55  import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
56  import org.apache.hadoop.hbase.util.FSUtils;
57  import org.apache.hadoop.hbase.util.Pair;
58  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
59  import org.apache.zookeeper.KeeperException;
60  
61  /**
62   * A handler for taking snapshots from the master.
63   *
64   * This is not a subclass of TableEventHandler because using that would incur an extra hbase:meta scan.
65   *
66   * The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor.
67   */
68  @InterfaceAudience.Private
69  public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
70      ForeignExceptionSnare {
71    private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
72  
73    private volatile boolean finished;
74  
75    // none of these should ever be null
76    protected final MasterServices master;
77    protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
78    protected final SnapshotDescription snapshot;
79    protected final Configuration conf;
80    protected final FileSystem fs;
81    protected final Path rootDir;
82    private final Path snapshotDir;
83    protected final Path workingDir;
84    private final MasterSnapshotVerifier verifier;
85    protected final ForeignExceptionDispatcher monitor;
86    protected final TableLockManager tableLockManager;
87    protected final TableLock tableLock;
88    protected final MonitoredTask status;
89    protected final TableName snapshotTable;
90    protected final SnapshotManifest snapshotManifest;
91    protected final SnapshotManager snapshotManager;
92  
93    protected HTableDescriptor htd;
94  
95    /**
96     * @param snapshot descriptor of the snapshot to take
97     * @param masterServices master services provider
98     */
99    public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices,
100                              final SnapshotManager snapshotManager) {
101     super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
102     assert snapshot != null : "SnapshotDescription must not be nul1";
103     assert masterServices != null : "MasterServices must not be nul1";
104 
105     this.master = masterServices;
106     this.snapshot = snapshot;
107     this.snapshotManager = snapshotManager;
108     this.snapshotTable = TableName.valueOf(snapshot.getTable());
109     this.conf = this.master.getConfiguration();
110     this.fs = this.master.getMasterFileSystem().getFileSystem();
111     this.rootDir = this.master.getMasterFileSystem().getRootDir();
112     this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
113     this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
114     this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
115     this.snapshotManifest = SnapshotManifest.create(conf, fs, workingDir, snapshot, monitor);
116 
117     this.tableLockManager = master.getTableLockManager();
118     this.tableLock = this.tableLockManager.writeLock(
119         snapshotTable,
120         EventType.C_M_SNAPSHOT_TABLE.toString());
121 
122     // prepare the verify
123     this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
124     // update the running tasks
125     this.status = TaskMonitor.get().createStatus(
126       "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable);
127   }
128 
129   private HTableDescriptor loadTableDescriptor()
130       throws FileNotFoundException, IOException {
131     HTableDescriptor htd =
132       this.master.getTableDescriptors().get(snapshotTable);
133     if (htd == null) {
134       throw new IOException("HTableDescriptor missing for " + snapshotTable);
135     }
136     return htd;
137   }
138 
139   public TakeSnapshotHandler prepare() throws Exception {
140     super.prepare();
141     this.tableLock.acquire(); // after this, you should ensure to release this lock in
142                               // case of exceptions
143     boolean success = false;
144     try {
145       this.htd = loadTableDescriptor(); // check that .tableinfo is present
146       success = true;
147     } finally {
148       if (!success) {
149         releaseTableLock();
150       }
151     }
152 
153     return this;
154   }
155 
156   /**
157    * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
158    * call should get implemented for each snapshot flavor.
159    */
160   @Override
161   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
162     justification="Intentional")
163   public void process() {
164     String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
165         + eventType + " on table " + snapshotTable;
166     LOG.info(msg);
167     status.setStatus(msg);
168     try {
169       // If regions move after this meta scan, the region specific snapshot should fail, triggering
170       // an external exception that gets captured here.
171 
172       // write down the snapshot info in the working directory
173       SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
174       snapshotManifest.addTableDescriptor(this.htd);
175       monitor.rethrowException();
176 
177       List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
178       if (TableName.META_TABLE_NAME.equals(snapshotTable)) {
179         regionsAndLocations = new MetaTableLocator().getMetaRegionsAndLocations(
180           server.getZooKeeper());
181       } else {
182         regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations(
183           server.getZooKeeper(), server.getConnection(), snapshotTable, false);
184       }
185 
186       // run the snapshot
187       snapshotRegions(regionsAndLocations);
188       monitor.rethrowException();
189 
190       // extract each pair to separate lists
191       Set<String> serverNames = new HashSet<String>();
192       for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
193         if (p != null && p.getFirst() != null && p.getSecond() != null) {
194           HRegionInfo hri = p.getFirst();
195           if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
196           serverNames.add(p.getSecond().toString());
197         }
198       }
199 
200       // flush the in-memory state, and write the single manifest
201       status.setStatus("Consolidate snapshot: " + snapshot.getName());
202       snapshotManifest.consolidate();
203 
204       // verify the snapshot is valid
205       status.setStatus("Verifying snapshot: " + snapshot.getName());
206       verifier.verifySnapshot(this.workingDir, serverNames);
207 
208       // complete the snapshot, atomically moving from tmp to .snapshot dir.
209       completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
210       msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
211       status.markComplete(msg);
212       LOG.info(msg);
213       metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
214     } catch (Exception e) { // FindBugs: REC_CATCH_EXCEPTION
215       status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
216           snapshotTable + " because " + e.getMessage());
217       String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
218           + " due to exception:" + e.getMessage();
219       LOG.error(reason, e);
220       ForeignException ee = new ForeignException(reason, e);
221       monitor.receive(ee);
222       // need to mark this completed to close off and allow cleanup to happen.
223       cancel(reason);
224     } finally {
225       LOG.debug("Launching cleanup of working dir:" + workingDir);
226       try {
227         // if the working dir is still present, the snapshot has failed.  it is present we delete
228         // it.
229         if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
230           LOG.error("Couldn't delete snapshot working directory:" + workingDir);
231         }
232       } catch (IOException e) {
233         LOG.error("Couldn't delete snapshot working directory:" + workingDir);
234       }
235       releaseTableLock();
236     }
237   }
238 
239   protected void releaseTableLock() {
240     if (this.tableLock != null) {
241       try {
242         this.tableLock.release();
243       } catch (IOException ex) {
244         LOG.warn("Could not release the table lock", ex);
245       }
246     }
247   }
248 
249   /**
250    * Reset the manager to allow another snapshot to proceed
251    *
252    * @param snapshotDir final path of the snapshot
253    * @param workingDir directory where the in progress snapshot was built
254    * @param fs {@link FileSystem} where the snapshot was built
255    * @throws SnapshotCreationException if the snapshot could not be moved
256    * @throws IOException the filesystem could not be reached
257    */
258   public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
259       throws SnapshotCreationException, IOException {
260     LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
261         + snapshotDir);
262     if (!fs.rename(workingDir, snapshotDir)) {
263       throw new SnapshotCreationException("Failed to move working directory(" + workingDir
264           + ") to completed directory(" + snapshotDir + ").");
265     }
266     finished = true;
267   }
268 
269   /**
270    * Snapshot the specified regions
271    */
272   protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
273       throws IOException, KeeperException;
274 
275   /**
276    * Take a snapshot of the specified disabled region
277    */
278   protected void snapshotDisabledRegion(final HRegionInfo regionInfo)
279       throws IOException {
280     snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo);
281     monitor.rethrowException();
282     status.setStatus("Completed referencing HFiles for offline region " + regionInfo.toString() +
283         " of table: " + snapshotTable);
284   }
285 
286   @Override
287   public void cancel(String why) {
288     if (finished) return;
289 
290     this.finished = true;
291     LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
292         " because: " + why);
293     CancellationException ce = new CancellationException(why);
294     monitor.receive(new ForeignException(master.getServerName().toString(), ce));
295   }
296 
297   @Override
298   public boolean isFinished() {
299     return finished;
300   }
301 
302   @Override
303   public long getCompletionTimestamp() {
304     return this.status.getCompletionTimestamp();
305   }
306 
307   @Override
308   public SnapshotDescription getSnapshot() {
309     return snapshot;
310   }
311 
312   @Override
313   public ForeignException getExceptionIfFailed() {
314     return monitor.getException();
315   }
316 
317   @Override
318   public void rethrowExceptionIfFailed() throws ForeignException {
319     monitor.rethrowException();
320   }
321 
322   @Override
323   public void rethrowException() throws ForeignException {
324     monitor.rethrowException();
325   }
326 
327   @Override
328   public boolean hasException() {
329     return monitor.hasException();
330   }
331 
332   @Override
333   public ForeignException getException() {
334     return monitor.getException();
335   }
336 
337 }