View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.HashSet;
23  import java.util.List;
24  import java.util.Set;
25  import java.util.concurrent.CancellationException;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.TableName;
34  import org.apache.hadoop.hbase.HRegionInfo;
35  import org.apache.hadoop.hbase.HTableDescriptor;
36  import org.apache.hadoop.hbase.ServerName;
37  import org.apache.hadoop.hbase.catalog.MetaReader;
38  import org.apache.hadoop.hbase.errorhandling.ForeignException;
39  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
40  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
41  import org.apache.hadoop.hbase.executor.EventHandler;
42  import org.apache.hadoop.hbase.executor.EventType;
43  import org.apache.hadoop.hbase.master.MasterServices;
44  import org.apache.hadoop.hbase.master.MetricsSnapshot;
45  import org.apache.hadoop.hbase.master.SnapshotSentinel;
46  import org.apache.hadoop.hbase.master.TableLockManager;
47  import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
48  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
49  import org.apache.hadoop.hbase.monitoring.TaskMonitor;
50  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
51  import org.apache.hadoop.hbase.regionserver.HRegion;
52  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
53  import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
54  import org.apache.hadoop.hbase.snapshot.CopyRecoveredEditsTask;
55  import org.apache.hadoop.hbase.snapshot.ReferenceRegionHFilesTask;
56  import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
57  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
58  import org.apache.hadoop.hbase.snapshot.TableInfoCopyTask;
59  import org.apache.hadoop.hbase.util.Pair;
60  import org.apache.zookeeper.KeeperException;
61  
62  /**
63   * A handler for taking snapshots from the master.
64   *
65   * This is not a subclass of TableEventHandler because using that would incur an extra hbase:meta scan.
66   *
67   * The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor.
68   */
69  @InterfaceAudience.Private
70  public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
71      ForeignExceptionSnare {
72    private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
73  
74    private volatile boolean finished;
75  
76    // none of these should ever be null
77    protected final MasterServices master;
78    protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
79    protected final SnapshotDescription snapshot;
80    protected final Configuration conf;
81    protected final FileSystem fs;
82    protected final Path rootDir;
83    private final Path snapshotDir;
84    protected final Path workingDir;
85    private final MasterSnapshotVerifier verifier;
86    protected final ForeignExceptionDispatcher monitor;
87    protected final TableLockManager tableLockManager;
88    protected final TableLock tableLock;
89    protected final MonitoredTask status;
90    protected final TableName snapshotTable;
91  
92    /**
93     * @param snapshot descriptor of the snapshot to take
94     * @param masterServices master services provider
95     */
96    public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices) {
97      super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
98      assert snapshot != null : "SnapshotDescription must not be nul1";
99      assert masterServices != null : "MasterServices must not be nul1";
100 
101     this.master = masterServices;
102     this.snapshot = snapshot;
103     this.snapshotTable = TableName.valueOf(snapshot.getTable());
104     this.conf = this.master.getConfiguration();
105     this.fs = this.master.getMasterFileSystem().getFileSystem();
106     this.rootDir = this.master.getMasterFileSystem().getRootDir();
107     this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
108     this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
109     this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
110 
111     this.tableLockManager = master.getTableLockManager();
112     this.tableLock = this.tableLockManager.writeLock(
113         snapshotTable,
114         EventType.C_M_SNAPSHOT_TABLE.toString());
115 
116     // prepare the verify
117     this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
118     // update the running tasks
119     this.status = TaskMonitor.get().createStatus(
120       "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable);
121   }
122 
123   private HTableDescriptor loadTableDescriptor()
124       throws FileNotFoundException, IOException {
125     HTableDescriptor htd =
126       this.master.getTableDescriptors().get(snapshotTable);
127     if (htd == null) {
128       throw new IOException("HTableDescriptor missing for " + snapshotTable);
129     }
130     return htd;
131   }
132 
133   public TakeSnapshotHandler prepare() throws Exception {
134     super.prepare();
135     this.tableLock.acquire(); // after this, you should ensure to release this lock in
136                               // case of exceptions
137     boolean success = false;
138     try {
139       loadTableDescriptor(); // check that .tableinfo is present
140       success = true;
141     } finally {
142       if (!success) {
143         releaseTableLock();
144       }
145     }
146 
147     return this;
148   }
149 
150   /**
151    * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
152    * call should get implemented for each snapshot flavor.
153    */
154   @Override
155   public void process() {
156     String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
157         + eventType + " on table " + snapshotTable;
158     LOG.info(msg);
159     status.setStatus(msg);
160     try {
161       // If regions move after this meta scan, the region specific snapshot should fail, triggering
162       // an external exception that gets captured here.
163 
164       // write down the snapshot info in the working directory
165       SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, this.fs);
166       new TableInfoCopyTask(monitor, snapshot, fs, rootDir).call();
167       monitor.rethrowException();
168 
169       List<Pair<HRegionInfo, ServerName>> regionsAndLocations =
170           MetaReader.getTableRegionsAndLocations(this.server.getCatalogTracker(),
171               snapshotTable, false);
172 
173       // run the snapshot
174       snapshotRegions(regionsAndLocations);
175       monitor.rethrowException();
176 
177       // extract each pair to separate lists
178       Set<String> serverNames = new HashSet<String>();
179       for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
180         if (p != null && p.getFirst() != null && p.getSecond() != null) {
181           HRegionInfo hri = p.getFirst();
182           if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
183           serverNames.add(p.getSecond().toString());
184         }
185       }
186 
187       // verify the snapshot is valid
188       status.setStatus("Verifying snapshot: " + snapshot.getName());
189       verifier.verifySnapshot(this.workingDir, serverNames);
190 
191       // complete the snapshot, atomically moving from tmp to .snapshot dir.
192       completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
193       status.markComplete("Snapshot " + snapshot.getName() + " of table " + snapshotTable
194           + " completed");
195       LOG.info("Snapshot " + snapshot.getName() + " of table " + snapshotTable
196           + " completed");
197       metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
198     } catch (Exception e) {
199       status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
200           snapshotTable + " because " + e.getMessage());
201       String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
202           + " due to exception:" + e.getMessage();
203       LOG.error(reason, e);
204       ForeignException ee = new ForeignException(reason, e);
205       monitor.receive(ee);
206       // need to mark this completed to close off and allow cleanup to happen.
207       cancel("Failed to take snapshot '" + ClientSnapshotDescriptionUtils.toString(snapshot)
208           + "' due to exception");
209     } finally {
210       LOG.debug("Launching cleanup of working dir:" + workingDir);
211       try {
212         // if the working dir is still present, the snapshot has failed.  it is present we delete
213         // it.
214         if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
215           LOG.error("Couldn't delete snapshot working directory:" + workingDir);
216         }
217       } catch (IOException e) {
218         LOG.error("Couldn't delete snapshot working directory:" + workingDir);
219       }
220       releaseTableLock();
221     }
222   }
223 
224   protected void releaseTableLock() {
225     if (this.tableLock != null) {
226       try {
227         this.tableLock.release();
228       } catch (IOException ex) {
229         LOG.warn("Could not release the table lock", ex);
230       }
231     }
232   }
233 
234   /**
235    * Reset the manager to allow another snapshot to proceed
236    *
237    * @param snapshotDir final path of the snapshot
238    * @param workingDir directory where the in progress snapshot was built
239    * @param fs {@link FileSystem} where the snapshot was built
240    * @throws SnapshotCreationException if the snapshot could not be moved
241    * @throws IOException the filesystem could not be reached
242    */
243   public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
244       throws SnapshotCreationException, IOException {
245     LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
246         + snapshotDir);
247     if (!fs.rename(workingDir, snapshotDir)) {
248       throw new SnapshotCreationException("Failed to move working directory(" + workingDir
249           + ") to completed directory(" + snapshotDir + ").");
250     }
251     finished = true;
252   }
253 
254   /**
255    * Snapshot the specified regions
256    */
257   protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
258       throws IOException, KeeperException;
259 
260   /**
261    * Take a snapshot of the specified disabled region
262    */
263   protected void snapshotDisabledRegion(final HRegionInfo regionInfo)
264       throws IOException {
265     // 2 copy the regionInfo files to the snapshot
266     HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
267       workingDir, regionInfo);
268 
269     // check for error for each region
270     monitor.rethrowException();
271 
272     // 2 for each region, copy over its recovered.edits directory
273     Path regionDir = HRegion.getRegionDir(rootDir, regionInfo);
274     Path snapshotRegionDir = regionFs.getRegionDir();
275     new CopyRecoveredEditsTask(snapshot, monitor, fs, regionDir, snapshotRegionDir).call();
276     monitor.rethrowException();
277     status.setStatus("Completed copying recovered edits for offline snapshot of table: "
278         + snapshotTable);
279 
280     // 2 reference all the files in the region
281     new ReferenceRegionHFilesTask(snapshot, monitor, regionDir, fs, snapshotRegionDir).call();
282     monitor.rethrowException();
283     status.setStatus("Completed referencing HFiles for offline snapshot of table: " +
284         snapshotTable);
285   }
286 
287   @Override
288   public void cancel(String why) {
289     if (finished) return;
290 
291     this.finished = true;
292     LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
293         " because: " + why);
294     CancellationException ce = new CancellationException(why);
295     monitor.receive(new ForeignException(master.getServerName().toString(), ce));
296   }
297 
298   @Override
299   public boolean isFinished() {
300     return finished;
301   }
302 
303   @Override
304   public long getCompletionTimestamp() {
305     return this.status.getCompletionTimestamp();
306   }
307 
308   @Override
309   public SnapshotDescription getSnapshot() {
310     return snapshot;
311   }
312 
313   @Override
314   public ForeignException getExceptionIfFailed() {
315     return monitor.getException();
316   }
317 
318   @Override
319   public void rethrowExceptionIfFailed() throws ForeignException {
320     monitor.rethrowException();
321   }
322 
323   @Override
324   public void rethrowException() throws ForeignException {
325     monitor.rethrowException();
326   }
327 
328   @Override
329   public boolean hasException() {
330     return monitor.hasException();
331   }
332 
333   @Override
334   public ForeignException getException() {
335     return monitor.getException();
336   }
337 
338 }