View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.HashSet;
23  import java.util.List;
24  import java.util.Set;
25  import java.util.concurrent.CancellationException;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.TableName;
34  import org.apache.hadoop.hbase.HRegionInfo;
35  import org.apache.hadoop.hbase.HTableDescriptor;
36  import org.apache.hadoop.hbase.ServerName;
37  import org.apache.hadoop.hbase.MetaTableAccessor;
38  import org.apache.hadoop.hbase.errorhandling.ForeignException;
39  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
40  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
41  import org.apache.hadoop.hbase.executor.EventHandler;
42  import org.apache.hadoop.hbase.executor.EventType;
43  import org.apache.hadoop.hbase.master.MasterServices;
44  import org.apache.hadoop.hbase.master.MetricsSnapshot;
45  import org.apache.hadoop.hbase.master.SnapshotSentinel;
46  import org.apache.hadoop.hbase.master.TableLockManager;
47  import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
48  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
49  import org.apache.hadoop.hbase.monitoring.TaskMonitor;
50  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
51  import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
52  import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
53  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
54  import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
55  import org.apache.hadoop.hbase.util.FSUtils;
56  import org.apache.hadoop.hbase.util.Pair;
57  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
58  import org.apache.zookeeper.KeeperException;
59  
60  /**
61   * A handler for taking snapshots from the master.
62   *
63   * This is not a subclass of TableEventHandler because using that would incur an extra hbase:meta scan.
64   *
65   * The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor.
66   */
67  @InterfaceAudience.Private
68  public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
69      ForeignExceptionSnare {
70    private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
71  
72    private volatile boolean finished;
73  
74    // none of these should ever be null
75    protected final MasterServices master;
76    protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
77    protected final SnapshotDescription snapshot;
78    protected final Configuration conf;
79    protected final FileSystem fs;
80    protected final Path rootDir;
81    private final Path snapshotDir;
82    protected final Path workingDir;
83    private final MasterSnapshotVerifier verifier;
84    protected final ForeignExceptionDispatcher monitor;
85    protected final TableLockManager tableLockManager;
86    protected final TableLock tableLock;
87    protected final MonitoredTask status;
88    protected final TableName snapshotTable;
89    protected final SnapshotManifest snapshotManifest;
90  
91    protected HTableDescriptor htd;
92  
93    /**
94     * @param snapshot descriptor of the snapshot to take
95     * @param masterServices master services provider
96     */
97    public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices) {
98      super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
99      assert snapshot != null : "SnapshotDescription must not be nul1";
100     assert masterServices != null : "MasterServices must not be nul1";
101 
102     this.master = masterServices;
103     this.snapshot = snapshot;
104     this.snapshotTable = TableName.valueOf(snapshot.getTable());
105     this.conf = this.master.getConfiguration();
106     this.fs = this.master.getMasterFileSystem().getFileSystem();
107     this.rootDir = this.master.getMasterFileSystem().getRootDir();
108     this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
109     this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
110     this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
111     this.snapshotManifest = SnapshotManifest.create(conf, fs, workingDir, snapshot, monitor);
112 
113     this.tableLockManager = master.getTableLockManager();
114     this.tableLock = this.tableLockManager.writeLock(
115         snapshotTable,
116         EventType.C_M_SNAPSHOT_TABLE.toString());
117 
118     // prepare the verify
119     this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
120     // update the running tasks
121     this.status = TaskMonitor.get().createStatus(
122       "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable);
123   }
124 
125   private HTableDescriptor loadTableDescriptor()
126       throws FileNotFoundException, IOException {
127     HTableDescriptor htd =
128       this.master.getTableDescriptors().get(snapshotTable);
129     if (htd == null) {
130       throw new IOException("HTableDescriptor missing for " + snapshotTable);
131     }
132     return htd;
133   }
134 
135   public TakeSnapshotHandler prepare() throws Exception {
136     super.prepare();
137     this.tableLock.acquire(); // after this, you should ensure to release this lock in
138                               // case of exceptions
139     boolean success = false;
140     try {
141       this.htd = loadTableDescriptor(); // check that .tableinfo is present
142       success = true;
143     } finally {
144       if (!success) {
145         releaseTableLock();
146       }
147     }
148 
149     return this;
150   }
151 
152   /**
153    * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
154    * call should get implemented for each snapshot flavor.
155    */
156   @Override
157   public void process() {
158     String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
159         + eventType + " on table " + snapshotTable;
160     LOG.info(msg);
161     status.setStatus(msg);
162     try {
163       // If regions move after this meta scan, the region specific snapshot should fail, triggering
164       // an external exception that gets captured here.
165 
166       // write down the snapshot info in the working directory
167       SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
168       snapshotManifest.addTableDescriptor(this.htd);
169       monitor.rethrowException();
170 
171       List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
172       if (TableName.META_TABLE_NAME.equals(snapshotTable)) {
173         regionsAndLocations = new MetaTableLocator().getMetaRegionsAndLocations(
174           server.getZooKeeper());
175       } else {
176         regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations(
177           server.getShortCircuitConnection(), snapshotTable, false);
178       }
179 
180       // run the snapshot
181       snapshotRegions(regionsAndLocations);
182       monitor.rethrowException();
183 
184       // extract each pair to separate lists
185       Set<String> serverNames = new HashSet<String>();
186       for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
187         if (p != null && p.getFirst() != null && p.getSecond() != null) {
188           HRegionInfo hri = p.getFirst();
189           if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
190           serverNames.add(p.getSecond().toString());
191         }
192       }
193 
194       // flush the in-memory state, and write the single manifest
195       status.setStatus("Consolidate snapshot: " + snapshot.getName());
196       snapshotManifest.consolidate();
197 
198       // verify the snapshot is valid
199       status.setStatus("Verifying snapshot: " + snapshot.getName());
200       verifier.verifySnapshot(this.workingDir, serverNames);
201 
202       // complete the snapshot, atomically moving from tmp to .snapshot dir.
203       completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
204       msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
205       status.markComplete(msg);
206       LOG.info(msg);
207       metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
208     } catch (Exception e) {
209       status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
210           snapshotTable + " because " + e.getMessage());
211       String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
212           + " due to exception:" + e.getMessage();
213       LOG.error(reason, e);
214       ForeignException ee = new ForeignException(reason, e);
215       monitor.receive(ee);
216       // need to mark this completed to close off and allow cleanup to happen.
217       cancel(reason);
218     } finally {
219       LOG.debug("Launching cleanup of working dir:" + workingDir);
220       try {
221         // if the working dir is still present, the snapshot has failed.  it is present we delete
222         // it.
223         if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
224           LOG.error("Couldn't delete snapshot working directory:" + workingDir);
225         }
226       } catch (IOException e) {
227         LOG.error("Couldn't delete snapshot working directory:" + workingDir);
228       }
229       releaseTableLock();
230     }
231   }
232 
233   protected void releaseTableLock() {
234     if (this.tableLock != null) {
235       try {
236         this.tableLock.release();
237       } catch (IOException ex) {
238         LOG.warn("Could not release the table lock", ex);
239       }
240     }
241   }
242 
243   /**
244    * Reset the manager to allow another snapshot to proceed
245    *
246    * @param snapshotDir final path of the snapshot
247    * @param workingDir directory where the in progress snapshot was built
248    * @param fs {@link FileSystem} where the snapshot was built
249    * @throws SnapshotCreationException if the snapshot could not be moved
250    * @throws IOException the filesystem could not be reached
251    */
252   public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
253       throws SnapshotCreationException, IOException {
254     LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
255         + snapshotDir);
256     if (!fs.rename(workingDir, snapshotDir)) {
257       throw new SnapshotCreationException("Failed to move working directory(" + workingDir
258           + ") to completed directory(" + snapshotDir + ").");
259     }
260     finished = true;
261   }
262 
263   /**
264    * Snapshot the specified regions
265    */
266   protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
267       throws IOException, KeeperException;
268 
269   /**
270    * Take a snapshot of the specified disabled region
271    */
272   protected void snapshotDisabledRegion(final HRegionInfo regionInfo)
273       throws IOException {
274     snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo);
275     monitor.rethrowException();
276     status.setStatus("Completed referencing HFiles for offline region " + regionInfo.toString() +
277         " of table: " + snapshotTable);
278   }
279 
280   @Override
281   public void cancel(String why) {
282     if (finished) return;
283 
284     this.finished = true;
285     LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
286         " because: " + why);
287     CancellationException ce = new CancellationException(why);
288     monitor.receive(new ForeignException(master.getServerName().toString(), ce));
289   }
290 
291   @Override
292   public boolean isFinished() {
293     return finished;
294   }
295 
296   @Override
297   public long getCompletionTimestamp() {
298     return this.status.getCompletionTimestamp();
299   }
300 
301   @Override
302   public SnapshotDescription getSnapshot() {
303     return snapshot;
304   }
305 
306   @Override
307   public ForeignException getExceptionIfFailed() {
308     return monitor.getException();
309   }
310 
311   @Override
312   public void rethrowExceptionIfFailed() throws ForeignException {
313     monitor.rethrowException();
314   }
315 
316   @Override
317   public void rethrowException() throws ForeignException {
318     monitor.rethrowException();
319   }
320 
321   @Override
322   public boolean hasException() {
323     return monitor.hasException();
324   }
325 
326   @Override
327   public ForeignException getException() {
328     return monitor.getException();
329   }
330 
331 }