1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.snapshot;
19
20 import java.io.FileNotFoundException;
21 import java.io.IOException;
22 import java.util.HashSet;
23 import java.util.List;
24 import java.util.Set;
25 import java.util.concurrent.CancellationException;
26 import java.util.concurrent.locks.ReentrantLock;
27
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30 import org.apache.hadoop.hbase.classification.InterfaceAudience;
31 import org.apache.hadoop.conf.Configuration;
32 import org.apache.hadoop.fs.FileSystem;
33 import org.apache.hadoop.fs.Path;
34 import org.apache.hadoop.hbase.TableName;
35 import org.apache.hadoop.hbase.HRegionInfo;
36 import org.apache.hadoop.hbase.HTableDescriptor;
37 import org.apache.hadoop.hbase.ServerName;
38 import org.apache.hadoop.hbase.MetaTableAccessor;
39 import org.apache.hadoop.hbase.errorhandling.ForeignException;
40 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
41 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
42 import org.apache.hadoop.hbase.executor.EventHandler;
43 import org.apache.hadoop.hbase.executor.EventType;
44 import org.apache.hadoop.hbase.master.MasterServices;
45 import org.apache.hadoop.hbase.master.MetricsSnapshot;
46 import org.apache.hadoop.hbase.master.SnapshotSentinel;
47 import org.apache.hadoop.hbase.master.TableLockManager;
48 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
49 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
50 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
51 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
52 import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
53 import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
54 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
55 import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
56 import org.apache.hadoop.hbase.util.FSUtils;
57 import org.apache.hadoop.hbase.util.Pair;
58 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
59 import org.apache.zookeeper.KeeperException;
60
61
62
63
64
65
66
67
68 @InterfaceAudience.Private
69 public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
70 ForeignExceptionSnare {
71 private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
72
73 private volatile boolean finished;
74
75
76 protected final MasterServices master;
77 protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
78 protected final SnapshotDescription snapshot;
79 protected final Configuration conf;
80 protected final FileSystem fs;
81 protected final Path rootDir;
82 private final Path snapshotDir;
83 protected final Path workingDir;
84 private final MasterSnapshotVerifier verifier;
85 protected final ForeignExceptionDispatcher monitor;
86 protected final TableLockManager tableLockManager;
87 protected final TableLock tableLock;
88 protected final MonitoredTask status;
89 protected final TableName snapshotTable;
90 protected final SnapshotManifest snapshotManifest;
91 protected final SnapshotManager snapshotManager;
92
93 protected HTableDescriptor htd;
94
95
96
97
98
99 public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices,
100 final SnapshotManager snapshotManager) {
101 super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
102 assert snapshot != null : "SnapshotDescription must not be nul1";
103 assert masterServices != null : "MasterServices must not be nul1";
104
105 this.master = masterServices;
106 this.snapshot = snapshot;
107 this.snapshotManager = snapshotManager;
108 this.snapshotTable = TableName.valueOf(snapshot.getTable());
109 this.conf = this.master.getConfiguration();
110 this.fs = this.master.getMasterFileSystem().getFileSystem();
111 this.rootDir = this.master.getMasterFileSystem().getRootDir();
112 this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
113 this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
114 this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
115 this.snapshotManifest = SnapshotManifest.create(conf, fs, workingDir, snapshot, monitor);
116
117 this.tableLockManager = master.getTableLockManager();
118 this.tableLock = this.tableLockManager.writeLock(
119 snapshotTable,
120 EventType.C_M_SNAPSHOT_TABLE.toString());
121
122
123 this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
124
125 this.status = TaskMonitor.get().createStatus(
126 "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable);
127 }
128
129 private HTableDescriptor loadTableDescriptor()
130 throws FileNotFoundException, IOException {
131 HTableDescriptor htd =
132 this.master.getTableDescriptors().get(snapshotTable);
133 if (htd == null) {
134 throw new IOException("HTableDescriptor missing for " + snapshotTable);
135 }
136 return htd;
137 }
138
139 public TakeSnapshotHandler prepare() throws Exception {
140 super.prepare();
141 this.tableLock.acquire();
142
143 boolean success = false;
144 try {
145 this.htd = loadTableDescriptor();
146 success = true;
147 } finally {
148 if (!success) {
149 releaseTableLock();
150 }
151 }
152
153 return this;
154 }
155
156
157
158
159
160 @Override
161 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
162 justification="Intentional")
163 public void process() {
164 String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
165 + eventType + " on table " + snapshotTable;
166 LOG.info(msg);
167 status.setStatus(msg);
168 try {
169
170
171
172
173 SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
174 snapshotManifest.addTableDescriptor(this.htd);
175 monitor.rethrowException();
176
177 List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
178 if (TableName.META_TABLE_NAME.equals(snapshotTable)) {
179 regionsAndLocations = new MetaTableLocator().getMetaRegionsAndLocations(
180 server.getZooKeeper());
181 } else {
182 regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations(
183 server.getZooKeeper(), server.getConnection(), snapshotTable, false);
184 }
185
186
187 snapshotRegions(regionsAndLocations);
188 monitor.rethrowException();
189
190
191 Set<String> serverNames = new HashSet<String>();
192 for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
193 if (p != null && p.getFirst() != null && p.getSecond() != null) {
194 HRegionInfo hri = p.getFirst();
195 if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
196 serverNames.add(p.getSecond().toString());
197 }
198 }
199
200
201 status.setStatus("Consolidate snapshot: " + snapshot.getName());
202 snapshotManifest.consolidate();
203
204
205 status.setStatus("Verifying snapshot: " + snapshot.getName());
206 verifier.verifySnapshot(this.workingDir, serverNames);
207
208
209 completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
210 msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
211 status.markComplete(msg);
212 LOG.info(msg);
213 metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
214 } catch (Exception e) {
215 status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
216 snapshotTable + " because " + e.getMessage());
217 String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
218 + " due to exception:" + e.getMessage();
219 LOG.error(reason, e);
220 ForeignException ee = new ForeignException(reason, e);
221 monitor.receive(ee);
222
223 cancel(reason);
224 } finally {
225 LOG.debug("Launching cleanup of working dir:" + workingDir);
226 try {
227
228
229 if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
230 LOG.error("Couldn't delete snapshot working directory:" + workingDir);
231 }
232 } catch (IOException e) {
233 LOG.error("Couldn't delete snapshot working directory:" + workingDir);
234 }
235 releaseTableLock();
236 }
237 }
238
239 protected void releaseTableLock() {
240 if (this.tableLock != null) {
241 try {
242 this.tableLock.release();
243 } catch (IOException ex) {
244 LOG.warn("Could not release the table lock", ex);
245 }
246 }
247 }
248
249
250
251
252
253
254
255
256
257
258 public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
259 throws SnapshotCreationException, IOException {
260 LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
261 + snapshotDir);
262 if (!fs.rename(workingDir, snapshotDir)) {
263 throw new SnapshotCreationException("Failed to move working directory(" + workingDir
264 + ") to completed directory(" + snapshotDir + ").");
265 }
266 finished = true;
267 }
268
269
270
271
272 protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
273 throws IOException, KeeperException;
274
275
276
277
278 protected void snapshotDisabledRegion(final HRegionInfo regionInfo)
279 throws IOException {
280 snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo);
281 monitor.rethrowException();
282 status.setStatus("Completed referencing HFiles for offline region " + regionInfo.toString() +
283 " of table: " + snapshotTable);
284 }
285
286 @Override
287 public void cancel(String why) {
288 if (finished) return;
289
290 this.finished = true;
291 LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
292 " because: " + why);
293 CancellationException ce = new CancellationException(why);
294 monitor.receive(new ForeignException(master.getServerName().toString(), ce));
295 }
296
297 @Override
298 public boolean isFinished() {
299 return finished;
300 }
301
302 @Override
303 public long getCompletionTimestamp() {
304 return this.status.getCompletionTimestamp();
305 }
306
307 @Override
308 public SnapshotDescription getSnapshot() {
309 return snapshot;
310 }
311
312 @Override
313 public ForeignException getExceptionIfFailed() {
314 return monitor.getException();
315 }
316
317 @Override
318 public void rethrowExceptionIfFailed() throws ForeignException {
319 monitor.rethrowException();
320 }
321
322 @Override
323 public void rethrowException() throws ForeignException {
324 monitor.rethrowException();
325 }
326
327 @Override
328 public boolean hasException() {
329 return monitor.hasException();
330 }
331
332 @Override
333 public ForeignException getException() {
334 return monitor.getException();
335 }
336
337 }