View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import com.google.protobuf.CodedInputStream;
22  import com.google.protobuf.InvalidProtocolBufferException;
23  
24  import java.io.FileNotFoundException;
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.Collection;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.ThreadPoolExecutor;
32  import java.util.concurrent.TimeUnit;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FSDataInputStream;
38  import org.apache.hadoop.fs.FSDataOutputStream;
39  import org.apache.hadoop.fs.FileStatus;
40  import org.apache.hadoop.fs.FileSystem;
41  import org.apache.hadoop.fs.Path;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.HTableDescriptor;
45  import org.apache.hadoop.hbase.TableDescriptor;
46  import org.apache.hadoop.hbase.classification.InterfaceAudience;
47  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
48  import org.apache.hadoop.hbase.mob.MobUtils;
49  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
50  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
51  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotDataManifest;
52  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
53  import org.apache.hadoop.hbase.regionserver.HRegion;
54  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
55  import org.apache.hadoop.hbase.regionserver.Store;
56  import org.apache.hadoop.hbase.regionserver.StoreFile;
57  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
58  import org.apache.hadoop.hbase.util.Bytes;
59  import org.apache.hadoop.hbase.util.FSTableDescriptors;
60  import org.apache.hadoop.hbase.util.FSUtils;
61  import org.apache.hadoop.hbase.util.Threads;
62  
63  /**
64   * Utility class to help read/write the Snapshot Manifest.
65   *
66   * The snapshot format is transparent for the users of this class,
67   * once the snapshot is written, it will never be modified.
68   * On open() the snapshot will be loaded to the current in-memory format.
69   */
70  @InterfaceAudience.Private
71  public final class SnapshotManifest {
72    private static final Log LOG = LogFactory.getLog(SnapshotManifest.class);
73  
74    public static final String SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY = "snapshot.manifest.size.limit";
75  
76    public static final String DATA_MANIFEST_NAME = "data.manifest";
77  
78    private List<SnapshotRegionManifest> regionManifests;
79    private SnapshotDescription desc;
80    private HTableDescriptor htd;
81  
82    private final ForeignExceptionSnare monitor;
83    private final Configuration conf;
84    private final Path workingDir;
85    private final FileSystem fs;
86    private int manifestSizeLimit;
87  
88    private SnapshotManifest(final Configuration conf, final FileSystem fs,
89        final Path workingDir, final SnapshotDescription desc,
90        final ForeignExceptionSnare monitor) {
91      this.monitor = monitor;
92      this.desc = desc;
93      this.workingDir = workingDir;
94      this.conf = conf;
95      this.fs = fs;
96  
97      this.manifestSizeLimit = conf.getInt(SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY, 64 * 1024 * 1024);
98    }
99  
100   /**
101    * Return a SnapshotManifest instance, used for writing a snapshot.
102    *
103    * There are two usage pattern:
104    *  - The Master will create a manifest, add the descriptor, offline regions
105    *    and consolidate the snapshot by writing all the pending stuff on-disk.
106    *      manifest = SnapshotManifest.create(...)
107    *      manifest.addRegion(tableDir, hri)
108    *      manifest.consolidate()
109    *  - The RegionServer will create a single region manifest
110    *      manifest = SnapshotManifest.create(...)
111    *      manifest.addRegion(region)
112    */
113   public static SnapshotManifest create(final Configuration conf, final FileSystem fs,
114       final Path workingDir, final SnapshotDescription desc,
115       final ForeignExceptionSnare monitor) {
116     return new SnapshotManifest(conf, fs, workingDir, desc, monitor);
117   }
118 
119   /**
120    * Return a SnapshotManifest instance with the information already loaded in-memory.
121    *    SnapshotManifest manifest = SnapshotManifest.open(...)
122    *    HTableDescriptor htd = manifest.getTableDescriptor()
123    *    for (SnapshotRegionManifest regionManifest: manifest.getRegionManifests())
124    *      hri = regionManifest.getRegionInfo()
125    *      for (regionManifest.getFamilyFiles())
126    *        ...
127    */
128   public static SnapshotManifest open(final Configuration conf, final FileSystem fs,
129       final Path workingDir, final SnapshotDescription desc) throws IOException {
130     SnapshotManifest manifest = new SnapshotManifest(conf, fs, workingDir, desc, null);
131     manifest.load();
132     return manifest;
133   }
134 
135 
136   /**
137    * Add the table descriptor to the snapshot manifest
138    */
139   public void addTableDescriptor(final HTableDescriptor htd) throws IOException {
140     this.htd = htd;
141   }
142 
143   interface RegionVisitor<TRegion, TFamily> {
144     TRegion regionOpen(final HRegionInfo regionInfo) throws IOException;
145     void regionClose(final TRegion region) throws IOException;
146 
147     TFamily familyOpen(final TRegion region, final byte[] familyName) throws IOException;
148     void familyClose(final TRegion region, final TFamily family) throws IOException;
149 
150     void storeFile(final TRegion region, final TFamily family, final StoreFileInfo storeFile)
151       throws IOException;
152   }
153 
154   private RegionVisitor createRegionVisitor(final SnapshotDescription desc) throws IOException {
155     switch (getSnapshotFormat(desc)) {
156       case SnapshotManifestV1.DESCRIPTOR_VERSION:
157         return new SnapshotManifestV1.ManifestBuilder(conf, fs, workingDir);
158       case SnapshotManifestV2.DESCRIPTOR_VERSION:
159         return new SnapshotManifestV2.ManifestBuilder(conf, fs, workingDir);
160       default:
161       throw new CorruptedSnapshotException("Invalid Snapshot version: " + desc.getVersion(),
162         ProtobufUtil.createSnapshotDesc(desc));
163     }
164   }
165
166   public void addMobRegion(HRegionInfo regionInfo, HColumnDescriptor[] hcds) throws IOException {
167     // 0. Get the ManifestBuilder/RegionVisitor
168     RegionVisitor visitor = createRegionVisitor(desc);
169
170     // 1. dump region meta info into the snapshot directory
171     LOG.debug("Storing mob region '" + regionInfo + "' region-info for snapshot.");
172     Object regionData = visitor.regionOpen(regionInfo);
173     monitor.rethrowException();
174
175     // 2. iterate through all the stores in the region
176     LOG.debug("Creating references for mob files");
177
178     Path mobRegionPath = MobUtils.getMobRegionPath(conf, regionInfo.getTable());
179     for (HColumnDescriptor hcd : hcds) {
180       // 2.1. build the snapshot reference for the store if it's a mob store
181       if (!hcd.isMobEnabled()) {
182         continue;
183       }
184       Object familyData = visitor.familyOpen(regionData, hcd.getName());
185       monitor.rethrowException();
186
187       Path storePath = MobUtils.getMobFamilyPath(mobRegionPath, hcd.getNameAsString());
188       List<StoreFileInfo> storeFiles = getStoreFiles(storePath);
189       if (storeFiles == null) {
190         if (LOG.isDebugEnabled()) {
191           LOG.debug("No mob files under family: " + hcd.getNameAsString());
192         }
193         continue;
194       }
195
196       addReferenceFiles(visitor, regionData, familyData, storeFiles, true);
197
198       visitor.familyClose(regionData, familyData);
199     }
200     visitor.regionClose(regionData);
201   }
202
203   /**
204    * Creates a 'manifest' for the specified region, by reading directly from the HRegion object.
205    * This is used by the "online snapshot" when the table is enabled.
206    */
207   public void addRegion(final HRegion region) throws IOException {
208     // 0. Get the ManifestBuilder/RegionVisitor
209     RegionVisitor visitor = createRegionVisitor(desc);
210
211     // 1. dump region meta info into the snapshot directory
212     LOG.debug("Storing '" + region + "' region-info for snapshot.");
213     Object regionData = visitor.regionOpen(region.getRegionInfo());
214     monitor.rethrowException();
215
216     // 2. iterate through all the stores in the region
217     LOG.debug("Creating references for hfiles");
218
219     for (Store store : region.getStores()) {
220       // 2.1. build the snapshot reference for the store
221       Object familyData = visitor.familyOpen(regionData, store.getFamily().getName());
222       monitor.rethrowException();
223
224       List<StoreFile> storeFiles = new ArrayList<StoreFile>(store.getStorefiles());
225       if (LOG.isDebugEnabled()) {
226         LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
227       }
228
229       // 2.2. iterate through all the store's files and create "references".
230       for (int i = 0, sz = storeFiles.size(); i < sz; i++) {
231         StoreFile storeFile = storeFiles.get(i);
232         monitor.rethrowException();
233
234         // create "reference" to this store file.
235         LOG.debug("Adding reference for file (" + (i+1) + "/" + sz + "): " + storeFile.getPath());
236         visitor.storeFile(regionData, familyData, storeFile.getFileInfo());
237       }
238       visitor.familyClose(regionData, familyData);
239     }
240     visitor.regionClose(regionData);
241   }
242
243   /**
244    * Creates a 'manifest' for the specified region, by reading directly from the disk.
245    * This is used by the "offline snapshot" when the table is disabled.
246    */
247   public void addRegion(final Path tableDir, final HRegionInfo regionInfo) throws IOException {
248     // 0. Get the ManifestBuilder/RegionVisitor
249     RegionVisitor visitor = createRegionVisitor(desc);
250
251     boolean isMobRegion = MobUtils.isMobRegionInfo(regionInfo);
252     try {
253       // Open the RegionFS
254       HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, fs,
255             tableDir, regionInfo, true);
256       monitor.rethrowException();
257
258       // 1. dump region meta info into the snapshot directory
259       LOG.debug("Storing region-info for snapshot.");
260       Object regionData = visitor.regionOpen(regionInfo);
261       monitor.rethrowException();
262
263       // 2. iterate through all the stores in the region
264       LOG.debug("Creating references for hfiles");
265
266       // This ensures that we have an atomic view of the directory as long as we have < ls limit
267       // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files
268       // in batches and may miss files being added/deleted. This could be more robust (iteratively
269       // checking to see if we have all the files until we are sure), but the limit is currently
270       // 1000 files/batch, far more than the number of store files under a single column family.
271       Collection<String> familyNames = regionFs.getFamilies();
272       if (familyNames != null) {
273         for (String familyName: familyNames) {
274           Object familyData = visitor.familyOpen(regionData, Bytes.toBytes(familyName));
275           monitor.rethrowException();
276
277           Collection<StoreFileInfo> storeFiles = null;
278           if (isMobRegion) {
279             Path regionPath = MobUtils.getMobRegionPath(conf, regionInfo.getTable());
280             Path storePath = MobUtils.getMobFamilyPath(regionPath, familyName);
281             storeFiles = getStoreFiles(storePath);
282           } else {
283             storeFiles = regionFs.getStoreFiles(familyName);
284           }
285
286           if (storeFiles == null) {
287             if (LOG.isDebugEnabled()) {
288               LOG.debug("No files under family: " + familyName);
289             }
290             continue;
291           }
292
293           // 2.1. build the snapshot reference for the store
294           // iterate through all the store's files and create "references".
295           addReferenceFiles(visitor, regionData, familyData, storeFiles, false);
296
297           visitor.familyClose(regionData, familyData);
298         }
299       }
300       visitor.regionClose(regionData);
301     } catch (IOException e) {
302       // the mob directory might not be created yet, so do nothing when it is a mob region
303       if (!isMobRegion) {
304         throw e;
305       }
306     }
307   }
308
309   private List<StoreFileInfo> getStoreFiles(Path storeDir) throws IOException {
310     FileStatus[] stats = FSUtils.listStatus(fs, storeDir);
311     if (stats == null) return null;
312
313     ArrayList<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>(stats.length);
314     for (int i = 0; i < stats.length; ++i) {
315       storeFiles.add(new StoreFileInfo(conf, fs, stats[i]));
316     }
317     return storeFiles;
318   }
319
320   private void addReferenceFiles(RegionVisitor visitor, Object regionData, Object familyData,
321       Collection<StoreFileInfo> storeFiles, boolean isMob) throws IOException {
322     final String fileType = isMob ? "mob file" : "hfile";
323
324     if (LOG.isDebugEnabled()) {
325       LOG.debug(String.format("Adding snapshot references for %s %ss", storeFiles, fileType));
326     }
327
328     int i = 0;
329     int sz = storeFiles.size();
330     for (StoreFileInfo storeFile: storeFiles) {
331       monitor.rethrowException();
332
333       LOG.debug(String.format("Adding reference for %s (%d/%d): %s",
334           fileType, ++i, sz, storeFile.getPath()));
335
336       // create "reference" to this store file.
337       visitor.storeFile(regionData, familyData, storeFile);
338     }
339   }
340
341   /**
342    * Load the information in the SnapshotManifest. Called by SnapshotManifest.open()
343    *
344    * If the format is v2 and there is no data-manifest, means that we are loading an
345    * in-progress snapshot. Since we support rolling-upgrades, we loook for v1 and v2
346    * regions format.
347    */
348   private void load() throws IOException {
349     switch (getSnapshotFormat(desc)) {
350       case SnapshotManifestV1.DESCRIPTOR_VERSION: {
351         this.htd = FSTableDescriptors.getTableDescriptorFromFs(fs, workingDir)
352             .getHTableDescriptor();
353         ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
354         try {
355           this.regionManifests =
356             SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
357         } finally {
358           tpool.shutdown();
359         }
360         break;
361       }
362       case SnapshotManifestV2.DESCRIPTOR_VERSION: {
363         SnapshotDataManifest dataManifest = readDataManifest();
364         if (dataManifest != null) {
365           htd = ProtobufUtil.convertToHTableDesc(dataManifest.getTableSchema());
366           regionManifests = dataManifest.getRegionManifestsList();
367         } else {
368           // Compatibility, load the v1 regions
369           // This happens only when the snapshot is in-progress and the cache wants to refresh.
370           List<SnapshotRegionManifest> v1Regions, v2Regions;
371           ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
372           try {
373             v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
374             v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
375           } catch (InvalidProtocolBufferException e) {
376             throw new CorruptedSnapshotException("unable to parse region manifest " +
377                 e.getMessage(), e);
378           } finally {
379             tpool.shutdown();
380           }
381           if (v1Regions != null && v2Regions != null) {
382             regionManifests =
383               new ArrayList<SnapshotRegionManifest>(v1Regions.size() + v2Regions.size());
384             regionManifests.addAll(v1Regions);
385             regionManifests.addAll(v2Regions);
386           } else if (v1Regions != null) {
387             regionManifests = v1Regions;
388           } else /* if (v2Regions != null) */ {
389             regionManifests = v2Regions;
390           }
391         }
392         break;
393       }
394       default:
395       throw new CorruptedSnapshotException("Invalid Snapshot version: " + desc.getVersion(),
396         ProtobufUtil.createSnapshotDesc(desc));
397     }
398   }
399
400   /**
401    * Get the current snapshot working dir
402    */
403   public Path getSnapshotDir() {
404     return this.workingDir;
405   }
406
407   /**
408    * Get the SnapshotDescription
409    */
410   public SnapshotDescription getSnapshotDescription() {
411     return this.desc;
412   }
413
414   /**
415    * Get the table descriptor from the Snapshot
416    */
417   public HTableDescriptor getTableDescriptor() {
418     return this.htd;
419   }
420
421   /**
422    * Get all the Region Manifest from the snapshot
423    */
424   public List<SnapshotRegionManifest> getRegionManifests() {
425     return this.regionManifests;
426   }
427
428   /**
429    * Get all the Region Manifest from the snapshot.
430    * This is an helper to get a map with the region encoded name
431    */
432   public Map<String, SnapshotRegionManifest> getRegionManifestsMap() {
433     if (regionManifests == null || regionManifests.size() == 0) return null;
434
435     HashMap<String, SnapshotRegionManifest> regionsMap =
436         new HashMap<String, SnapshotRegionManifest>(regionManifests.size());
437     for (SnapshotRegionManifest manifest: regionManifests) {
438       String regionName = getRegionNameFromManifest(manifest);
439       regionsMap.put(regionName, manifest);
440     }
441     return regionsMap;
442   }
443
444   public void consolidate() throws IOException {
445     if (getSnapshotFormat(desc) == SnapshotManifestV1.DESCRIPTOR_VERSION) {
446       Path rootDir = FSUtils.getRootDir(conf);
447       LOG.info("Using old Snapshot Format");
448       // write a copy of descriptor to the snapshot directory
449       new FSTableDescriptors(conf, fs, rootDir)
450         .createTableDescriptorForTableDirectory(workingDir, new TableDescriptor(
451             htd), false);
452     } else {
453       LOG.debug("Convert to Single Snapshot Manifest");
454       convertToV2SingleManifest();
455     }
456   }
457
458   /*
459    * In case of rolling-upgrade, we try to read all the formats and build
460    * the snapshot with the latest format.
461    */
462   private void convertToV2SingleManifest() throws IOException {
463     // Try to load v1 and v2 regions
464     List<SnapshotRegionManifest> v1Regions, v2Regions;
465     ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
466     try {
467       v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
468       v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
469     } finally {
470       tpool.shutdown();
471     }
472
473     SnapshotDataManifest.Builder dataManifestBuilder = SnapshotDataManifest.newBuilder();
474     dataManifestBuilder.setTableSchema(ProtobufUtil.convertToTableSchema(htd));
475
476     if (v1Regions != null && v1Regions.size() > 0) {
477       dataManifestBuilder.addAllRegionManifests(v1Regions);
478     }
479     if (v2Regions != null && v2Regions.size() > 0) {
480       dataManifestBuilder.addAllRegionManifests(v2Regions);
481     }
482
483     // Write the v2 Data Manifest.
484     // Once the data-manifest is written, the snapshot can be considered complete.
485     // Currently snapshots are written in a "temporary" directory and later
486     // moved to the "complated" snapshot directory.
487     SnapshotDataManifest dataManifest = dataManifestBuilder.build();
488     writeDataManifest(dataManifest);
489     this.regionManifests = dataManifest.getRegionManifestsList();
490
491     // Remove the region manifests. Everything is now in the data-manifest.
492     // The delete operation is "relaxed", unless we get an exception we keep going.
493     // The extra files in the snapshot directory will not give any problem,
494     // since they have the same content as the data manifest, and even by re-reading
495     // them we will get the same information.
496     if (v1Regions != null && v1Regions.size() > 0) {
497       for (SnapshotRegionManifest regionManifest: v1Regions) {
498         SnapshotManifestV1.deleteRegionManifest(fs, workingDir, regionManifest);
499       }
500     }
501     if (v2Regions != null && v2Regions.size() > 0) {
502       for (SnapshotRegionManifest regionManifest: v2Regions) {
503         SnapshotManifestV2.deleteRegionManifest(fs, workingDir, regionManifest);
504       }
505     }
506   }
507
508   /*
509    * Write the SnapshotDataManifest file
510    */
511   private void writeDataManifest(final SnapshotDataManifest manifest)
512       throws IOException {
513     FSDataOutputStream stream = fs.create(new Path(workingDir, DATA_MANIFEST_NAME));
514     try {
515       manifest.writeTo(stream);
516     } finally {
517       stream.close();
518     }
519   }
520
521   /*
522    * Read the SnapshotDataManifest file
523    */
524   private SnapshotDataManifest readDataManifest() throws IOException {
525     FSDataInputStream in = null;
526     try {
527       in = fs.open(new Path(workingDir, DATA_MANIFEST_NAME));
528       CodedInputStream cin = CodedInputStream.newInstance(in);
529       cin.setSizeLimit(manifestSizeLimit);
530       return SnapshotDataManifest.parseFrom(cin);
531     } catch (FileNotFoundException e) {
532       return null;
533     } catch (InvalidProtocolBufferException e) {
534       throw new CorruptedSnapshotException("unable to parse data manifest " + e.getMessage(), e);
535     } finally {
536       if (in != null) in.close();
537     }
538   }
539
540   private ThreadPoolExecutor createExecutor(final String name) {
541     return createExecutor(conf, name);
542   }
543
544   public static ThreadPoolExecutor createExecutor(final Configuration conf, final String name) {
545     int maxThreads = conf.getInt("hbase.snapshot.thread.pool.max", 8);
546     return Threads.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
547               Threads.getNamedThreadFactory(name));
548   }
549
550   /**
551    * Extract the region encoded name from the region manifest
552    */
553   static String getRegionNameFromManifest(final SnapshotRegionManifest manifest) {
554     byte[] regionName = HRegionInfo.createRegionName(
555             ProtobufUtil.toTableName(manifest.getRegionInfo().getTableName()),
556             manifest.getRegionInfo().getStartKey().toByteArray(),
557             manifest.getRegionInfo().getRegionId(), true);
558     return HRegionInfo.encodeRegionName(regionName);
559   }
560
561   /*
562    * Return the snapshot format
563    */
564   private static int getSnapshotFormat(final SnapshotDescription desc) {
565     return desc.hasVersion() ? desc.getVersion() : SnapshotManifestV1.DESCRIPTOR_VERSION;
566   }
567 }