View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Collection;
25  import java.util.HashMap;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.concurrent.ThreadPoolExecutor;
29  import java.util.concurrent.TimeUnit;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.hbase.HColumnDescriptor;
41  import org.apache.hadoop.hbase.HRegionInfo;
42  import org.apache.hadoop.hbase.HTableDescriptor;
43  import org.apache.hadoop.hbase.TableDescriptor;
44  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
45  import org.apache.hadoop.hbase.mob.MobUtils;
46  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
47  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
48  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotDataManifest;
49  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
50  import org.apache.hadoop.hbase.regionserver.HRegion;
51  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
52  import org.apache.hadoop.hbase.regionserver.Store;
53  import org.apache.hadoop.hbase.regionserver.StoreFile;
54  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
55  import org.apache.hadoop.hbase.util.Bytes;
56  import org.apache.hadoop.hbase.util.FSTableDescriptors;
57  import org.apache.hadoop.hbase.util.FSUtils;
58  import org.apache.hadoop.hbase.util.Threads;
59  
60  /**
61   * Utility class to help read/write the Snapshot Manifest.
62   *
63   * The snapshot format is transparent for the users of this class,
64   * once the snapshot is written, it will never be modified.
65   * On open() the snapshot will be loaded to the current in-memory format.
66   */
67  @InterfaceAudience.Private
68  public class SnapshotManifest {
/** File name of the single data manifest read by load() and written by consolidate(). */
private static final String DATA_MANIFEST_NAME = "data.manifest";

private static final Log LOG = LogFactory.getLog(SnapshotManifest.class);

// Collaborators handed in at construction time; never reassigned.
private final Configuration conf;
private final FileSystem fs;
private final Path workingDir;
// May be null: open() constructs the manifest without a monitor.
private final ForeignExceptionSnare monitor;

// Snapshot state. htd is set by addTableDescriptor() or load();
// regionManifests is set by load() or convertToV2SingleManifest().
private SnapshotDescription desc;
private HTableDescriptor htd;
private List<SnapshotRegionManifest> regionManifests;

/**
 * Stores the given collaborators; performs no I/O.
 * Instances are obtained through {@link #create} or {@link #open}.
 *
 * @param conf configuration used by the manifest readers/writers
 * @param fs file system holding the snapshot
 * @param workingDir directory of the snapshot being read or written
 * @param desc description of the snapshot
 * @param monitor checked between steps while adding regions; null when opened for reading
 */
private SnapshotManifest(final Configuration conf, final FileSystem fs,
    final Path workingDir, final SnapshotDescription desc,
    final ForeignExceptionSnare monitor) {
  this.conf = conf;
  this.fs = fs;
  this.workingDir = workingDir;
  this.desc = desc;
  this.monitor = monitor;
}
91  
92    /**
93     * Return a SnapshotManifest instance, used for writing a snapshot.
94     *
95     * There are two usage pattern:
96     *  - The Master will create a manifest, add the descriptor, offline regions
97     *    and consolidate the snapshot by writing all the pending stuff on-disk.
98     *      manifest = SnapshotManifest.create(...)
99     *      manifest.addRegion(tableDir, hri)
100    *      manifest.consolidate()
101    *  - The RegionServer will create a single region manifest
102    *      manifest = SnapshotManifest.create(...)
103    *      manifest.addRegion(region)
104    */
105   public static SnapshotManifest create(final Configuration conf, final FileSystem fs,
106       final Path workingDir, final SnapshotDescription desc,
107       final ForeignExceptionSnare monitor) {
108     return new SnapshotManifest(conf, fs, workingDir, desc, monitor);
109   }
110 
111   /**
112    * Return a SnapshotManifest instance with the information already loaded in-memory.
113    *    SnapshotManifest manifest = SnapshotManifest.open(...)
114    *    HTableDescriptor htd = manifest.getTableDescriptor()
115    *    for (SnapshotRegionManifest regionManifest: manifest.getRegionManifests())
116    *      hri = regionManifest.getRegionInfo()
117    *      for (regionManifest.getFamilyFiles())
118    *        ...
119    */
120   public static SnapshotManifest open(final Configuration conf, final FileSystem fs,
121       final Path workingDir, final SnapshotDescription desc) throws IOException {
122     SnapshotManifest manifest = new SnapshotManifest(conf, fs, workingDir, desc, null);
123     manifest.load();
124     return manifest;
125   }
126 
127 
128   /**
129    * Add the table descriptor to the snapshot manifest
130    */
131   public void addTableDescriptor(final HTableDescriptor htd) throws IOException {
132     this.htd = htd;
133   }
134 
135   interface RegionVisitor<TRegion, TFamily> {
136     TRegion regionOpen(final HRegionInfo regionInfo) throws IOException;
137     void regionClose(final TRegion region) throws IOException;
138 
139     TFamily familyOpen(final TRegion region, final byte[] familyName) throws IOException;
140     void familyClose(final TRegion region, final TFamily family) throws IOException;
141 
142     void storeFile(final TRegion region, final TFamily family, final StoreFileInfo storeFile)
143       throws IOException;
144   }
145 
146   private RegionVisitor createRegionVisitor(final SnapshotDescription desc) throws IOException {
147     switch (getSnapshotFormat(desc)) {
148       case SnapshotManifestV1.DESCRIPTOR_VERSION:
149         return new SnapshotManifestV1.ManifestBuilder(conf, fs, workingDir);
150       case SnapshotManifestV2.DESCRIPTOR_VERSION:
151         return new SnapshotManifestV2.ManifestBuilder(conf, fs, workingDir);
152       default:
153         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
154     }
155   }
156 
157   public void addMobRegion(HRegionInfo regionInfo, HColumnDescriptor[] hcds) throws IOException {
158     // 0. Get the ManifestBuilder/RegionVisitor
159     RegionVisitor visitor = createRegionVisitor(desc);
160 
161     // 1. dump region meta info into the snapshot directory
162     LOG.debug("Storing mob region '" + regionInfo + "' region-info for snapshot.");
163     Object regionData = visitor.regionOpen(regionInfo);
164     monitor.rethrowException();
165 
166     // 2. iterate through all the stores in the region
167     LOG.debug("Creating references for mob files");
168 
169     Path mobRegionPath = MobUtils.getMobRegionPath(conf, regionInfo.getTable());
170     for (HColumnDescriptor hcd : hcds) {
171       // 2.1. build the snapshot reference for the store if it's a mob store
172       if (!hcd.isMobEnabled()) {
173         continue;
174       }
175       Object familyData = visitor.familyOpen(regionData, hcd.getName());
176       monitor.rethrowException();
177 
178       Path storePath = MobUtils.getMobFamilyPath(mobRegionPath, hcd.getNameAsString());
179       if (!fs.exists(storePath)) {
180         continue;
181       }
182       FileStatus[] stats = fs.listStatus(storePath);
183       if (stats == null) {
184         continue;
185       }
186       List<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>();
187       for (FileStatus stat : stats) {
188         storeFiles.add(new StoreFileInfo(conf, fs, stat));
189       }
190       if (LOG.isDebugEnabled()) {
191         LOG.debug("Adding snapshot references for " + storeFiles + " mob files");
192       }
193 
194       // 2.2. iterate through all the mob files and create "references".
195       for (int i = 0, sz = storeFiles.size(); i < sz; i++) {
196         StoreFileInfo storeFile = storeFiles.get(i);
197         monitor.rethrowException();
198 
199         // create "reference" to this store file.
200         if (LOG.isDebugEnabled()) {
201           LOG.debug("Adding reference for mob file (" + (i + 1) + "/" + sz + "): "
202             + storeFile.getPath());
203         }
204         visitor.storeFile(regionData, familyData, storeFile);
205       }
206       visitor.familyClose(regionData, familyData);
207     }
208     visitor.regionClose(regionData);
209   }
210 
211   /**
212    * Creates a 'manifest' for the specified region, by reading directly from the HRegion object.
213    * This is used by the "online snapshot" when the table is enabled.
214    */
215   public void addRegion(final HRegion region) throws IOException {
216     // 0. Get the ManifestBuilder/RegionVisitor
217     RegionVisitor visitor = createRegionVisitor(desc);
218 
219     // 1. dump region meta info into the snapshot directory
220     LOG.debug("Storing '" + region + "' region-info for snapshot.");
221     Object regionData = visitor.regionOpen(region.getRegionInfo());
222     monitor.rethrowException();
223 
224     // 2. iterate through all the stores in the region
225     LOG.debug("Creating references for hfiles");
226 
227     for (Store store : region.getStores()) {
228       // 2.1. build the snapshot reference for the store
229       Object familyData = visitor.familyOpen(regionData, store.getFamily().getName());
230       monitor.rethrowException();
231 
232       List<StoreFile> storeFiles = new ArrayList<StoreFile>(store.getStorefiles());
233       if (LOG.isDebugEnabled()) {
234         LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
235       }
236 
237       // 2.2. iterate through all the store's files and create "references".
238       for (int i = 0, sz = storeFiles.size(); i < sz; i++) {
239         StoreFile storeFile = storeFiles.get(i);
240         monitor.rethrowException();
241 
242         // create "reference" to this store file.
243         LOG.debug("Adding reference for file (" + (i+1) + "/" + sz + "): " + storeFile.getPath());
244         visitor.storeFile(regionData, familyData, storeFile.getFileInfo());
245       }
246       visitor.familyClose(regionData, familyData);
247     }
248     visitor.regionClose(regionData);
249   }
250 
251   /**
252    * Creates a 'manifest' for the specified region, by reading directly from the disk.
253    * This is used by the "offline snapshot" when the table is disabled.
254    */
255   public void addRegion(final Path tableDir, final HRegionInfo regionInfo) throws IOException {
256     // 0. Get the ManifestBuilder/RegionVisitor
257     RegionVisitor visitor = createRegionVisitor(desc);
258 
259     boolean isMobRegion = MobUtils.isMobRegionInfo(regionInfo);
260     try {
261       // Open the RegionFS
262       HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, fs,
263             tableDir, regionInfo, true);
264       monitor.rethrowException();
265 
266       // 1. dump region meta info into the snapshot directory
267       LOG.debug("Storing region-info for snapshot.");
268       Object regionData = visitor.regionOpen(regionInfo);
269       monitor.rethrowException();
270 
271       // 2. iterate through all the stores in the region
272       LOG.debug("Creating references for hfiles");
273 
274       // This ensures that we have an atomic view of the directory as long as we have < ls limit
275       // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files
276       // in batches and may miss files being added/deleted. This could be more robust (iteratively
277       // checking to see if we have all the files until we are sure), but the limit is currently
278       // 1000 files/batch, far more than the number of store files under a single column family.
279       Collection<String> familyNames = regionFs.getFamilies();
280       if (familyNames != null) {
281         for (String familyName: familyNames) {
282           Object familyData = visitor.familyOpen(regionData, Bytes.toBytes(familyName));
283           monitor.rethrowException();
284 
285           Collection<StoreFileInfo> storeFiles = null;
286           if (isMobRegion) {
287             Path regionPath = MobUtils.getMobRegionPath(conf, regionInfo.getTable());
288             Path storePath = MobUtils.getMobFamilyPath(regionPath, familyName);
289             if (!fs.exists(storePath)) {
290               continue;
291             }
292             FileStatus[] stats = fs.listStatus(storePath);
293             if (stats == null) {
294               continue;
295             }
296             storeFiles = new ArrayList<StoreFileInfo>();
297             for (FileStatus stat : stats) {
298               storeFiles.add(new StoreFileInfo(conf, fs, stat));
299             }
300           } else {
301             storeFiles = regionFs.getStoreFiles(familyName);
302           }
303           if (storeFiles == null) {
304             if (LOG.isDebugEnabled()) {
305               LOG.debug("No files under family: " + familyName);
306             }
307             continue;
308           }
309 
310           // 2.1. build the snapshot reference for the store
311           if (LOG.isDebugEnabled()) {
312             LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
313           }
314 
315           // 2.2. iterate through all the store's files and create "references".
316           int i = 0;
317           int sz = storeFiles.size();
318           for (StoreFileInfo storeFile: storeFiles) {
319             monitor.rethrowException();
320 
321             // create "reference" to this store file.
322             LOG.debug("Adding reference for file (" + (++i) + "/" + sz + "): "
323                 + storeFile.getPath());
324             visitor.storeFile(regionData, familyData, storeFile);
325           }
326           visitor.familyClose(regionData, familyData);
327         }
328       }
329       visitor.regionClose(regionData);
330     } catch (IOException e) {
331       // the mob directory might not be created yet, so do nothing when it is a mob region
332       if (!isMobRegion) {
333         throw e;
334       }
335     }
336   }
337 
338   /**
339    * Load the information in the SnapshotManifest. Called by SnapshotManifest.open()
340    *
341    * If the format is v2 and there is no data-manifest, means that we are loading an
342    * in-progress snapshot. Since we support rolling-upgrades, we loook for v1 and v2
343    * regions format.
344    */
345   private void load() throws IOException {
346     switch (getSnapshotFormat(desc)) {
347       case SnapshotManifestV1.DESCRIPTOR_VERSION: {
348         this.htd = FSTableDescriptors.getTableDescriptorFromFs(fs, workingDir)
349             .getHTableDescriptor();
350         ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
351         try {
352           this.regionManifests =
353             SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
354         } finally {
355           tpool.shutdown();
356         }
357         break;
358       }
359       case SnapshotManifestV2.DESCRIPTOR_VERSION: {
360         SnapshotDataManifest dataManifest = readDataManifest();
361         if (dataManifest != null) {
362           htd = HTableDescriptor.convert(dataManifest.getTableSchema());
363           regionManifests = dataManifest.getRegionManifestsList();
364         } else {
365           // Compatibility, load the v1 regions
366           // This happens only when the snapshot is in-progress and the cache wants to refresh.
367           List<SnapshotRegionManifest> v1Regions, v2Regions;
368           ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
369           try {
370             v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
371             v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
372           } finally {
373             tpool.shutdown();
374           }
375           if (v1Regions != null && v2Regions != null) {
376             regionManifests =
377               new ArrayList<SnapshotRegionManifest>(v1Regions.size() + v2Regions.size());
378             regionManifests.addAll(v1Regions);
379             regionManifests.addAll(v2Regions);
380           } else if (v1Regions != null) {
381             regionManifests = v1Regions;
382           } else /* if (v2Regions != null) */ {
383             regionManifests = v2Regions;
384           }
385         }
386         break;
387       }
388       default:
389         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
390     }
391   }
392 
393   /**
394    * Get the current snapshot working dir
395    */
396   public Path getSnapshotDir() {
397     return this.workingDir;
398   }
399 
400   /**
401    * Get the SnapshotDescription
402    */
403   public SnapshotDescription getSnapshotDescription() {
404     return this.desc;
405   }
406 
407   /**
408    * Get the table descriptor from the Snapshot
409    */
410   public HTableDescriptor getTableDescriptor() {
411     return this.htd;
412   }
413 
414   /**
415    * Get all the Region Manifest from the snapshot
416    */
417   public List<SnapshotRegionManifest> getRegionManifests() {
418     return this.regionManifests;
419   }
420 
421   /**
422    * Get all the Region Manifest from the snapshot.
423    * This is an helper to get a map with the region encoded name
424    */
425   public Map<String, SnapshotRegionManifest> getRegionManifestsMap() {
426     if (regionManifests == null || regionManifests.size() == 0) return null;
427 
428     HashMap<String, SnapshotRegionManifest> regionsMap =
429         new HashMap<String, SnapshotRegionManifest>(regionManifests.size());
430     for (SnapshotRegionManifest manifest: regionManifests) {
431       String regionName = getRegionNameFromManifest(manifest);
432       regionsMap.put(regionName, manifest);
433     }
434     return regionsMap;
435   }
436 
437   public void consolidate() throws IOException {
438     if (getSnapshotFormat(desc) == SnapshotManifestV1.DESCRIPTOR_VERSION) {
439       Path rootDir = FSUtils.getRootDir(conf);
440       LOG.info("Using old Snapshot Format");
441       // write a copy of descriptor to the snapshot directory
442       new FSTableDescriptors(conf, fs, rootDir)
443         .createTableDescriptorForTableDirectory(workingDir, new TableDescriptor(
444             htd), false);
445     } else {
446       LOG.debug("Convert to Single Snapshot Manifest");
447       convertToV2SingleManifest();
448     }
449   }
450 
451   /*
452    * In case of rolling-upgrade, we try to read all the formats and build
453    * the snapshot with the latest format.
454    */
455   private void convertToV2SingleManifest() throws IOException {
456     // Try to load v1 and v2 regions
457     List<SnapshotRegionManifest> v1Regions, v2Regions;
458     ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
459     try {
460       v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
461       v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
462     } finally {
463       tpool.shutdown();
464     }
465 
466     SnapshotDataManifest.Builder dataManifestBuilder = SnapshotDataManifest.newBuilder();
467     dataManifestBuilder.setTableSchema(htd.convert());
468 
469     if (v1Regions != null && v1Regions.size() > 0) {
470       dataManifestBuilder.addAllRegionManifests(v1Regions);
471     }
472     if (v2Regions != null && v2Regions.size() > 0) {
473       dataManifestBuilder.addAllRegionManifests(v2Regions);
474     }
475 
476     // Write the v2 Data Manifest.
477     // Once the data-manifest is written, the snapshot can be considered complete.
478     // Currently snapshots are written in a "temporary" directory and later
479     // moved to the "complated" snapshot directory.
480     SnapshotDataManifest dataManifest = dataManifestBuilder.build();
481     writeDataManifest(dataManifest);
482     this.regionManifests = dataManifest.getRegionManifestsList();
483 
484     // Remove the region manifests. Everything is now in the data-manifest.
485     // The delete operation is "relaxed", unless we get an exception we keep going.
486     // The extra files in the snapshot directory will not give any problem,
487     // since they have the same content as the data manifest, and even by re-reading
488     // them we will get the same information.
489     if (v1Regions != null && v1Regions.size() > 0) {
490       for (SnapshotRegionManifest regionManifest: v1Regions) {
491         SnapshotManifestV1.deleteRegionManifest(fs, workingDir, regionManifest);
492       }
493     }
494     if (v2Regions != null && v2Regions.size() > 0) {
495       for (SnapshotRegionManifest regionManifest: v2Regions) {
496         SnapshotManifestV2.deleteRegionManifest(fs, workingDir, regionManifest);
497       }
498     }
499   }
500 
501   /*
502    * Write the SnapshotDataManifest file
503    */
504   private void writeDataManifest(final SnapshotDataManifest manifest)
505       throws IOException {
506     FSDataOutputStream stream = fs.create(new Path(workingDir, DATA_MANIFEST_NAME));
507     try {
508       manifest.writeTo(stream);
509     } finally {
510       stream.close();
511     }
512   }
513 
514   /*
515    * Read the SnapshotDataManifest file
516    */
517   private SnapshotDataManifest readDataManifest() throws IOException {
518     FSDataInputStream in = null;
519     try {
520       in = fs.open(new Path(workingDir, DATA_MANIFEST_NAME));
521       return SnapshotDataManifest.parseFrom(in);
522     } catch (FileNotFoundException e) {
523       return null;
524     } finally {
525       if (in != null) in.close();
526     }
527   }
528 
529   private ThreadPoolExecutor createExecutor(final String name) {
530     return createExecutor(conf, name);
531   }
532 
533   public static ThreadPoolExecutor createExecutor(final Configuration conf, final String name) {
534     int maxThreads = conf.getInt("hbase.snapshot.thread.pool.max", 8);
535     return Threads.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
536               Threads.getNamedThreadFactory(name));
537   }
538 
539   /**
540    * Extract the region encoded name from the region manifest
541    */
542   static String getRegionNameFromManifest(final SnapshotRegionManifest manifest) {
543     byte[] regionName = HRegionInfo.createRegionName(
544             ProtobufUtil.toTableName(manifest.getRegionInfo().getTableName()),
545             manifest.getRegionInfo().getStartKey().toByteArray(),
546             manifest.getRegionInfo().getRegionId(), true);
547     return HRegionInfo.encodeRegionName(regionName);
548   }
549 
550   /*
551    * Return the snapshot format
552    */
553   private static int getSnapshotFormat(final SnapshotDescription desc) {
554     return desc.hasVersion() ? desc.getVersion() : SnapshotManifestV1.DESCRIPTOR_VERSION;
555   }
556 }