View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import java.io.IOException;
22  import java.io.FileNotFoundException;
23  import java.util.ArrayList;
24  import java.util.Collection;
25  import java.util.HashMap;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.concurrent.ThreadPoolExecutor;
29  import java.util.concurrent.TimeUnit;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.Path;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
42  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
43  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotDataManifest;
44  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
45  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
46  import org.apache.hadoop.hbase.regionserver.HRegion;
47  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
48  import org.apache.hadoop.hbase.regionserver.Store;
49  import org.apache.hadoop.hbase.regionserver.StoreFile;
50  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
51  import org.apache.hadoop.hbase.util.Bytes;
52  import org.apache.hadoop.hbase.util.FSUtils;
53  import org.apache.hadoop.hbase.util.FSTableDescriptors;
54  import org.apache.hadoop.hbase.util.Threads;
55  
56  /**
57   * Utility class to help read/write the Snapshot Manifest.
58   *
59   * The snapshot format is transparent for the users of this class,
60   * once the snapshot is written, it will never be modified.
61   * On open() the snapshot will be loaded to the current in-memory format.
62   */
63  @InterfaceAudience.Private
64  public class SnapshotManifest {
65    private static final Log LOG = LogFactory.getLog(SnapshotManifest.class);
66  
67    private static final String DATA_MANIFEST_NAME = "data.manifest";
68  
69    private List<SnapshotRegionManifest> regionManifests;
70    private SnapshotDescription desc;
71    private HTableDescriptor htd;
72  
73    private final ForeignExceptionSnare monitor;
74    private final Configuration conf;
75    private final Path workingDir;
76    private final FileSystem fs;
77  
78    private SnapshotManifest(final Configuration conf, final FileSystem fs,
79        final Path workingDir, final SnapshotDescription desc,
80        final ForeignExceptionSnare monitor) {
81      this.monitor = monitor;
82      this.desc = desc;
83      this.workingDir = workingDir;
84      this.conf = conf;
85      this.fs = fs;
86    }
87  
88    /**
89     * Return a SnapshotManifest instance, used for writing a snapshot.
90     *
91     * There are two usage pattern:
92     *  - The Master will create a manifest, add the descriptor, offline regions
93     *    and consolidate the snapshot by writing all the pending stuff on-disk.
94     *      manifest = SnapshotManifest.create(...)
95     *      manifest.addRegion(tableDir, hri)
96     *      manifest.consolidate()
97     *  - The RegionServer will create a single region manifest
98     *      manifest = SnapshotManifest.create(...)
99     *      manifest.addRegion(region)
100    */
101   public static SnapshotManifest create(final Configuration conf, final FileSystem fs,
102       final Path workingDir, final SnapshotDescription desc,
103       final ForeignExceptionSnare monitor) {
104     return new SnapshotManifest(conf, fs, workingDir, desc, monitor);
105   }
106 
107   /**
108    * Return a SnapshotManifest instance with the information already loaded in-memory.
109    *    SnapshotManifest manifest = SnapshotManifest.open(...)
110    *    HTableDescriptor htd = manifest.getTableDescriptor()
111    *    for (SnapshotRegionManifest regionManifest: manifest.getRegionManifests())
112    *      hri = regionManifest.getRegionInfo()
113    *      for (regionManifest.getFamilyFiles())
114    *        ...
115    */
116   public static SnapshotManifest open(final Configuration conf, final FileSystem fs,
117       final Path workingDir, final SnapshotDescription desc) throws IOException {
118     SnapshotManifest manifest = new SnapshotManifest(conf, fs, workingDir, desc, null);
119     manifest.load();
120     return manifest;
121   }
122 
123 
124   /**
125    * Add the table descriptor to the snapshot manifest
126    */
127   public void addTableDescriptor(final HTableDescriptor htd) throws IOException {
128     this.htd = htd;
129   }
130 
131   interface RegionVisitor<TRegion, TFamily> {
132     TRegion regionOpen(final HRegionInfo regionInfo) throws IOException;
133     void regionClose(final TRegion region) throws IOException;
134 
135     TFamily familyOpen(final TRegion region, final byte[] familyName) throws IOException;
136     void familyClose(final TRegion region, final TFamily family) throws IOException;
137 
138     void storeFile(final TRegion region, final TFamily family, final StoreFileInfo storeFile)
139       throws IOException;
140   }
141 
142   private RegionVisitor createRegionVisitor(final SnapshotDescription desc) throws IOException {
143     switch (getSnapshotFormat(desc)) {
144       case SnapshotManifestV1.DESCRIPTOR_VERSION:
145         return new SnapshotManifestV1.ManifestBuilder(conf, fs, workingDir);
146       case SnapshotManifestV2.DESCRIPTOR_VERSION:
147         return new SnapshotManifestV2.ManifestBuilder(conf, fs, workingDir);
148       default:
149         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
150     }
151   }
152 
153   /**
154    * Creates a 'manifest' for the specified region, by reading directly from the HRegion object.
155    * This is used by the "online snapshot" when the table is enabled.
156    */
157   public void addRegion(final HRegion region) throws IOException {
158     // 0. Get the ManifestBuilder/RegionVisitor
159     RegionVisitor visitor = createRegionVisitor(desc);
160 
161     // 1. dump region meta info into the snapshot directory
162     LOG.debug("Storing '" + region + "' region-info for snapshot.");
163     Object regionData = visitor.regionOpen(region.getRegionInfo());
164     monitor.rethrowException();
165 
166     // 2. iterate through all the stores in the region
167     LOG.debug("Creating references for hfiles");
168 
169     for (Store store : region.getStores().values()) {
170       // 2.1. build the snapshot reference for the store
171       Object familyData = visitor.familyOpen(regionData, store.getFamily().getName());
172       monitor.rethrowException();
173 
174       List<StoreFile> storeFiles = new ArrayList<StoreFile>(store.getStorefiles());
175       if (LOG.isDebugEnabled()) {
176         LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
177       }
178 
179       // 2.2. iterate through all the store's files and create "references".
180       for (int i = 0, sz = storeFiles.size(); i < sz; i++) {
181         StoreFile storeFile = storeFiles.get(i);
182         monitor.rethrowException();
183 
184         // create "reference" to this store file.
185         LOG.debug("Adding reference for file (" + (i+1) + "/" + sz + "): " + storeFile.getPath());
186         visitor.storeFile(regionData, familyData, storeFile.getFileInfo());
187       }
188       visitor.familyClose(regionData, familyData);
189     }
190     visitor.regionClose(regionData);
191   }
192 
193   /**
194    * Creates a 'manifest' for the specified region, by reading directly from the disk.
195    * This is used by the "offline snapshot" when the table is disabled.
196    */
197   public void addRegion(final Path tableDir, final HRegionInfo regionInfo) throws IOException {
198     // 0. Get the ManifestBuilder/RegionVisitor
199     RegionVisitor visitor = createRegionVisitor(desc);
200 
201     // Open the RegionFS
202     HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, fs,
203           tableDir, regionInfo, true);
204     monitor.rethrowException();
205 
206     // 1. dump region meta info into the snapshot directory
207     LOG.debug("Storing region-info for snapshot.");
208     Object regionData = visitor.regionOpen(regionInfo);
209     monitor.rethrowException();
210 
211     // 2. iterate through all the stores in the region
212     LOG.debug("Creating references for hfiles");
213 
214     // This ensures that we have an atomic view of the directory as long as we have < ls limit
215     // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files in
216     // batches and may miss files being added/deleted. This could be more robust (iteratively
217     // checking to see if we have all the files until we are sure), but the limit is currently 1000
218     // files/batch, far more than the number of store files under a single column family.
219     Collection<String> familyNames = regionFs.getFamilies();
220     if (familyNames != null) {
221       for (String familyName: familyNames) {
222         Object familyData = visitor.familyOpen(regionData, Bytes.toBytes(familyName));
223         monitor.rethrowException();
224 
225         Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(familyName);
226         if (storeFiles == null) {
227           LOG.debug("No files under family: " + familyName);
228           continue;
229         }
230 
231         // 2.1. build the snapshot reference for the store
232         if (LOG.isDebugEnabled()) {
233           LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
234         }
235 
236         // 2.2. iterate through all the store's files and create "references".
237         int i = 0;
238         int sz = storeFiles.size();
239         for (StoreFileInfo storeFile: storeFiles) {
240           monitor.rethrowException();
241 
242           // create "reference" to this store file.
243           LOG.debug("Adding reference for file ("+ (++i) +"/" + sz + "): " + storeFile.getPath());
244           visitor.storeFile(regionData, familyData, storeFile);
245         }
246         visitor.familyClose(regionData, familyData);
247       }
248     }
249     visitor.regionClose(regionData);
250   }
251 
252   /**
253    * Load the information in the SnapshotManifest. Called by SnapshotManifest.open()
254    *
255    * If the format is v2 and there is no data-manifest, means that we are loading an
256    * in-progress snapshot. Since we support rolling-upgrades, we loook for v1 and v2
257    * regions format.
258    */
259   private void load() throws IOException {
260     switch (getSnapshotFormat(desc)) {
261       case SnapshotManifestV1.DESCRIPTOR_VERSION: {
262         this.htd = FSTableDescriptors.getTableDescriptorFromFs(fs, workingDir);
263         ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
264         try {
265           this.regionManifests =
266             SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
267         } finally {
268           tpool.shutdown();
269         }
270         break;
271       }
272       case SnapshotManifestV2.DESCRIPTOR_VERSION: {
273         SnapshotDataManifest dataManifest = readDataManifest();
274         if (dataManifest != null) {
275           htd = HTableDescriptor.convert(dataManifest.getTableSchema());
276           regionManifests = dataManifest.getRegionManifestsList();
277         } else {
278           // Compatibility, load the v1 regions
279           // This happens only when the snapshot is in-progress and the cache wants to refresh.
280           List<SnapshotRegionManifest> v1Regions, v2Regions;
281           ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
282           try {
283             v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
284             v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
285           } finally {
286             tpool.shutdown();
287           }
288           if (v1Regions != null && v2Regions != null) {
289             regionManifests =
290               new ArrayList<SnapshotRegionManifest>(v1Regions.size() + v2Regions.size());
291             regionManifests.addAll(v1Regions);
292             regionManifests.addAll(v2Regions);
293           } else if (v1Regions != null) {
294             regionManifests = v1Regions;
295           } else /* if (v2Regions != null) */ {
296             regionManifests = v2Regions;
297           }
298         }
299         break;
300       }
301       default:
302         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
303     }
304   }
305 
306   /**
307    * Get the current snapshot working dir
308    */
309   public Path getSnapshotDir() {
310     return this.workingDir;
311   }
312 
313   /**
314    * Get the SnapshotDescription
315    */
316   public SnapshotDescription getSnapshotDescription() {
317     return this.desc;
318   }
319 
320   /**
321    * Get the table descriptor from the Snapshot
322    */
323   public HTableDescriptor getTableDescriptor() {
324     return this.htd;
325   }
326 
327   /**
328    * Get all the Region Manifest from the snapshot
329    */
330   public List<SnapshotRegionManifest> getRegionManifests() {
331     return this.regionManifests;
332   }
333 
334   /**
335    * Get all the Region Manifest from the snapshot.
336    * This is an helper to get a map with the region encoded name
337    */
338   public Map<String, SnapshotRegionManifest> getRegionManifestsMap() {
339     if (regionManifests == null || regionManifests.size() == 0) return null;
340 
341     HashMap<String, SnapshotRegionManifest> regionsMap =
342         new HashMap<String, SnapshotRegionManifest>(regionManifests.size());
343     for (SnapshotRegionManifest manifest: regionManifests) {
344       String regionName = getRegionNameFromManifest(manifest);
345       regionsMap.put(regionName, manifest);
346     }
347     return regionsMap;
348   }
349 
350   public void consolidate() throws IOException {
351     if (getSnapshotFormat(desc) == SnapshotManifestV1.DESCRIPTOR_VERSION) {
352       Path rootDir = FSUtils.getRootDir(conf);
353       LOG.info("Using old Snapshot Format");
354       // write a copy of descriptor to the snapshot directory
355       new FSTableDescriptors(fs, rootDir)
356         .createTableDescriptorForTableDirectory(workingDir, htd, false);
357     } else {
358       LOG.debug("Convert to Single Snapshot Manifest");
359       convertToV2SingleManifest();
360     }
361   }
362 
363   /*
364    * In case of rolling-upgrade, we try to read all the formats and build
365    * the snapshot with the latest format.
366    */
367   private void convertToV2SingleManifest() throws IOException {
368     // Try to load v1 and v2 regions
369     List<SnapshotRegionManifest> v1Regions, v2Regions;
370     ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
371     try {
372       v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
373       v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
374     } finally {
375       tpool.shutdown();
376     }
377 
378     SnapshotDataManifest.Builder dataManifestBuilder = SnapshotDataManifest.newBuilder();
379     dataManifestBuilder.setTableSchema(htd.convert());
380 
381     if (v1Regions != null && v1Regions.size() > 0) {
382       dataManifestBuilder.addAllRegionManifests(v1Regions);
383     }
384     if (v2Regions != null && v2Regions.size() > 0) {
385       dataManifestBuilder.addAllRegionManifests(v2Regions);
386     }
387 
388     // Write the v2 Data Manifest.
389     // Once the data-manifest is written, the snapshot can be considered complete.
390     // Currently snapshots are written in a "temporary" directory and later
391     // moved to the "complated" snapshot directory.
392     SnapshotDataManifest dataManifest = dataManifestBuilder.build();
393     writeDataManifest(dataManifest);
394     this.regionManifests = dataManifest.getRegionManifestsList();
395 
396     // Remove the region manifests. Everything is now in the data-manifest.
397     // The delete operation is "relaxed", unless we get an exception we keep going.
398     // The extra files in the snapshot directory will not give any problem,
399     // since they have the same content as the data manifest, and even by re-reading
400     // them we will get the same information.
401     if (v1Regions != null && v1Regions.size() > 0) {
402       for (SnapshotRegionManifest regionManifest: v1Regions) {
403         SnapshotManifestV1.deleteRegionManifest(fs, workingDir, regionManifest);
404       }
405     }
406     if (v2Regions != null && v2Regions.size() > 0) {
407       for (SnapshotRegionManifest regionManifest: v2Regions) {
408         SnapshotManifestV2.deleteRegionManifest(fs, workingDir, regionManifest);
409       }
410     }
411   }
412 
413   /*
414    * Write the SnapshotDataManifest file
415    */
416   private void writeDataManifest(final SnapshotDataManifest manifest)
417       throws IOException {
418     FSDataOutputStream stream = fs.create(new Path(workingDir, DATA_MANIFEST_NAME));
419     try {
420       manifest.writeTo(stream);
421     } finally {
422       stream.close();
423     }
424   }
425 
426   /*
427    * Read the SnapshotDataManifest file
428    */
429   private SnapshotDataManifest readDataManifest() throws IOException {
430     FSDataInputStream in = null;
431     try {
432       in = fs.open(new Path(workingDir, DATA_MANIFEST_NAME));
433       return SnapshotDataManifest.parseFrom(in);
434     } catch (FileNotFoundException e) {
435       return null;
436     } finally {
437       if (in != null) in.close();
438     }
439   }
440 
441   private ThreadPoolExecutor createExecutor(final String name) {
442     return createExecutor(conf, name);
443   }
444 
445   public static ThreadPoolExecutor createExecutor(final Configuration conf, final String name) {
446     int maxThreads = conf.getInt("hbase.snapshot.thread.pool.max", 8);
447     return Threads.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
448               Threads.getNamedThreadFactory(name));
449   }
450 
451   /**
452    * Extract the region encoded name from the region manifest
453    */
454   static String getRegionNameFromManifest(final SnapshotRegionManifest manifest) {
455     byte[] regionName = HRegionInfo.createRegionName(
456             ProtobufUtil.toTableName(manifest.getRegionInfo().getTableName()),
457             manifest.getRegionInfo().getStartKey().toByteArray(),
458             manifest.getRegionInfo().getRegionId(), true);
459     return HRegionInfo.encodeRegionName(regionName);
460   }
461 
462   /*
463    * Return the snapshot format
464    */
465   private static int getSnapshotFormat(final SnapshotDescription desc) {
466     return desc.hasVersion() ? desc.getVersion() : SnapshotManifestV1.DESCRIPTOR_VERSION;
467   }
468 }