View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import java.io.IOException;
22  import java.io.FileNotFoundException;
23  import java.util.ArrayList;
24  import java.util.Collection;
25  import java.util.HashMap;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.concurrent.ThreadPoolExecutor;
29  import java.util.concurrent.TimeUnit;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.Path;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.TableDescriptor;
42  import org.apache.hadoop.hbase.client.TableState;
43  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
44  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
45  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotDataManifest;
46  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
47  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
48  import org.apache.hadoop.hbase.regionserver.HRegion;
49  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
50  import org.apache.hadoop.hbase.regionserver.Store;
51  import org.apache.hadoop.hbase.regionserver.StoreFile;
52  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
53  import org.apache.hadoop.hbase.util.Bytes;
54  import org.apache.hadoop.hbase.util.FSUtils;
55  import org.apache.hadoop.hbase.util.FSTableDescriptors;
56  import org.apache.hadoop.hbase.util.Threads;
57  
58  /**
59   * Utility class to help read/write the Snapshot Manifest.
60   *
61   * The snapshot format is transparent for the users of this class,
62   * once the snapshot is written, it will never be modified.
63   * On open() the snapshot will be loaded to the current in-memory format.
64   */
65  @InterfaceAudience.Private
66  public class SnapshotManifest {
67    private static final Log LOG = LogFactory.getLog(SnapshotManifest.class);
68  
69    private static final String DATA_MANIFEST_NAME = "data.manifest";
70  
71    private List<SnapshotRegionManifest> regionManifests;
72    private SnapshotDescription desc;
73    private HTableDescriptor htd;
74  
75    private final ForeignExceptionSnare monitor;
76    private final Configuration conf;
77    private final Path workingDir;
78    private final FileSystem fs;
79  
80    private SnapshotManifest(final Configuration conf, final FileSystem fs,
81        final Path workingDir, final SnapshotDescription desc,
82        final ForeignExceptionSnare monitor) {
83      this.monitor = monitor;
84      this.desc = desc;
85      this.workingDir = workingDir;
86      this.conf = conf;
87      this.fs = fs;
88    }
89  
90    /**
91     * Return a SnapshotManifest instance, used for writing a snapshot.
92     *
93     * There are two usage pattern:
94     *  - The Master will create a manifest, add the descriptor, offline regions
95     *    and consolidate the snapshot by writing all the pending stuff on-disk.
96     *      manifest = SnapshotManifest.create(...)
97     *      manifest.addRegion(tableDir, hri)
98     *      manifest.consolidate()
99     *  - The RegionServer will create a single region manifest
100    *      manifest = SnapshotManifest.create(...)
101    *      manifest.addRegion(region)
102    */
103   public static SnapshotManifest create(final Configuration conf, final FileSystem fs,
104       final Path workingDir, final SnapshotDescription desc,
105       final ForeignExceptionSnare monitor) {
106     return new SnapshotManifest(conf, fs, workingDir, desc, monitor);
107   }
108 
109   /**
110    * Return a SnapshotManifest instance with the information already loaded in-memory.
111    *    SnapshotManifest manifest = SnapshotManifest.open(...)
112    *    HTableDescriptor htd = manifest.getTableDescriptor()
113    *    for (SnapshotRegionManifest regionManifest: manifest.getRegionManifests())
114    *      hri = regionManifest.getRegionInfo()
115    *      for (regionManifest.getFamilyFiles())
116    *        ...
117    */
118   public static SnapshotManifest open(final Configuration conf, final FileSystem fs,
119       final Path workingDir, final SnapshotDescription desc) throws IOException {
120     SnapshotManifest manifest = new SnapshotManifest(conf, fs, workingDir, desc, null);
121     manifest.load();
122     return manifest;
123   }
124 
125 
126   /**
127    * Add the table descriptor to the snapshot manifest
128    */
129   public void addTableDescriptor(final HTableDescriptor htd) throws IOException {
130     this.htd = htd;
131   }
132 
133   interface RegionVisitor<TRegion, TFamily> {
134     TRegion regionOpen(final HRegionInfo regionInfo) throws IOException;
135     void regionClose(final TRegion region) throws IOException;
136 
137     TFamily familyOpen(final TRegion region, final byte[] familyName) throws IOException;
138     void familyClose(final TRegion region, final TFamily family) throws IOException;
139 
140     void storeFile(final TRegion region, final TFamily family, final StoreFileInfo storeFile)
141       throws IOException;
142   }
143 
144   private RegionVisitor createRegionVisitor(final SnapshotDescription desc) throws IOException {
145     switch (getSnapshotFormat(desc)) {
146       case SnapshotManifestV1.DESCRIPTOR_VERSION:
147         return new SnapshotManifestV1.ManifestBuilder(conf, fs, workingDir);
148       case SnapshotManifestV2.DESCRIPTOR_VERSION:
149         return new SnapshotManifestV2.ManifestBuilder(conf, fs, workingDir);
150       default:
151         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
152     }
153   }
154 
155   /**
156    * Creates a 'manifest' for the specified region, by reading directly from the HRegion object.
157    * This is used by the "online snapshot" when the table is enabled.
158    */
159   public void addRegion(final HRegion region) throws IOException {
160     // 0. Get the ManifestBuilder/RegionVisitor
161     RegionVisitor visitor = createRegionVisitor(desc);
162 
163     // 1. dump region meta info into the snapshot directory
164     LOG.debug("Storing '" + region + "' region-info for snapshot.");
165     Object regionData = visitor.regionOpen(region.getRegionInfo());
166     monitor.rethrowException();
167 
168     // 2. iterate through all the stores in the region
169     LOG.debug("Creating references for hfiles");
170 
171     for (Store store : region.getStores().values()) {
172       // 2.1. build the snapshot reference for the store
173       Object familyData = visitor.familyOpen(regionData, store.getFamily().getName());
174       monitor.rethrowException();
175 
176       List<StoreFile> storeFiles = new ArrayList<StoreFile>(store.getStorefiles());
177       if (LOG.isDebugEnabled()) {
178         LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
179       }
180 
181       // 2.2. iterate through all the store's files and create "references".
182       for (int i = 0, sz = storeFiles.size(); i < sz; i++) {
183         StoreFile storeFile = storeFiles.get(i);
184         monitor.rethrowException();
185 
186         // create "reference" to this store file.
187         LOG.debug("Adding reference for file (" + (i+1) + "/" + sz + "): " + storeFile.getPath());
188         visitor.storeFile(regionData, familyData, storeFile.getFileInfo());
189       }
190       visitor.familyClose(regionData, familyData);
191     }
192     visitor.regionClose(regionData);
193   }
194 
195   /**
196    * Creates a 'manifest' for the specified region, by reading directly from the disk.
197    * This is used by the "offline snapshot" when the table is disabled.
198    */
199   public void addRegion(final Path tableDir, final HRegionInfo regionInfo) throws IOException {
200     // 0. Get the ManifestBuilder/RegionVisitor
201     RegionVisitor visitor = createRegionVisitor(desc);
202 
203     // Open the RegionFS
204     HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, fs,
205           tableDir, regionInfo, true);
206     monitor.rethrowException();
207 
208     // 1. dump region meta info into the snapshot directory
209     LOG.debug("Storing region-info for snapshot.");
210     Object regionData = visitor.regionOpen(regionInfo);
211     monitor.rethrowException();
212 
213     // 2. iterate through all the stores in the region
214     LOG.debug("Creating references for hfiles");
215 
216     // This ensures that we have an atomic view of the directory as long as we have < ls limit
217     // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files in
218     // batches and may miss files being added/deleted. This could be more robust (iteratively
219     // checking to see if we have all the files until we are sure), but the limit is currently 1000
220     // files/batch, far more than the number of store files under a single column family.
221     Collection<String> familyNames = regionFs.getFamilies();
222     if (familyNames != null) {
223       for (String familyName: familyNames) {
224         Object familyData = visitor.familyOpen(regionData, Bytes.toBytes(familyName));
225         monitor.rethrowException();
226 
227         Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(familyName);
228         if (storeFiles == null) {
229           LOG.debug("No files under family: " + familyName);
230           continue;
231         }
232 
233         // 2.1. build the snapshot reference for the store
234         if (LOG.isDebugEnabled()) {
235           LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
236         }
237 
238         // 2.2. iterate through all the store's files and create "references".
239         int i = 0;
240         int sz = storeFiles.size();
241         for (StoreFileInfo storeFile: storeFiles) {
242           monitor.rethrowException();
243 
244           // create "reference" to this store file.
245           LOG.debug("Adding reference for file ("+ (++i) +"/" + sz + "): " + storeFile.getPath());
246           visitor.storeFile(regionData, familyData, storeFile);
247         }
248         visitor.familyClose(regionData, familyData);
249       }
250     }
251     visitor.regionClose(regionData);
252   }
253 
254   /**
255    * Load the information in the SnapshotManifest. Called by SnapshotManifest.open()
256    *
257    * If the format is v2 and there is no data-manifest, means that we are loading an
258    * in-progress snapshot. Since we support rolling-upgrades, we loook for v1 and v2
259    * regions format.
260    */
261   private void load() throws IOException {
262     switch (getSnapshotFormat(desc)) {
263       case SnapshotManifestV1.DESCRIPTOR_VERSION: {
264         this.htd = FSTableDescriptors.getTableDescriptorFromFs(fs, workingDir)
265             .getHTableDescriptor();
266         ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
267         try {
268           this.regionManifests =
269             SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
270         } finally {
271           tpool.shutdown();
272         }
273         break;
274       }
275       case SnapshotManifestV2.DESCRIPTOR_VERSION: {
276         SnapshotDataManifest dataManifest = readDataManifest();
277         if (dataManifest != null) {
278           htd = HTableDescriptor.convert(dataManifest.getTableSchema());
279           regionManifests = dataManifest.getRegionManifestsList();
280         } else {
281           // Compatibility, load the v1 regions
282           // This happens only when the snapshot is in-progress and the cache wants to refresh.
283           List<SnapshotRegionManifest> v1Regions, v2Regions;
284           ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
285           try {
286             v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
287             v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
288           } finally {
289             tpool.shutdown();
290           }
291           if (v1Regions != null && v2Regions != null) {
292             regionManifests =
293               new ArrayList<SnapshotRegionManifest>(v1Regions.size() + v2Regions.size());
294             regionManifests.addAll(v1Regions);
295             regionManifests.addAll(v2Regions);
296           } else if (v1Regions != null) {
297             regionManifests = v1Regions;
298           } else /* if (v2Regions != null) */ {
299             regionManifests = v2Regions;
300           }
301         }
302         break;
303       }
304       default:
305         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
306     }
307   }
308 
309   /**
310    * Get the current snapshot working dir
311    */
312   public Path getSnapshotDir() {
313     return this.workingDir;
314   }
315 
316   /**
317    * Get the SnapshotDescription
318    */
319   public SnapshotDescription getSnapshotDescription() {
320     return this.desc;
321   }
322 
323   /**
324    * Get the table descriptor from the Snapshot
325    */
326   public HTableDescriptor getTableDescriptor() {
327     return this.htd;
328   }
329 
330   /**
331    * Get all the Region Manifest from the snapshot
332    */
333   public List<SnapshotRegionManifest> getRegionManifests() {
334     return this.regionManifests;
335   }
336 
337   /**
338    * Get all the Region Manifest from the snapshot.
339    * This is an helper to get a map with the region encoded name
340    */
341   public Map<String, SnapshotRegionManifest> getRegionManifestsMap() {
342     if (regionManifests == null || regionManifests.size() == 0) return null;
343 
344     HashMap<String, SnapshotRegionManifest> regionsMap =
345         new HashMap<String, SnapshotRegionManifest>(regionManifests.size());
346     for (SnapshotRegionManifest manifest: regionManifests) {
347       String regionName = getRegionNameFromManifest(manifest);
348       regionsMap.put(regionName, manifest);
349     }
350     return regionsMap;
351   }
352 
353   public void consolidate() throws IOException {
354     if (getSnapshotFormat(desc) == SnapshotManifestV1.DESCRIPTOR_VERSION) {
355       Path rootDir = FSUtils.getRootDir(conf);
356       LOG.info("Using old Snapshot Format");
357       // write a copy of descriptor to the snapshot directory
358       new FSTableDescriptors(fs, rootDir)
359         .createTableDescriptorForTableDirectory(workingDir, new TableDescriptor(
360             htd, TableState.State.ENABLED), false);
361     } else {
362       LOG.debug("Convert to Single Snapshot Manifest");
363       convertToV2SingleManifest();
364     }
365   }
366 
367   /*
368    * In case of rolling-upgrade, we try to read all the formats and build
369    * the snapshot with the latest format.
370    */
371   private void convertToV2SingleManifest() throws IOException {
372     // Try to load v1 and v2 regions
373     List<SnapshotRegionManifest> v1Regions, v2Regions;
374     ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
375     try {
376       v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
377       v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
378     } finally {
379       tpool.shutdown();
380     }
381 
382     SnapshotDataManifest.Builder dataManifestBuilder = SnapshotDataManifest.newBuilder();
383     dataManifestBuilder.setTableSchema(htd.convert());
384 
385     if (v1Regions != null && v1Regions.size() > 0) {
386       dataManifestBuilder.addAllRegionManifests(v1Regions);
387     }
388     if (v2Regions != null && v2Regions.size() > 0) {
389       dataManifestBuilder.addAllRegionManifests(v2Regions);
390     }
391 
392     // Write the v2 Data Manifest.
393     // Once the data-manifest is written, the snapshot can be considered complete.
394     // Currently snapshots are written in a "temporary" directory and later
395     // moved to the "complated" snapshot directory.
396     SnapshotDataManifest dataManifest = dataManifestBuilder.build();
397     writeDataManifest(dataManifest);
398     this.regionManifests = dataManifest.getRegionManifestsList();
399 
400     // Remove the region manifests. Everything is now in the data-manifest.
401     // The delete operation is "relaxed", unless we get an exception we keep going.
402     // The extra files in the snapshot directory will not give any problem,
403     // since they have the same content as the data manifest, and even by re-reading
404     // them we will get the same information.
405     if (v1Regions != null && v1Regions.size() > 0) {
406       for (SnapshotRegionManifest regionManifest: v1Regions) {
407         SnapshotManifestV1.deleteRegionManifest(fs, workingDir, regionManifest);
408       }
409     }
410     if (v2Regions != null && v2Regions.size() > 0) {
411       for (SnapshotRegionManifest regionManifest: v2Regions) {
412         SnapshotManifestV2.deleteRegionManifest(fs, workingDir, regionManifest);
413       }
414     }
415   }
416 
417   /*
418    * Write the SnapshotDataManifest file
419    */
420   private void writeDataManifest(final SnapshotDataManifest manifest)
421       throws IOException {
422     FSDataOutputStream stream = fs.create(new Path(workingDir, DATA_MANIFEST_NAME));
423     try {
424       manifest.writeTo(stream);
425     } finally {
426       stream.close();
427     }
428   }
429 
430   /*
431    * Read the SnapshotDataManifest file
432    */
433   private SnapshotDataManifest readDataManifest() throws IOException {
434     FSDataInputStream in = null;
435     try {
436       in = fs.open(new Path(workingDir, DATA_MANIFEST_NAME));
437       return SnapshotDataManifest.parseFrom(in);
438     } catch (FileNotFoundException e) {
439       return null;
440     } finally {
441       if (in != null) in.close();
442     }
443   }
444 
445   private ThreadPoolExecutor createExecutor(final String name) {
446     return createExecutor(conf, name);
447   }
448 
449   public static ThreadPoolExecutor createExecutor(final Configuration conf, final String name) {
450     int maxThreads = conf.getInt("hbase.snapshot.thread.pool.max", 8);
451     return Threads.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
452               Threads.getNamedThreadFactory(name));
453   }
454 
455   /**
456    * Extract the region encoded name from the region manifest
457    */
458   static String getRegionNameFromManifest(final SnapshotRegionManifest manifest) {
459     byte[] regionName = HRegionInfo.createRegionName(
460             ProtobufUtil.toTableName(manifest.getRegionInfo().getTableName()),
461             manifest.getRegionInfo().getStartKey().toByteArray(),
462             manifest.getRegionInfo().getRegionId(), true);
463     return HRegionInfo.encodeRegionName(regionName);
464   }
465 
466   /*
467    * Return the snapshot format
468    */
469   private static int getSnapshotFormat(final SnapshotDescription desc) {
470     return desc.hasVersion() ? desc.getVersion() : SnapshotManifestV1.DESCRIPTOR_VERSION;
471   }
472 }