1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import java.io.IOException;
22  import java.io.FileNotFoundException;
23  import java.util.ArrayList;
24  import java.util.Collection;
25  import java.util.HashMap;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.concurrent.ThreadPoolExecutor;
29  import java.util.concurrent.TimeUnit;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.Path;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.TableDescriptor;
42  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
43  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
44  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotDataManifest;
45  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
46  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
47  import org.apache.hadoop.hbase.regionserver.HRegion;
48  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
49  import org.apache.hadoop.hbase.regionserver.Store;
50  import org.apache.hadoop.hbase.regionserver.StoreFile;
51  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
52  import org.apache.hadoop.hbase.util.Bytes;
53  import org.apache.hadoop.hbase.util.FSUtils;
54  import org.apache.hadoop.hbase.util.FSTableDescriptors;
55  import org.apache.hadoop.hbase.util.Threads;
56  
57  /**
58   * Utility class to help read/write the Snapshot Manifest.
59   *
60   * The snapshot format is transparent to the users of this class;
61   * once the snapshot is written, it is never modified.
62   * On open() the snapshot is loaded into the current in-memory format.
63   */
64  @InterfaceAudience.Private
65  public class SnapshotManifest {
66    private static final Log LOG = LogFactory.getLog(SnapshotManifest.class);
67  
68    private static final String DATA_MANIFEST_NAME = "data.manifest";
69  
70    private List<SnapshotRegionManifest> regionManifests;
71    private SnapshotDescription desc;
72    private HTableDescriptor htd;
73  
74    private final ForeignExceptionSnare monitor;
75    private final Configuration conf;
76    private final Path workingDir;
77    private final FileSystem fs;
78  
79    private SnapshotManifest(final Configuration conf, final FileSystem fs,
80        final Path workingDir, final SnapshotDescription desc,
81        final ForeignExceptionSnare monitor) {
82      this.monitor = monitor;
83      this.desc = desc;
84      this.workingDir = workingDir;
85      this.conf = conf;
86      this.fs = fs;
87    }
88  
89    /**
90     * Return a SnapshotManifest instance, used for writing a snapshot.
91     *
92    * There are two usage patterns:
93    *  - The Master creates the manifest, adds the table descriptor and the offline
94    *    regions, then consolidates the snapshot by writing everything to disk (sketch below).
95     *      manifest = SnapshotManifest.create(...)
96     *      manifest.addRegion(tableDir, hri)
97     *      manifest.consolidate()
98     *  - The RegionServer will create a single region manifest
99     *      manifest = SnapshotManifest.create(...)
100    *      manifest.addRegion(region)
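   *
   * A fuller sketch of the master-side flow (illustrative only; conf, fs, workingDir,
   * snapshotDesc, monitor, htd, tableDir and the region list are assumed to be provided
   * by the caller):
   *      manifest = SnapshotManifest.create(conf, fs, workingDir, snapshotDesc, monitor)
   *      manifest.addTableDescriptor(htd)
   *      for (HRegionInfo hri: regions) manifest.addRegion(tableDir, hri)
   *      manifest.consolidate()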
101    */
102   public static SnapshotManifest create(final Configuration conf, final FileSystem fs,
103       final Path workingDir, final SnapshotDescription desc,
104       final ForeignExceptionSnare monitor) {
105     return new SnapshotManifest(conf, fs, workingDir, desc, monitor);
106   }
107 
108   /**
109    * Return a SnapshotManifest instance with the information already loaded in-memory.
110    *    SnapshotManifest manifest = SnapshotManifest.open(...)
111    *    HTableDescriptor htd = manifest.getTableDescriptor()
112    *    for (SnapshotRegionManifest regionManifest: manifest.getRegionManifests())
113    *      HBaseProtos.RegionInfo hri = regionManifest.getRegionInfo()
114    *      for (SnapshotRegionManifest.FamilyFiles familyFiles: regionManifest.getFamilyFilesList())
115    *        ...
116    */
117   public static SnapshotManifest open(final Configuration conf, final FileSystem fs,
118       final Path workingDir, final SnapshotDescription desc) throws IOException {
119     SnapshotManifest manifest = new SnapshotManifest(conf, fs, workingDir, desc, null);
120     manifest.load();
121     return manifest;
122   }
123 
124 
125   /**
126    * Add the table descriptor to the snapshot manifest
127    */
128   public void addTableDescriptor(final HTableDescriptor htd) throws IOException {
129     this.htd = htd;
130   }
131 
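  /**
   * Visitor used while walking a region's structure (region, then families, then store files)
   * to build the snapshot manifest. Implemented by the v1 and v2 ManifestBuilders.
   */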
132   interface RegionVisitor<TRegion, TFamily> {
133     TRegion regionOpen(final HRegionInfo regionInfo) throws IOException;
134     void regionClose(final TRegion region) throws IOException;
135 
136     TFamily familyOpen(final TRegion region, final byte[] familyName) throws IOException;
137     void familyClose(final TRegion region, final TFamily family) throws IOException;
138 
139     void storeFile(final TRegion region, final TFamily family, final StoreFileInfo storeFile)
140       throws IOException;
141   }
142 
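  /**
   * Get the ManifestBuilder (RegionVisitor) matching the version of the snapshot descriptor.
   */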
143   private RegionVisitor createRegionVisitor(final SnapshotDescription desc) throws IOException {
144     switch (getSnapshotFormat(desc)) {
145       case SnapshotManifestV1.DESCRIPTOR_VERSION:
146         return new SnapshotManifestV1.ManifestBuilder(conf, fs, workingDir);
147       case SnapshotManifestV2.DESCRIPTOR_VERSION:
148         return new SnapshotManifestV2.ManifestBuilder(conf, fs, workingDir);
149       default:
150         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
151     }
152   }
153 
154   /**
155    * Creates a 'manifest' for the specified region by reading directly from the HRegion object.
156    * This is used by the "online snapshot" when the table is enabled.
157    */
158   public void addRegion(final HRegion region) throws IOException {
159     // 0. Get the ManifestBuilder/RegionVisitor
160     RegionVisitor visitor = createRegionVisitor(desc);
161 
162     // 1. dump region meta info into the snapshot directory
163     LOG.debug("Storing '" + region + "' region-info for snapshot.");
164     Object regionData = visitor.regionOpen(region.getRegionInfo());
165     monitor.rethrowException();
166 
167     // 2. iterate through all the stores in the region
168     LOG.debug("Creating references for hfiles");
169 
170     for (Store store : region.getStores()) {
171       // 2.1. build the snapshot reference for the store
172       Object familyData = visitor.familyOpen(regionData, store.getFamily().getName());
173       monitor.rethrowException();
174 
175       List<StoreFile> storeFiles = new ArrayList<StoreFile>(store.getStorefiles());
176       if (LOG.isDebugEnabled()) {
177         LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
178       }
179 
180       // 2.2. iterate through all the store's files and create "references".
181       for (int i = 0, sz = storeFiles.size(); i < sz; i++) {
182         StoreFile storeFile = storeFiles.get(i);
183         monitor.rethrowException();
184 
185         // create "reference" to this store file.
186         LOG.debug("Adding reference for file (" + (i+1) + "/" + sz + "): " + storeFile.getPath());
187         visitor.storeFile(regionData, familyData, storeFile.getFileInfo());
188       }
189       visitor.familyClose(regionData, familyData);
190     }
191     visitor.regionClose(regionData);
192   }
193 
194   /**
195    * Creates a 'manifest' for the specified region by reading directly from disk.
196    * This is used by the "offline snapshot" when the table is disabled.
197    */
198   public void addRegion(final Path tableDir, final HRegionInfo regionInfo) throws IOException {
199     // 0. Get the ManifestBuilder/RegionVisitor
200     RegionVisitor visitor = createRegionVisitor(desc);
201 
202     // Open the RegionFS
203     HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, fs,
204           tableDir, regionInfo, true);
205     monitor.rethrowException();
206 
207     // 1. dump region meta info into the snapshot directory
208     LOG.debug("Storing region-info for snapshot.");
209     Object regionData = visitor.regionOpen(regionInfo);
210     monitor.rethrowException();
211 
212     // 2. iterate through all the stores in the region
213     LOG.debug("Creating references for hfiles");
214 
215     // This ensures that we have an atomic view of the directory as long as it holds fewer files
216     // than the namenode ls limit (the per-batch size of a directory listing). Otherwise, we get
217     // the files back in batches and may miss files being added/deleted. This could be made more
218     // robust (iteratively checking until we are sure we have all the files), but the limit is
219     // currently 1000 files/batch, far more than the number of store files under a single column family.
220     Collection<String> familyNames = regionFs.getFamilies();
221     if (familyNames != null) {
222       for (String familyName: familyNames) {
223         Object familyData = visitor.familyOpen(regionData, Bytes.toBytes(familyName));
224         monitor.rethrowException();
225 
226         Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(familyName);
227         if (storeFiles == null) {
228           LOG.debug("No files under family: " + familyName);
229           continue;
230         }
231 
232         // 2.1. build the snapshot reference for the store
233         if (LOG.isDebugEnabled()) {
234           LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
235         }
236 
237         // 2.2. iterate through all the store's files and create "references".
238         int i = 0;
239         int sz = storeFiles.size();
240         for (StoreFileInfo storeFile: storeFiles) {
241           monitor.rethrowException();
242 
243           // create "reference" to this store file.
244           LOG.debug("Adding reference for file ("+ (++i) +"/" + sz + "): " + storeFile.getPath());
245           visitor.storeFile(regionData, familyData, storeFile);
246         }
247         visitor.familyClose(regionData, familyData);
248       }
249     }
250     visitor.regionClose(regionData);
251   }
252 
253   /**
254    * Load the information in the SnapshotManifest. Called by SnapshotManifest.open()
255    *
256    * If the format is v2 and there is no data-manifest, it means that we are loading an
257    * in-progress snapshot. Since we support rolling upgrades, we look for both the v1 and
258    * v2 region formats.
259    */
260   private void load() throws IOException {
261     switch (getSnapshotFormat(desc)) {
262       case SnapshotManifestV1.DESCRIPTOR_VERSION: {
263         this.htd = FSTableDescriptors.getTableDescriptorFromFs(fs, workingDir)
264             .getHTableDescriptor();
265         ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
266         try {
267           this.regionManifests =
268             SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
269         } finally {
270           tpool.shutdown();
271         }
272         break;
273       }
274       case SnapshotManifestV2.DESCRIPTOR_VERSION: {
275         SnapshotDataManifest dataManifest = readDataManifest();
276         if (dataManifest != null) {
277           htd = HTableDescriptor.convert(dataManifest.getTableSchema());
278           regionManifests = dataManifest.getRegionManifestsList();
279         } else {
280           // Compatibility, load the v1 regions
281           // This happens only when the snapshot is still in progress and the cache wants to refresh.
282           List<SnapshotRegionManifest> v1Regions, v2Regions;
283           ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
284           try {
285             v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
286             v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
287           } finally {
288             tpool.shutdown();
289           }
290           if (v1Regions != null && v2Regions != null) {
291             regionManifests =
292               new ArrayList<SnapshotRegionManifest>(v1Regions.size() + v2Regions.size());
293             regionManifests.addAll(v1Regions);
294             regionManifests.addAll(v2Regions);
295           } else if (v1Regions != null) {
296             regionManifests = v1Regions;
297           } else /* if (v2Regions != null) */ {
298             regionManifests = v2Regions;
299           }
300         }
301         break;
302       }
303       default:
304         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
305     }
306   }
307 
308   /**
309    * Get the current snapshot working dir
310    */
311   public Path getSnapshotDir() {
312     return this.workingDir;
313   }
314 
315   /**
316    * Get the SnapshotDescription
317    */
318   public SnapshotDescription getSnapshotDescription() {
319     return this.desc;
320   }
321 
322   /**
323    * Get the table descriptor from the Snapshot
324    */
325   public HTableDescriptor getTableDescriptor() {
326     return this.htd;
327   }
328 
329   /**
330    * Get all the Region Manifests from the snapshot
331    */
332   public List<SnapshotRegionManifest> getRegionManifests() {
333     return this.regionManifests;
334   }
335 
336   /**
337    * Get all the Region Manifests from the snapshot.
338    * This is a helper to get a map keyed by the region encoded name.
339    */
340   public Map<String, SnapshotRegionManifest> getRegionManifestsMap() {
341     if (regionManifests == null || regionManifests.size() == 0) return null;
342 
343     HashMap<String, SnapshotRegionManifest> regionsMap =
344         new HashMap<String, SnapshotRegionManifest>(regionManifests.size());
345     for (SnapshotRegionManifest manifest: regionManifests) {
346       String regionName = getRegionNameFromManifest(manifest);
347       regionsMap.put(regionName, manifest);
348     }
349     return regionsMap;
350   }
351 
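  /**
   * Write out the final snapshot layout. For the v1 format this writes a copy of the table
   * descriptor into the snapshot working directory; for the v2 format it merges the per-region
   * manifests into a single data.manifest file.
   */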
352   public void consolidate() throws IOException {
353     if (getSnapshotFormat(desc) == SnapshotManifestV1.DESCRIPTOR_VERSION) {
354       Path rootDir = FSUtils.getRootDir(conf);
355       LOG.info("Using old Snapshot Format");
356       // write a copy of descriptor to the snapshot directory
357       new FSTableDescriptors(conf, fs, rootDir)
358         .createTableDescriptorForTableDirectory(workingDir, new TableDescriptor(
359             htd), false);
360     } else {
361       LOG.debug("Convert to Single Snapshot Manifest");
362       convertToV2SingleManifest();
363     }
364   }
365 
366   /*
367    * In case of a rolling upgrade, we try to read all the formats and build
368    * the snapshot with the latest format.
369    */
370   private void convertToV2SingleManifest() throws IOException {
371     // Try to load v1 and v2 regions
372     List<SnapshotRegionManifest> v1Regions, v2Regions;
373     ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
374     try {
375       v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
376       v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
377     } finally {
378       tpool.shutdown();
379     }
380 
381     SnapshotDataManifest.Builder dataManifestBuilder = SnapshotDataManifest.newBuilder();
382     dataManifestBuilder.setTableSchema(htd.convert());
383 
384     if (v1Regions != null && v1Regions.size() > 0) {
385       dataManifestBuilder.addAllRegionManifests(v1Regions);
386     }
387     if (v2Regions != null && v2Regions.size() > 0) {
388       dataManifestBuilder.addAllRegionManifests(v2Regions);
389     }
390 
391     // Write the v2 Data Manifest.
392     // Once the data-manifest is written, the snapshot can be considered complete.
393     // Currently snapshots are written in a "temporary" directory and later
394     // moved to the "completed" snapshot directory.
395     SnapshotDataManifest dataManifest = dataManifestBuilder.build();
396     writeDataManifest(dataManifest);
397     this.regionManifests = dataManifest.getRegionManifestsList();
398 
399     // Remove the region manifests. Everything is now in the data-manifest.
400     // The delete operation is "relaxed": unless we get an exception, we keep going.
401     // The extra files in the snapshot directory will not cause any problems,
402     // since they have the same content as the data manifest, and even by re-reading
403     // them we would get the same information.
404     if (v1Regions != null && v1Regions.size() > 0) {
405       for (SnapshotRegionManifest regionManifest: v1Regions) {
406         SnapshotManifestV1.deleteRegionManifest(fs, workingDir, regionManifest);
407       }
408     }
409     if (v2Regions != null && v2Regions.size() > 0) {
410       for (SnapshotRegionManifest regionManifest: v2Regions) {
411         SnapshotManifestV2.deleteRegionManifest(fs, workingDir, regionManifest);
412       }
413     }
414   }
415 
416   /*
417    * Write the SnapshotDataManifest file
418    */
419   private void writeDataManifest(final SnapshotDataManifest manifest)
420       throws IOException {
421     FSDataOutputStream stream = fs.create(new Path(workingDir, DATA_MANIFEST_NAME));
422     try {
423       manifest.writeTo(stream);
424     } finally {
425       stream.close();
426     }
427   }
428 
429   /*
430    * Read the SnapshotDataManifest file
431    */
432   private SnapshotDataManifest readDataManifest() throws IOException {
433     FSDataInputStream in = null;
434     try {
435       in = fs.open(new Path(workingDir, DATA_MANIFEST_NAME));
436       return SnapshotDataManifest.parseFrom(in);
437     } catch (FileNotFoundException e) {
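      // No data.manifest yet: the snapshot is either still in progress or written in the
      // v1 format, so the caller falls back to reading the per-region manifests.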
438       return null;
439     } finally {
440       if (in != null) in.close();
441     }
442   }
443 
444   private ThreadPoolExecutor createExecutor(final String name) {
445     return createExecutor(conf, name);
446   }
447 
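  /**
   * Create the bounded thread pool used to load/write region manifests in parallel.
   * The maximum number of threads is controlled by "hbase.snapshot.thread.pool.max"
   * (default 8); idle threads are kept alive for 30 seconds.
   */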
448   public static ThreadPoolExecutor createExecutor(final Configuration conf, final String name) {
449     int maxThreads = conf.getInt("hbase.snapshot.thread.pool.max", 8);
450     return Threads.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
451               Threads.getNamedThreadFactory(name));
452   }
453 
454   /**
455    * Extract the region encoded name from the region manifest
456    */
457   static String getRegionNameFromManifest(final SnapshotRegionManifest manifest) {
458     byte[] regionName = HRegionInfo.createRegionName(
459             ProtobufUtil.toTableName(manifest.getRegionInfo().getTableName()),
460             manifest.getRegionInfo().getStartKey().toByteArray(),
461             manifest.getRegionInfo().getRegionId(), true);
462     return HRegionInfo.encodeRegionName(regionName);
463   }
464 
465   /*
466    * Return the snapshot format
467    */
468   private static int getSnapshotFormat(final SnapshotDescription desc) {
469     return desc.hasVersion() ? desc.getVersion() : SnapshotManifestV1.DESCRIPTOR_VERSION;
470   }
471 }