/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.io.FileLink;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Tool to detect the presence of any HFile in version 1 format (HFileV1) under the given
 * directory. It prints all regions that contain such files.
 * <p>
 * To print the help section of the tool:
 * </p>
 * <ul>
 * <li>./bin/hbase org.apache.hadoop.hbase.util.HFileV1Detector --h, or</li>
 * <li>java -cp `hbase classpath` org.apache.hadoop.hbase.util.HFileV1Detector --h</li>
 * </ul>
 * <p>
 * It also supports the -h, --help, and -help options.
 * </p>
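 * <p>
 * A minimal sketch of driving the tool programmatically via ToolRunner (the path and thread
 * count below are arbitrary examples; the returned code follows the convention documented on
 * {@link #run(String[])}):
 * </p>
 * <pre>
 * int ret = ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(),
 *     new String[] { "-p", "/myhbase", "-n", "4" });
 * </pre>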
 */
public class HFileV1Detector extends Configured implements Tool {
  private FileSystem fs;
  private static final Log LOG = LogFactory.getLog(HFileV1Detector.class);
  private static final int DEFAULT_NUM_OF_THREADS = 10;
  /**
   * Pre-namespace archive directory
   */
  private static final String PRE_NS_DOT_ARCHIVE = ".archive";
  /**
   * Pre-namespace tmp directory
   */
  private static final String PRE_NS_DOT_TMP = ".tmp";
  private int numOfThreads;
  /**
   * directory to start the processing.
   */
  private Path targetDirPath;
  /**
   * executor for processing regions.
   */
  private ExecutorService exec;

  /**
   * Keeps record of processed tables.
   */
  private final Set<Path> processedTables = new HashSet<Path>();
  /**
   * Set of corrupted HFiles (with undetermined major version).
   */
  private final Set<Path> corruptedHFiles = Collections
      .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
  /**
   * Set of HFileV1 files.
   */
  private final Set<Path> hFileV1Set = Collections
      .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());

  private Options options = new Options();
  /**
   * Used for computing pre-namespace paths for HFileLinks.
   */
  private Path defaultNamespace;

  public HFileV1Detector() {
    Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation");
    pathOption.setRequired(false);
    options.addOption(pathOption);
    Option threadOption = new Option("n", "numberOfThreads", true,
        "Number of threads to use while processing HFiles.");
    threadOption.setRequired(false);
    options.addOption(threadOption);
    options.addOption("h", "help", false, "Help");
  }

  private boolean parseOption(String[] args) throws ParseException, IOException {
    if (args.length == 0) {
      return true; // no args will process with default values.
    }
    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);
    if (cmd.hasOption("h")) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("HFileV1Detector", options, true);
      System.out
          .println("In case no option is provided, it processes hbase.rootdir using 10 threads.");
      System.out.println("Example:");
      System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':");
      System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase");
      System.out.println();
      return false;
    }

    if (cmd.hasOption("p")) {
      this.targetDirPath = new Path(FSUtils.getRootDir(getConf()), cmd.getOptionValue("p"));
    }
    try {
      if (cmd.hasOption("n")) {
        int n = Integer.parseInt(cmd.getOptionValue("n"));
        // reject zero as well: a fixed thread pool of size 0 is invalid.
        if (n <= 0 || n > 100) {
          LOG.warn("Please use a positive number <= 100 for number of threads."
              + " Continuing with default value " + DEFAULT_NUM_OF_THREADS);
          return true;
        }
        this.numOfThreads = n;
      }
    } catch (NumberFormatException nfe) {
      LOG.error("Please select a valid number for threads");
      return false;
    }
    return true;
  }

  /**
   * Checks for HFileV1.
   * @return 0 when no HFileV1 is present;
   *         1 when an HFileV1 is present, or when there is a file with a corrupt major version
   *           (neither V1 nor V2);
   *         -1 in case of any error/exception.
   */
  @Override
  public int run(String args[]) throws IOException, ParseException {
    FSUtils.setFsDefault(getConf(), new Path(FSUtils.getRootDir(getConf()).toUri()));
    fs = FileSystem.get(getConf());
    numOfThreads = DEFAULT_NUM_OF_THREADS;
    targetDirPath = FSUtils.getRootDir(getConf());
    if (!parseOption(args)) {
      System.exit(-1);
    }
    this.exec = Executors.newFixedThreadPool(numOfThreads);
    try {
      return processResult(checkForV1Files(targetDirPath));
    } catch (Exception e) {
      LOG.error(e);
    } finally {
      exec.shutdown();
      fs.close();
    }
    return -1;
  }

  private void setDefaultNamespaceDir() throws IOException {
    Path dataDir = new Path(FSUtils.getRootDir(getConf()), HConstants.BASE_NAMESPACE_DIR);
    defaultNamespace = new Path(dataDir, NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR);
  }

  private int processResult(Set<Path> regionsWithHFileV1) {
    LOG.info("Result: \n");
    printSet(processedTables, "Tables Processed: ");

    int count = hFileV1Set.size();
    LOG.info("Count of HFileV1: " + count);
    if (count > 0) printSet(hFileV1Set, "HFileV1:");

    count = corruptedHFiles.size();
    LOG.info("Count of corrupted files: " + count);
    if (count > 0) printSet(corruptedHFiles, "Corrupted Files: ");

    count = regionsWithHFileV1.size();
    LOG.info("Count of Regions with HFileV1: " + count);
    if (count > 0) printSet(regionsWithHFileV1, "Regions to Major Compact: ");

    return (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) ? 0 : 1;
  }

  private void printSet(Set<Path> result, String msg) {
    LOG.info(msg);
    for (Path p : result) {
      LOG.info(p);
    }
  }

  /**
   * Takes a directory path, and lists out any HFileV1, if present.
   * @param targetDir directory to start looking for HFileV1.
   * @return set of Regions that have HFileV1.
   * @throws IOException
   */
  private Set<Path> checkForV1Files(Path targetDir) throws IOException {
    LOG.info("Target dir is: " + targetDir);
    if (!fs.exists(targetDir)) {
      throw new IOException("The given path does not exist: " + targetDir);
    }
    if (isTableDir(fs, targetDir)) {
      processedTables.add(targetDir);
      return processTable(targetDir);
    }
    Set<Path> regionsWithHFileV1 = new HashSet<Path>();
    FileStatus[] fsStats = fs.listStatus(targetDir);
    for (FileStatus fsStat : fsStats) {
      if (isTableDir(fs, fsStat.getPath()) && !isRootTable(fsStat.getPath())) {
        processedTables.add(fsStat.getPath());
        // look for regions and find out any v1 file.
        regionsWithHFileV1.addAll(processTable(fsStat.getPath()));
      } else {
        LOG.info("Ignoring path: " + fsStat.getPath());
      }
    }
    return regionsWithHFileV1;
  }

  /**
   * Checks whether the given path is the -ROOT- table, which is skipped as it doesn't exist
   * in 0.96.
   * @param path the path to check.
   */
  private boolean isRootTable(Path path) {
    return path != null && path.toString().endsWith("-ROOT-");
  }

  /**
   * Find out regions in the table which have HFileV1.
   * @param tableDir
   * @return the set of regions containing HFile v1.
   * @throws IOException
   */
  private Set<Path> processTable(Path tableDir) throws IOException {
    // list out the regions and then process each file in it.
    LOG.debug("processing table: " + tableDir);
    List<Future<Path>> regionLevelResults = new ArrayList<Future<Path>>();
    Set<Path> regionsWithHFileV1 = new HashSet<Path>();

    FileStatus[] fsStats = fs.listStatus(tableDir);
    for (FileStatus fsStat : fsStats) {
      // process each region
      if (isRegionDir(fs, fsStat.getPath())) {
        regionLevelResults.add(processRegion(fsStat.getPath()));
      }
    }
    for (Future<Path> f : regionLevelResults) {
      try {
        if (f.get() != null) {
          regionsWithHFileV1.add(f.get());
        }
      } catch (InterruptedException e) {
        LOG.error(e);
      } catch (ExecutionException e) {
        LOG.error(e); // might be a bad hfile. We print it at the end.
      }
    }
    return regionsWithHFileV1;
  }

  /**
   * Each region is processed by a separate handler. If an HRegion has an HFileV1, its path is
   * returned as the future result; otherwise, a null value is returned.
   * @param regionDir Region to process.
   * @return corresponding Future object.
   */
  private Future<Path> processRegion(final Path regionDir) {
    LOG.debug("processing region: " + regionDir);
    Callable<Path> regionCallable = new Callable<Path>() {
      @Override
      public Path call() throws Exception {
        for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
          FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir);
          if (storeFiles == null || storeFiles.length == 0) continue;
          for (FileStatus storeFile : storeFiles) {
            Path storeFilePath = storeFile.getPath();
            FSDataInputStream fsdis = null;
            long lenToRead = 0;
            try {
              // check whether this path is a reference.
              if (StoreFileInfo.isReference(storeFilePath)) continue;
              // check whether this path is an HFileLink.
              else if (HFileLink.isHFileLink(storeFilePath)) {
                FileLink fLink = getFileLinkWithPreNSPath(storeFilePath);
                fsdis = fLink.open(fs);
                lenToRead = fLink.getFileStatus(fs).getLen();
              } else {
                // a regular hfile
                fsdis = fs.open(storeFilePath);
                lenToRead = storeFile.getLen();
              }
              int majorVersion = computeMajorVersion(fsdis, lenToRead);
              if (majorVersion == 1) {
                hFileV1Set.add(storeFilePath);
                // return this region path, as it needs to be compacted.
                return regionDir;
              }
              if (majorVersion > 2 || majorVersion < 1) {
                throw new IllegalArgumentException("Incorrect major version: " + majorVersion);
              }
            } catch (Exception iae) {
              corruptedHFiles.add(storeFilePath);
              LOG.error("Got exception while reading trailer for file: " + storeFilePath, iae);
            } finally {
              if (fsdis != null) fsdis.close();
            }
          }
        }
        return null;
      }

      private int computeMajorVersion(FSDataInputStream istream, long fileSize)
          throws IOException {
        // read the last int of the file; the major version lives in its low-order 3 bytes.
        long seekPoint = fileSize - Bytes.SIZEOF_INT;
        if (seekPoint < 0) {
          throw new IllegalArgumentException("File too small, no major version found");
        }

        // Read the version from the last int of the file.
        istream.seek(seekPoint);
        int version = istream.readInt();
        // Extract and return the major version
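        // (The masked-off high-order byte of the serialized int carries the minor version in
        // V2 trailers. As an illustration: a stored value of 0x03000002 decodes to major
        // version 2, minor version 3.)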
        return version & 0x00ffffff;
      }
    };
    return exec.submit(regionCallable);
  }

  /**
   * Creates a FileLink which adds pre-namespace paths to its list of available paths. This is
   * used when reading a snapshot file in a pre-namespace file layout, for example, while
   * upgrading.
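   * <p>
   * As an illustration (with hypothetical table and file names): for a link whose origin
   * resolves to {@code <rootDir>/data/default/t1/region/cf/hfile}, the pre-namespace locations
   * {@code <rootDir>/.archive/t1/region/cf/hfile}, {@code <rootDir>/.tmp/t1/region/cf/hfile}
   * and {@code <rootDir>/t1/region/cf/hfile} are also probed, as computed by
   * {@link #getPreNSPathsForHFileLink}.
   * </p>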
   * @param storeFilePath
   * @return a FileLink which could read from pre-namespace paths.
   * @throws IOException
   */
  public FileLink getFileLinkWithPreNSPath(Path storeFilePath) throws IOException {
    HFileLink link = HFileLink.buildFromHFileLinkPattern(getConf(), storeFilePath);
    List<Path> pathsToProcess = getPreNSPathsForHFileLink(link);
    pathsToProcess.addAll(Arrays.asList(link.getLocations()));
    return new FileLink(pathsToProcess);
  }

  private List<Path> getPreNSPathsForHFileLink(HFileLink fileLink) throws IOException {
    if (defaultNamespace == null) setDefaultNamespaceDir();
    List<Path> p = new ArrayList<Path>();
    String relativeTablePath = removeDefaultNSPath(fileLink.getOriginPath());
    p.add(getPreNSPath(PRE_NS_DOT_ARCHIVE, relativeTablePath));
    p.add(getPreNSPath(PRE_NS_DOT_TMP, relativeTablePath));
    p.add(getPreNSPath(null, relativeTablePath));
    return p;
  }

  /**
   * Removes the prefix of defaultNamespace from the path.
   * @param originalPath the path to strip.
   */
  private String removeDefaultNSPath(Path originalPath) {
    String pathStr = originalPath.toString();
    if (!pathStr.startsWith(defaultNamespace.toString())) return pathStr;
    return pathStr.substring(defaultNamespace.toString().length() + 1);
  }

  private Path getPreNSPath(String prefix, String relativeTablePath) throws IOException {
    String relativePath = (prefix == null ? relativeTablePath
        : prefix + Path.SEPARATOR + relativeTablePath);
    return new Path(FSUtils.getRootDir(getConf()), relativePath);
  }

  private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
    // check for the old format of having /table/.tableinfo; hbase:meta doesn't have a
    // .tableinfo, so include it explicitly.
    if (fs.isFile(path)) return false;
    return (FSTableDescriptors.getTableInfoPath(fs, path) != null || FSTableDescriptors
        .getCurrentTableInfoStatus(fs, path, false) != null) || path.toString().endsWith(".META.");
  }

  private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
    if (fs.isFile(path)) return false;
    Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
    return fs.exists(regionInfo);
  }

  public static void main(String args[]) throws Exception {
    System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args));
  }

}