View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.Arrays;
23  import java.util.Collections;
24  import java.util.HashSet;
25  import java.util.List;
26  import java.util.Set;
27  import java.util.concurrent.Callable;
28  import java.util.concurrent.ConcurrentHashMap;
29  import java.util.concurrent.ExecutionException;
30  import java.util.concurrent.ExecutorService;
31  import java.util.concurrent.Executors;
32  import java.util.concurrent.Future;
33  
34  import org.apache.commons.cli.CommandLine;
35  import org.apache.commons.cli.CommandLineParser;
36  import org.apache.commons.cli.GnuParser;
37  import org.apache.commons.cli.HelpFormatter;
38  import org.apache.commons.cli.Option;
39  import org.apache.commons.cli.Options;
40  import org.apache.commons.cli.ParseException;
41  import org.apache.commons.logging.Log;
42  import org.apache.commons.logging.LogFactory;
43  import org.apache.hadoop.conf.Configured;
44  import org.apache.hadoop.fs.FSDataInputStream;
45  import org.apache.hadoop.fs.FileStatus;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.hbase.HBaseConfiguration;
49  import org.apache.hadoop.hbase.io.FileLink;
50  import org.apache.hadoop.hbase.io.HFileLink;
51  import org.apache.hadoop.hbase.regionserver.StoreFile;
52  import org.apache.hadoop.util.Tool;
53  import org.apache.hadoop.util.ToolRunner;
54  
55  /**
56   * Tool to detect the presence of any HFile V1 in the given directory. It prints all regions
57   * that contain such files.
58   * <p>
59   * To print the help section of the tool:
60   * <ul>
61   * <li>./bin/hbase org.apache.hadoop.hbase.util.HFileV1Detector --h or,
62   * <li>java -cp `hbase classpath` org.apache.hadoop.hbase.util.HFileV1Detector --h
63   * </ul>
64   * It also supports -h, --help, -help options.
65   * </p>
66   */
67  public class HFileV1Detector extends Configured implements Tool {
68    private FileSystem fs;
69    private static final Log LOG = LogFactory.getLog(HFileV1Detector.class);
70    private static final int DEFAULT_NUM_OF_THREADS = 10;
71    /**
72     * Pre-namespace archive directory
73     */
74    private static final String PRE_NS_DOT_ARCHIVE = ".archive";
75    /**
76     * Pre-namespace tmp directory
77     */
78    private static final String PRE_NS_DOT_TMP = ".tmp";
79    private int numOfThreads;
80    /**
81     * directory to start the processing.
82     */
83    private Path targetDirPath;
84    /**
85     * executor for processing regions.
86     */
87    private ExecutorService exec;
88  
89    /**
90     * Keeps record of processed tables.
91     */
92    private final Set<Path> processedTables = new HashSet<Path>();
93    /**
94     * set of corrupted HFiles (with undetermined major version)
95     */
96    private final Set<Path> corruptedHFiles = Collections
97        .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
98    /**
99     * set of HfileV1;
100    */
101   private final Set<Path> hFileV1Set = Collections
102       .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
103 
104   private Options options = new Options();
105 
106   private Path defaultRootDir = null;
107   public HFileV1Detector() {
108     Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation");
109     pathOption.setRequired(false);
110     options.addOption(pathOption);
111     Option threadOption = new Option("n", "numberOfThreads", true,
112         "Number of threads to use while processing HFiles.");
113     threadOption.setRequired(false);
114     options.addOption(threadOption);
115     options.addOption("h", "help", false, "Help");
116   }
117 
118   private boolean parseOption(String[] args) throws ParseException, IOException {
119     if (args.length == 0) {
120       return true; // no args will process with default values.
121     }
122     CommandLineParser parser = new GnuParser();
123     CommandLine cmd = parser.parse(options, args);
124     if (cmd.hasOption("h")) {
125       HelpFormatter formatter = new HelpFormatter();
126       formatter.printHelp("HFileV1Detector", options, true);
127       System.out
128           .println("In case no option is provided, it processes hbase.rootdir using 10 threads.");
129       System.out.println("Example:");
130       System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':");
131       System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase");
132       System.out.println();
133       return false;
134     }
135 
136     if (cmd.hasOption("p")) {
137       this.targetDirPath = new Path(FSUtils.getRootDir(getConf()), cmd.getOptionValue("p"));
138     }
139     try {
140       if (cmd.hasOption("n")) {
141         int n = Integer.parseInt(cmd.getOptionValue("n"));
142         if (n < 0 || n > 100) {
143           LOG.warn("Please use a positive number <= 100 for number of threads."
144               + " Continuing with default value " + DEFAULT_NUM_OF_THREADS);
145           return true;
146         }
147         this.numOfThreads = n;
148       }
149     } catch (NumberFormatException nfe) {
150       LOG.error("Please select a valid number for threads");
151       return false;
152     }
153     return true;
154   }
155 
156   /**
157    * Checks for HFileV1.
158    * @return 0 when no HFileV1 is present.
159    *         1 when a HFileV1 is present or, when there is a file with corrupt major version
160    *          (neither V1 nor V2).
161    *        -1 in case of any error/exception
162    */
163   @Override
164   public int run(String args[]) throws IOException, ParseException {
165     Path root = new Path(FSUtils.getRootDir(getConf()).toUri());
166     getConf().set("fs.defaultFS", root.toString());    // for hadoop 0.21+
167     fs = FileSystem.get(getConf());
168     numOfThreads = DEFAULT_NUM_OF_THREADS;
169     targetDirPath = FSUtils.getRootDir(getConf());
170     if (!parseOption(args)) {
171       System.exit(-1);
172     }
173     this.exec = Executors.newFixedThreadPool(numOfThreads);
174     try {
175       return processResult(checkForV1Files(targetDirPath));
176     } catch (Exception e) {
177       LOG.error(e);
178     } finally {
179       exec.shutdown();
180       fs.close();
181     }
182     return -1;
183   }
184 
185   private int processResult(Set<Path> regionsWithHFileV1) {
186     LOG.info("Result: \n");
187     printSet(processedTables, "Tables Processed: ");
188 
189     int count = hFileV1Set.size();
190     LOG.info("Count of HFileV1: " + count);
191     if (count > 0) printSet(hFileV1Set, "HFileV1:");
192 
193     count = corruptedHFiles.size();
194     LOG.info("Count of corrupted files: " + count);
195     if (count > 0) printSet(corruptedHFiles, "Corrupted Files: ");
196 
197     count = regionsWithHFileV1.size();
198     LOG.info("Count of Regions with HFileV1: " + count);
199     if (count > 0) printSet(regionsWithHFileV1, "Regions to Major Compact: ");
200 
201     return (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) ? 0 : 1;
202   }
203 
204   private void printSet(Set<Path> result, String msg) {
205     LOG.info(msg);
206     for (Path p : result) {
207       LOG.info(p);
208     }
209   }
210 
211   /**
212    * Takes a directory path, and lists out any HFileV1, if present.
213    * @param targetDir directory to start looking for HFilev1.
214    * @return set of Regions that have HFileV1
215    * @throws IOException
216    */
217   private Set<Path> checkForV1Files(Path targetDir) throws IOException {
218     LOG.info("Target dir is: " + targetDir);
219     if (!fs.exists(targetDir)) {
220       throw new IOException("The given path does not exist: " + targetDir);
221     }
222     if (isTableDir(fs, targetDir)) {
223       processedTables.add(targetDir);
224       return processTable(targetDir);
225     }
226     Set<Path> regionsWithHFileV1 = new HashSet<Path>();
227     FileStatus[] fsStats = fs.listStatus(targetDir);
228     for (FileStatus fsStat : fsStats) {
229       if (isTableDir(fs, fsStat.getPath()) && !isRootTable(fsStat.getPath())) {
230         processedTables.add(fsStat.getPath());
231         // look for regions and find out any v1 file.
232         regionsWithHFileV1.addAll(processTable(fsStat.getPath()));
233       } else {
234         LOG.info("Ignoring path: " + fsStat.getPath());
235       }
236     }
237     return regionsWithHFileV1;
238   }
239 
240   /**
241    * Ignore ROOT table as it doesn't exist in 0.96.
242    * @param path
243    */
244   private boolean isRootTable(Path path) {
245     if (path != null && path.toString().endsWith("-ROOT-")) return true;
246     return false;
247   }
248 
249   /**
250    * Find out regions in the table which have HFileV1.
251    * @param tableDir
252    * @return the set of regions containing HFile v1.
253    * @throws IOException
254    */
255   private Set<Path> processTable(Path tableDir) throws IOException {
256     // list out the regions and then process each file in it.
257     LOG.debug("processing table: " + tableDir);
258     List<Future<Path>> regionLevelResults = new ArrayList<Future<Path>>();
259     Set<Path> regionsWithHFileV1 = new HashSet<Path>();
260 
261     FileStatus[] fsStats = fs.listStatus(tableDir);
262     for (FileStatus fsStat : fsStats) {
263       // process each region
264       if (isRegionDir(fs, fsStat.getPath())) {
265         regionLevelResults.add(processRegion(fsStat.getPath()));
266       }
267     }
268     for (Future<Path> f : regionLevelResults) {
269       try {
270         if (f.get() != null) {
271           regionsWithHFileV1.add(f.get());
272         }
273       } catch (InterruptedException e) {
274         LOG.error(e);
275       } catch (ExecutionException e) {
276         LOG.error(e); // might be a bad hfile. We print it at the end.
277       }
278     }
279     return regionsWithHFileV1;
280   }
281 
282   /**
283    * Each region is processed by a separate handler. If a HRegion has a hfileV1, its path is
284    * returned as the future result, otherwise, a null value is returned.
285    * @param regionDir Region to process.
286    * @return corresponding Future object.
287    */
288   private Future<Path> processRegion(final Path regionDir) {
289     LOG.debug("processing region: " + regionDir);
290     Callable<Path> regionCallable = new Callable<Path>() {
291       @Override
292       public Path call() throws Exception {
293         for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
294           FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir);
295           if (storeFiles == null || storeFiles.length == 0) continue;
296           for (FileStatus storeFile : storeFiles) {
297             Path storeFilePath = storeFile.getPath();
298             FSDataInputStream fsdis = null;
299             long lenToRead = 0;
300             try {
301               // check whether this path is a reference.
302               if (StoreFile.isReference(storeFilePath)) continue;
303               // check whether this path is a HFileLink.
304               else if (HFileLink.isHFileLink(storeFilePath)) {
305                 FileLink fLink = getFileLinkWithPreNSPath(storeFilePath);
306                 fsdis = fLink.open(fs);
307                 lenToRead = fLink.getFileStatus(fs).getLen();
308               } else {
309                 // a regular hfile
310                 fsdis = fs.open(storeFilePath);
311                 lenToRead = storeFile.getLen();
312               }
313               int majorVersion = computeMajorVersion(fsdis, lenToRead);
314               if (majorVersion == 1) {
315                 hFileV1Set.add(storeFilePath);
316                 // return this region path, as it needs to be compacted.
317                 return regionDir;
318               }
319               if (majorVersion > 2 || majorVersion < 1) throw new IllegalArgumentException(
320                   "Incorrect major version: " + majorVersion);
321             } catch (Exception iae) {
322               corruptedHFiles.add(storeFilePath);
323               LOG.error("Got exception while reading trailer for file: "+ storeFilePath, iae);
324             } finally {
325               if (fsdis != null) fsdis.close();
326             }
327           }
328         }
329         return null;
330       }
331 
332       private int computeMajorVersion(FSDataInputStream istream, long fileSize)
333        throws IOException {
334         //read up the last int of the file. Major version is in the last 3 bytes.
335         long seekPoint = fileSize - Bytes.SIZEOF_INT;
336         if (seekPoint < 0)
337           throw new IllegalArgumentException("File too small, no major version found");
338 
339         // Read the version from the last int of the file.
340         istream.seek(seekPoint);
341         int version = istream.readInt();
342         // Extract and return the major version
343         return version & 0x00ffffff;
344       }
345     };
346     Future<Path> f = exec.submit(regionCallable);
347     return f;
348   }
349 
350   /**
351    * Creates a FileLink which adds pre-namespace paths in its list of available paths. This is used
352    * when reading a snapshot file in a pre-namespace file layout, for example, while upgrading.
353    * @param storeFilePath
354    * @return a FileLink which could read from pre-namespace paths.
355    * @throws IOException
356    */
357   public FileLink getFileLinkWithPreNSPath(Path storeFilePath) throws IOException {
358     HFileLink link = new HFileLink(getConf(), storeFilePath);
359     List<Path> pathsToProcess = getPreNSPathsForHFileLink(link);
360     pathsToProcess.addAll(Arrays.asList(link.getLocations()));
361     return new FileLink(pathsToProcess);
362   }
363 
364   private List<Path> getPreNSPathsForHFileLink(HFileLink fileLink) throws IOException {
365     List<Path> p = new ArrayList<Path>();
366     String relativeTablePath = removeDefaultNSPath(fileLink.getOriginPath());
367     p.add(getPreNSPath(PRE_NS_DOT_ARCHIVE, relativeTablePath));
368     p.add(getPreNSPath(PRE_NS_DOT_TMP, relativeTablePath));
369     p.add(getPreNSPath(null, relativeTablePath));
370     return p;
371   }
372   
373   /**
374    * Removes the prefix of defaultNamespace from the path.
375    * @param originPath
376  * @throws IOException 
377    */
378   private String removeDefaultNSPath(Path originalPath) throws IOException {
379     if (defaultRootDir == null) {
380       defaultRootDir = FSUtils.getRootDir(getConf());
381     }
382     String pathStr = originalPath.toString();
383     if (!pathStr.startsWith(defaultRootDir.toString())) return pathStr;
384     return pathStr.substring(defaultRootDir.toString().length() + 1);
385   }
386 
387   private Path getPreNSPath(String prefix, String relativeTablePath) throws IOException {
388     String relativePath = (prefix == null ? relativeTablePath : prefix + Path.SEPARATOR
389         + relativeTablePath);
390     return new Path(FSUtils.getRootDir(getConf()), relativePath);
391   }
392 
393   private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
394     // check for old format, of having /table/.tableinfo; hbase:meta doesn't has .tableinfo,
395     // include it.
396     if (fs.isFile(path)) return false;
397     return (FSTableDescriptors.getTableInfoPath(fs, path) != null)
398         || path.toString().endsWith(".META.");
399   }
400 
401   private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
402     if (fs.isFile(path)) return false;
403     Path regionInfo = new Path(path, ".regioninfo");
404     return fs.exists(regionInfo);
405 
406   }
407 
408   public static void main(String args[]) throws Exception {
409     System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args));
410   }
411 
412 }