/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.io.FileLink;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Tool to detect the presence of any HFileV1 under the given directory. It prints all regions
 * that contain such files.
 * <p>
 * To print the help section of the tool:
 * <ul>
 * <li>./bin/hbase org.apache.hadoop.hbase.util.HFileV1Detector --h or,
 * <li>java -cp `hbase classpath` org.apache.hadoop.hbase.util.HFileV1Detector --h
 * </ul>
 * It also supports the -h, --help and -help options.
 * </p>
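 * <p>
 * Other supported options: {@code -p <path>} points the tool at a specific table or hbase
 * installation directory, and {@code -n <numberOfThreads>} sizes the worker pool; with no
 * options it scans hbase.rootdir using 10 threads.
 * </p>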
 */
public class HFileV1Detector extends Configured implements Tool {
  private FileSystem fs;
  private static final Log LOG = LogFactory.getLog(HFileV1Detector.class);
  private static final int DEFAULT_NUM_OF_THREADS = 10;
  /**
   * Pre-namespace archive directory
   */
  private static final String PRE_NS_DOT_ARCHIVE = ".archive";
  /**
   * Pre-namespace tmp directory
   */
  private static final String PRE_NS_DOT_TMP = ".tmp";
  private int numOfThreads;
  /**
   * Directory to start the processing from.
   */
  private Path targetDirPath;
  /**
   * Executor for processing regions.
   */
  private ExecutorService exec;

  /**
   * Keeps a record of processed tables.
   */
  private final Set<Path> processedTables = new HashSet<Path>();
  /**
   * Set of corrupted HFiles (with an undetermined major version).
   */
  private final Set<Path> corruptedHFiles = Collections
      .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
  /**
   * Set of HFileV1 files found.
   */
  private final Set<Path> hFileV1Set = Collections
      .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());

  private Options options = new Options();
  /**
   * Used for computing pre-namespace paths for HFileLinks.
   */
  private Path defaultNamespace;

  public HFileV1Detector() {
    Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation");
    pathOption.setRequired(false);
    options.addOption(pathOption);
    Option threadOption = new Option("n", "numberOfThreads", true,
        "Number of threads to use while processing HFiles.");
    threadOption.setRequired(false);
    options.addOption(threadOption);
    options.addOption("h", "help", false, "Help");
  }

  private boolean parseOption(String[] args) throws ParseException, IOException {
    if (args.length == 0) {
      return true; // no args will process with default values.
    }
    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);
    if (cmd.hasOption("h")) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("HFileV1Detector", options, true);
      System.out
          .println("In case no option is provided, it processes hbase.rootdir using 10 threads.");
      System.out.println("Example:");
      System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':");
      System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase");
      System.out.println();
      return false;
    }

    if (cmd.hasOption("p")) {
      this.targetDirPath = new Path(FSUtils.getRootDir(getConf()), cmd.getOptionValue("p"));
    }
    try {
      if (cmd.hasOption("n")) {
        int n = Integer.parseInt(cmd.getOptionValue("n"));
        if (n < 1 || n > 100) {
          LOG.warn("Please use a positive number of threads <= 100."
              + " Continuing with the default value " + DEFAULT_NUM_OF_THREADS);
          return true;
        }
        this.numOfThreads = n;
      }
    } catch (NumberFormatException nfe) {
      LOG.error("Please select a valid number for threads");
      return false;
    }
    return true;
  }
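
  // Illustrative argument vectors for parseOption (the paths are examples, not real clusters):
  //   {"-p", "/myhbase"}              -> scan the whole installation at /myhbase
  //   {"-p", "someTable", "-n", "20"} -> scan one table dir (resolved against hbase.rootdir)
  //                                      using 20 worker threads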

  /**
   * Checks for HFileV1.
   * @return 0 when no HFileV1 is present.
   *         1 when an HFileV1 is present or when there is a file with a corrupt major version
   *           (neither V1 nor V2).
   *        -1 in case of any error/exception.
   */
  @Override
  public int run(String[] args) throws IOException, ParseException {
    FSUtils.setFsDefault(getConf(), new Path(FSUtils.getRootDir(getConf()).toUri()));
    fs = FileSystem.get(getConf());
    numOfThreads = DEFAULT_NUM_OF_THREADS;
    targetDirPath = FSUtils.getRootDir(getConf());
    if (!parseOption(args)) {
      System.exit(-1);
    }
    this.exec = Executors.newFixedThreadPool(numOfThreads);
    try {
      return processResult(checkForV1Files(targetDirPath));
    } catch (Exception e) {
      LOG.error(e);
    } finally {
      exec.shutdown();
      fs.close();
    }
    return -1;
  }

  private void setDefaultNamespaceDir() throws IOException {
    Path dataDir = new Path(FSUtils.getRootDir(getConf()), HConstants.BASE_NAMESPACE_DIR);
    defaultNamespace = new Path(dataDir, NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR);
  }

  private int processResult(Set<Path> regionsWithHFileV1) {
    LOG.info("Result: \n");
    printSet(processedTables, "Tables Processed: ");

    int count = hFileV1Set.size();
    LOG.info("Count of HFileV1: " + count);
    if (count > 0) printSet(hFileV1Set, "HFileV1:");

    count = corruptedHFiles.size();
    LOG.info("Count of corrupted files: " + count);
    if (count > 0) printSet(corruptedHFiles, "Corrupted Files: ");

    count = regionsWithHFileV1.size();
    LOG.info("Count of Regions with HFileV1: " + count);
    if (count > 0) printSet(regionsWithHFileV1, "Regions to Major Compact: ");

    return (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) ? 0 : 1;
  }

  private void printSet(Set<Path> result, String msg) {
    LOG.info(msg);
    for (Path p : result) {
      LOG.info(p);
    }
  }

  /**
   * Takes a directory path and lists out any HFileV1, if present.
   * @param targetDir directory to start looking for HFileV1 in.
   * @return set of regions that have an HFileV1
   * @throws IOException on filesystem errors
   */
  private Set<Path> checkForV1Files(Path targetDir) throws IOException {
    LOG.info("Target dir is: " + targetDir);
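    // For orientation: a typical 0.96 layout under hbase.rootdir is
    // <rootdir>/data/<namespace>/<table>/<region>/<family>/<hfile>; -p may point at the
    // root dir itself or directly at a table dir.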
    if (!fs.exists(targetDir)) {
      throw new IOException("The given path does not exist: " + targetDir);
    }
    if (isTableDir(fs, targetDir)) {
      processedTables.add(targetDir);
      return processTable(targetDir);
    }
    Set<Path> regionsWithHFileV1 = new HashSet<Path>();
    FileStatus[] fsStats = fs.listStatus(targetDir);
    for (FileStatus fsStat : fsStats) {
      if (isTableDir(fs, fsStat.getPath()) && !isRootTable(fsStat.getPath())) {
        processedTables.add(fsStat.getPath());
        // look for regions and find out any v1 file.
        regionsWithHFileV1.addAll(processTable(fsStat.getPath()));
      } else {
        LOG.info("Ignoring path: " + fsStat.getPath());
      }
    }
    return regionsWithHFileV1;
  }

  /**
   * Ignores the ROOT table as it doesn't exist in 0.96.
   * @param path path to check
   */
  private boolean isRootTable(Path path) {
    return path != null && path.toString().endsWith("-ROOT-");
  }

  /**
   * Finds out the regions in the table which have an HFileV1.
   * @param tableDir table directory to scan
   * @return the set of regions containing HFileV1.
   * @throws IOException on filesystem errors
   */
  private Set<Path> processTable(Path tableDir) throws IOException {
    // list out the regions and then process each file in it.
    LOG.debug("processing table: " + tableDir);
    List<Future<Path>> regionLevelResults = new ArrayList<Future<Path>>();
    Set<Path> regionsWithHFileV1 = new HashSet<Path>();

    FileStatus[] fsStats = fs.listStatus(tableDir);
    for (FileStatus fsStat : fsStats) {
      // process each region
      if (isRegionDir(fs, fsStat.getPath())) {
        regionLevelResults.add(processRegion(fsStat.getPath()));
      }
    }
    for (Future<Path> f : regionLevelResults) {
      try {
        Path regionWithHFileV1 = f.get();
        if (regionWithHFileV1 != null) {
          regionsWithHFileV1.add(regionWithHFileV1);
        }
      } catch (InterruptedException e) {
        LOG.error(e);
      } catch (ExecutionException e) {
        LOG.error(e); // might be a bad hfile. We print it at the end.
      }
    }
    return regionsWithHFileV1;
  }

  /**
   * Each region is processed by a separate handler. If an HRegion has an HFileV1, its path is
   * returned as the future result; otherwise, a null value is returned.
   * @param regionDir Region to process.
   * @return corresponding Future object.
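   * <p>
   * Each store file is classified as a reference, an HFileLink, or a plain HFile before its
   * trailer is read; references are skipped since they point at other store files rather than
   * holding HFile data themselves.
   * </p>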
   */
  private Future<Path> processRegion(final Path regionDir) {
    LOG.debug("processing region: " + regionDir);
    Callable<Path> regionCallable = new Callable<Path>() {
      @Override
      public Path call() throws Exception {
        for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
          FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir);
          if (storeFiles == null || storeFiles.length == 0) continue;
          for (FileStatus storeFile : storeFiles) {
            Path storeFilePath = storeFile.getPath();
            FSDataInputStream fsdis = null;
            long lenToRead = 0;
            try {
              if (StoreFileInfo.isReference(storeFilePath)) {
                // this path is a reference; skip it.
                continue;
              } else if (HFileLink.isHFileLink(storeFilePath)) {
                // this path is an HFileLink; resolve it before opening.
                FileLink fLink = getFileLinkWithPreNSPath(storeFilePath);
                fsdis = fLink.open(fs);
                lenToRead = fLink.getFileStatus(fs).getLen();
              } else {
                // a regular hfile
                fsdis = fs.open(storeFilePath);
                lenToRead = storeFile.getLen();
              }
              int majorVersion = computeMajorVersion(fsdis, lenToRead);
              if (majorVersion == 1) {
                hFileV1Set.add(storeFilePath);
                // return this region path, as it needs to be compacted.
                return regionDir;
              }
              if (majorVersion > 2 || majorVersion < 1) throw new IllegalArgumentException(
                  "Incorrect major version: " + majorVersion);
            } catch (Exception e) {
              corruptedHFiles.add(storeFilePath);
              LOG.error("Got exception while reading trailer for file: " + storeFilePath, e);
            } finally {
              if (fsdis != null) fsdis.close();
            }
          }
        }
        return null;
      }

      private int computeMajorVersion(FSDataInputStream istream, long fileSize)
          throws IOException {
        // Read the last int of the file; the major version is in its low three bytes.
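        // The trailer packs the minor version into the high byte and the major version into
        // the low three bytes of this int; e.g. 0x01000002 decodes to minor version 1 and
        // major version 2, so masking with 0x00ffffff below recovers the major version.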
        long seekPoint = fileSize - Bytes.SIZEOF_INT;
        if (seekPoint < 0)
          throw new IllegalArgumentException("File too small, no major version found");

        // Read the version from the last int of the file.
        istream.seek(seekPoint);
        int version = istream.readInt();
        // Extract and return the major version
        return version & 0x00ffffff;
      }
    };
    return exec.submit(regionCallable);
  }

  /**
   * Creates a FileLink which adds pre-namespace paths to its list of available paths. This is used
   * when reading a snapshot file in a pre-namespace file layout, for example, while upgrading.
   * @param storeFilePath path of the HFileLink to resolve
   * @return a FileLink which could read from pre-namespace paths.
   * @throws IOException on filesystem errors
   */
  public FileLink getFileLinkWithPreNSPath(Path storeFilePath) throws IOException {
    HFileLink link = HFileLink.buildFromHFileLinkPattern(getConf(), storeFilePath);
    List<Path> pathsToProcess = getPreNSPathsForHFileLink(link);
    pathsToProcess.addAll(Arrays.asList(link.getLocations()));
    return new FileLink(pathsToProcess);
  }

  private List<Path> getPreNSPathsForHFileLink(HFileLink fileLink) throws IOException {
    if (defaultNamespace == null) setDefaultNamespaceDir();
    List<Path> p = new ArrayList<Path>();
    String relativeTablePath = removeDefaultNSPath(fileLink.getOriginPath());
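    // e.g. for an origin path of <rootdir>/data/default/t1/<region>/<family>/<hfile>,
    // relativeTablePath is "t1/<region>/<family>/<hfile>" and the candidates added below are
    // <rootdir>/.archive/t1/..., <rootdir>/.tmp/t1/... and <rootdir>/t1/... respectively.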
    p.add(getPreNSPath(PRE_NS_DOT_ARCHIVE, relativeTablePath));
    p.add(getPreNSPath(PRE_NS_DOT_TMP, relativeTablePath));
    p.add(getPreNSPath(null, relativeTablePath));
    return p;
  }

  /**
   * Removes the prefix of defaultNamespace from the path.
   * @param originalPath path to strip the prefix from
   */
  private String removeDefaultNSPath(Path originalPath) {
    String pathStr = originalPath.toString();
    if (!pathStr.startsWith(defaultNamespace.toString())) return pathStr;
    return pathStr.substring(defaultNamespace.toString().length() + 1);
  }

  private Path getPreNSPath(String prefix, String relativeTablePath) throws IOException {
    String relativePath = (prefix == null ? relativeTablePath
        : prefix + Path.SEPARATOR + relativeTablePath);
    return new Path(FSUtils.getRootDir(getConf()), relativePath);
  }

  private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
    // Check for the old format of having /table/.tableinfo; hbase:meta doesn't have a
    // .tableinfo file, so include it explicitly.
    if (fs.isFile(path)) return false;
    return (FSTableDescriptors.getTableInfoPath(fs, path) != null || FSTableDescriptors
        .getCurrentTableInfoStatus(fs, path, false) != null) || path.toString().endsWith(".META.");
  }

  private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
    if (fs.isFile(path)) return false;
    Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
    return fs.exists(regionInfo);
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args));
  }

}