View Javadoc

1   
2   /*
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataInput;
24  import java.io.IOException;
25  import java.io.PrintStream;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.Locale;
29  import java.util.Map;
30  import java.util.SortedMap;
31  
32  import org.apache.commons.cli.CommandLine;
33  import org.apache.commons.cli.CommandLineParser;
34  import org.apache.commons.cli.HelpFormatter;
35  import org.apache.commons.cli.Option;
36  import org.apache.commons.cli.OptionGroup;
37  import org.apache.commons.cli.Options;
38  import org.apache.commons.cli.ParseException;
39  import org.apache.commons.cli.PosixParser;
40  import org.apache.commons.logging.Log;
41  import org.apache.commons.logging.LogFactory;
42  import org.apache.hadoop.classification.InterfaceAudience;
43  import org.apache.hadoop.classification.InterfaceStability;
44  import org.apache.hadoop.conf.Configuration;
45  import org.apache.hadoop.conf.Configured;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.hbase.HConstants;
49  import org.apache.hadoop.hbase.TableName;
50  import org.apache.hadoop.hbase.HBaseConfiguration;
51  import org.apache.hadoop.hbase.HRegionInfo;
52  import org.apache.hadoop.hbase.KeyValue;
53  import org.apache.hadoop.hbase.KeyValueUtil;
54  import org.apache.hadoop.hbase.TableName;
55  import org.apache.hadoop.hbase.Tag;
56  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
57  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
58  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
59  import org.apache.hadoop.hbase.util.BloomFilter;
60  import org.apache.hadoop.hbase.util.BloomFilterFactory;
61  import org.apache.hadoop.hbase.util.ByteBloomFilter;
62  import org.apache.hadoop.hbase.util.Bytes;
63  import org.apache.hadoop.hbase.util.FSUtils;
64  import org.apache.hadoop.hbase.util.Writables;
65  import org.apache.hadoop.util.Tool;
66  import org.apache.hadoop.util.ToolRunner;
67  
68  import com.yammer.metrics.core.Histogram;
69  import com.yammer.metrics.core.Metric;
70  import com.yammer.metrics.core.MetricName;
71  import com.yammer.metrics.core.MetricPredicate;
72  import com.yammer.metrics.core.MetricsRegistry;
73  import com.yammer.metrics.reporting.ConsoleReporter;
74  
75  /**
76   * Implements pretty-printing functionality for {@link HFile}s.
77   */
78  @InterfaceAudience.Public
79  @InterfaceStability.Evolving
80  public class HFilePrettyPrinter extends Configured implements Tool {
81  
82    private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
83  
84    private Options options = new Options();
85  
86    private boolean verbose;
87    private boolean printValue;
88    private boolean printKey;
89    private boolean shouldPrintMeta;
90    private boolean printBlockIndex;
91    private boolean printBlockHeaders;
92    private boolean printStats;
93    private boolean checkRow;
94    private boolean checkFamily;
95    private boolean isSeekToRow = false;
96  
97    /**
98     * The row which the user wants to specify and print all the KeyValues for.
99     */
100   private byte[] row = null;
101 
102   private List<Path> files = new ArrayList<Path>();
103   private int count;
104 
105   private static final String FOUR_SPACES = "    ";
106 
107   public HFilePrettyPrinter() {
108     super();
109     init();
110   }
111 
112   public HFilePrettyPrinter(Configuration conf) {
113     super(conf);
114     init();
115   }
116 
117   private void init() {
118     options.addOption("v", "verbose", false,
119         "Verbose output; emits file and meta data delimiters");
120     options.addOption("p", "printkv", false, "Print key/value pairs");
121     options.addOption("e", "printkey", false, "Print keys");
122     options.addOption("m", "printmeta", false, "Print meta data of file");
123     options.addOption("b", "printblocks", false, "Print block index meta data");
124     options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
125     options.addOption("k", "checkrow", false,
126         "Enable row order check; looks for out-of-order keys");
127     options.addOption("a", "checkfamily", false, "Enable family check");
128     options.addOption("w", "seekToRow", true,
129       "Seek to this row and print all the kvs for this row only");
130     options.addOption("s", "stats", false, "Print statistics");
131 
132     OptionGroup files = new OptionGroup();
133     files.addOption(new Option("f", "file", true,
134       "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
135     files.addOption(new Option("r", "region", true,
136       "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
137     options.addOptionGroup(files);
138   }
139 
140   public boolean parseOptions(String args[]) throws ParseException,
141       IOException {
142     if (args.length == 0) {
143       HelpFormatter formatter = new HelpFormatter();
144       formatter.printHelp("HFile", options, true);
145       return false;
146     }
147     CommandLineParser parser = new PosixParser();
148     CommandLine cmd = parser.parse(options, args);
149 
150     verbose = cmd.hasOption("v");
151     printValue = cmd.hasOption("p");
152     printKey = cmd.hasOption("e") || printValue;
153     shouldPrintMeta = cmd.hasOption("m");
154     printBlockIndex = cmd.hasOption("b");
155     printBlockHeaders = cmd.hasOption("h");
156     printStats = cmd.hasOption("s");
157     checkRow = cmd.hasOption("k");
158     checkFamily = cmd.hasOption("a");
159 
160     if (cmd.hasOption("f")) {
161       files.add(new Path(cmd.getOptionValue("f")));
162     }
163 
164     if (cmd.hasOption("w")) {
165       String key = cmd.getOptionValue("w");
166       if (key != null && key.length() != 0) {
167         row = key.getBytes();
168         isSeekToRow = true;
169       } else {
170         System.err.println("Invalid row is specified.");
171         System.exit(-1);
172       }
173     }
174 
175     if (cmd.hasOption("r")) {
176       String regionName = cmd.getOptionValue("r");
177       byte[] rn = Bytes.toBytes(regionName);
178       byte[][] hri = HRegionInfo.parseRegionName(rn);
179       Path rootDir = FSUtils.getRootDir(getConf());
180       Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
181       String enc = HRegionInfo.encodeRegionName(rn);
182       Path regionDir = new Path(tableDir, enc);
183       if (verbose)
184         System.out.println("region dir -> " + regionDir);
185       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
186           regionDir);
187       if (verbose)
188         System.out.println("Number of region files found -> "
189             + regionFiles.size());
190       if (verbose) {
191         int i = 1;
192         for (Path p : regionFiles) {
193           if (verbose)
194             System.out.println("Found file[" + i++ + "] -> " + p);
195         }
196       }
197       files.addAll(regionFiles);
198     }
199 
200     return true;
201   }
202 
203   /**
204    * Runs the command-line pretty-printer, and returns the desired command
205    * exit code (zero for success, non-zero for failure).
206    */
207   public int run(String[] args) {
208     if (getConf() == null) {
209       throw new RuntimeException("A Configuration instance must be provided.");
210     }
211     try {
212       FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
213       if (!parseOptions(args))
214         return 1;
215     } catch (IOException ex) {
216       LOG.error("Error parsing command-line options", ex);
217       return 1;
218     } catch (ParseException ex) {
219       LOG.error("Error parsing command-line options", ex);
220       return 1;
221     }
222 
223     // iterate over all files found
224     for (Path fileName : files) {
225       try {
226         processFile(fileName);
227       } catch (IOException ex) {
228         LOG.error("Error reading " + fileName, ex);
229       }
230     }
231 
232     if (verbose || printKey) {
233       System.out.println("Scanned kv count -> " + count);
234     }
235 
236     return 0;
237   }
238 
239   private void processFile(Path file) throws IOException {
240     if (verbose)
241       System.out.println("Scanning -> " + file);
242     FileSystem fs = file.getFileSystem(getConf());
243     if (!fs.exists(file)) {
244       System.err.println("ERROR, file doesnt exist: " + file);
245     }
246 
247     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());
248 
249     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
250 
251     KeyValueStatsCollector fileStats = null;
252 
253     if (verbose || printKey || checkRow || checkFamily || printStats) {
254       // scan over file and read key/value's and check if requested
255       HFileScanner scanner = reader.getScanner(false, false, false);
256       fileStats = new KeyValueStatsCollector();
257       boolean shouldScanKeysValues = false;
258       if (this.isSeekToRow) {
259         // seek to the first kv on this row
260         shouldScanKeysValues = 
261           (scanner.seekTo(KeyValueUtil.createFirstOnRow(this.row).getKey()) != -1);
262       } else {
263         shouldScanKeysValues = scanner.seekTo();
264       }
265       if (shouldScanKeysValues)
266         scanKeysValues(file, fileStats, scanner, row);
267     }
268 
269     // print meta data
270     if (shouldPrintMeta) {
271       printMeta(reader, fileInfo);
272     }
273 
274     if (printBlockIndex) {
275       System.out.println("Block Index:");
276       System.out.println(reader.getDataBlockIndexReader());
277     }
278 
279     if (printBlockHeaders) {
280       System.out.println("Block Headers:");
281       /*
282        * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and
283        * TestLazyDataBlockDecompression. Refactor?
284        */
285       FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
286       long fileSize = fs.getFileStatus(file).getLen();
287       FixedFileTrailer trailer =
288         FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
289       long offset = trailer.getFirstDataBlockOffset(),
290         max = trailer.getLastDataBlockOffset();
291       HFileBlock block;
292       while (offset <= max) {
293         block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
294           /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
295         offset += block.getOnDiskSizeWithHeader();
296         System.out.println(block);
297       }
298     }
299 
300     if (printStats) {
301       fileStats.finish();
302       System.out.println("Stats:\n" + fileStats);
303     }
304 
305     reader.close();
306   }
307 
308   private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
309       HFileScanner scanner,  byte[] row) throws IOException {
310     KeyValue pkv = null;
311     do {
312       KeyValue kv = KeyValueUtil.ensureKeyValue(scanner.getKeyValue());
313       if (row != null && row.length != 0) {
314         int result = Bytes.compareTo(kv.getRow(), row);
315         if (result > 0) {
316           break;
317         } else if (result < 0) {
318           continue;
319         }
320       }
321       // collect stats
322       if (printStats) {
323         fileStats.collect(kv);
324       }
325       // dump key value
326       if (printKey) {
327         System.out.print("K: " + kv);
328         if (printValue) {
329           System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
330           int i = 0;
331           List<Tag> tags = kv.getTags();
332           for (Tag tag : tags) {
333             System.out
334                 .print(String.format(" T[%d]: %s", i++, Bytes.toStringBinary(tag.getValue())));
335           }
336         }
337         System.out.println();
338       }
339       // check if rows are in order
340       if (checkRow && pkv != null) {
341         if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
342           System.err.println("WARNING, previous row is greater then"
343               + " current row\n\tfilename -> " + file + "\n\tprevious -> "
344               + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
345               + Bytes.toStringBinary(kv.getKey()));
346         }
347       }
348       // check if families are consistent
349       if (checkFamily) {
350         String fam = Bytes.toString(kv.getFamily());
351         if (!file.toString().contains(fam)) {
352           System.err.println("WARNING, filename does not match kv family,"
353               + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
354               + Bytes.toStringBinary(kv.getKey()));
355         }
356         if (pkv != null
357             && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
358           System.err.println("WARNING, previous kv has different family"
359               + " compared to current key\n\tfilename -> " + file
360               + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
361               + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
362         }
363       }
364       pkv = kv;
365       ++count;
366     } while (scanner.next());
367   }
368 
369   /**
370    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
371    * with a four-space indentation.
372    */
373   private static String asSeparateLines(String keyValueStr) {
374     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
375                                   ",\n" + FOUR_SPACES + "$1");
376   }
377 
378   private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
379       throws IOException {
380     System.out.println("Block index size as per heapsize: "
381         + reader.indexSize());
382     System.out.println(asSeparateLines(reader.toString()));
383     System.out.println("Trailer:\n    "
384         + asSeparateLines(reader.getTrailer().toString()));
385     System.out.println("Fileinfo:");
386     for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
387       System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
388       if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
389         long seqid = Bytes.toLong(e.getValue());
390         System.out.println(seqid);
391       } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
392         TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
393         Writables.copyWritable(e.getValue(), timeRangeTracker);
394         System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
395             + timeRangeTracker.getMaximumTimestamp());
396       } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
397           || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
398         System.out.println(Bytes.toInt(e.getValue()));
399       } else {
400         System.out.println(Bytes.toStringBinary(e.getValue()));
401       }
402     }
403 
404     try {
405       System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
406     } catch (Exception e) {
407       System.out.println ("Unable to retrieve the midkey");
408     }
409 
410     // Printing general bloom information
411     DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
412     BloomFilter bloomFilter = null;
413     if (bloomMeta != null)
414       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
415 
416     System.out.println("Bloom filter:");
417     if (bloomFilter != null) {
418       System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
419           ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
420     } else {
421       System.out.println(FOUR_SPACES + "Not present");
422     }
423 
424     // Printing delete bloom information
425     bloomMeta = reader.getDeleteBloomFilterMetadata();
426     bloomFilter = null;
427     if (bloomMeta != null)
428       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
429 
430     System.out.println("Delete Family Bloom filter:");
431     if (bloomFilter != null) {
432       System.out.println(FOUR_SPACES
433           + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
434               "\n" + FOUR_SPACES));
435     } else {
436       System.out.println(FOUR_SPACES + "Not present");
437     }
438   }
439 
440   private static class KeyValueStatsCollector {
441     private final MetricsRegistry metricsRegistry = new MetricsRegistry();
442     private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
443     private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
444     Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
445     Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
446     Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
447     Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
448 
449     long curRowBytes = 0;
450     long curRowCols = 0;
451 
452     byte[] biggestRow = null;
453 
454     private KeyValue prevKV = null;
455     private long maxRowBytes = 0;
456     private long curRowKeyLength;
457 
458     public void collect(KeyValue kv) {
459       valLen.update(kv.getValueLength());
460       if (prevKV != null &&
461           KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
462         // new row
463         collectRow();
464       }
465       curRowBytes += kv.getLength();
466       curRowKeyLength = kv.getKeyLength();
467       curRowCols++;
468       prevKV = kv;
469     }
470 
471     private void collectRow() {
472       rowSizeBytes.update(curRowBytes);
473       rowSizeCols.update(curRowCols);
474       keyLen.update(curRowKeyLength);
475 
476       if (curRowBytes > maxRowBytes && prevKV != null) {
477         biggestRow = prevKV.getRow();
478         maxRowBytes = curRowBytes;
479       }
480 
481       curRowBytes = 0;
482       curRowCols = 0;
483     }
484 
485     public void finish() {
486       if (curRowCols > 0) {
487         collectRow();
488       }
489     }
490 
491     @Override
492     public String toString() {
493       if (prevKV == null)
494         return "no data available for statistics";
495 
496       // Dump the metrics to the output stream
497       simpleReporter.shutdown();
498       simpleReporter.run();
499       metricsRegistry.shutdown();
500 
501       return
502               metricsOutput.toString() +
503                       "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
504     }
505   }
506 
507   private static class SimpleReporter extends ConsoleReporter {
508     private final PrintStream out;
509 
510     public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
511       super(metricsRegistry, out, MetricPredicate.ALL);
512       this.out = out;
513     }
514 
515     @Override
516     public void run() {
517       for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
518               MetricPredicate.ALL).entrySet()) {
519         try {
520           for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
521             out.print("   " + subEntry.getKey().getName());
522             out.println(':');
523 
524             subEntry.getValue().processWith(this, subEntry.getKey(), out);
525           }
526         } catch (Exception e) {
527           e.printStackTrace(out);
528         }
529       }
530     }
531 
532     @Override
533     public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
534       super.processHistogram(name, histogram, stream);
535       stream.printf(Locale.getDefault(), "             count = %d\n", histogram.count());
536     }
537   }
538 
539   public static void main(String[] args) throws Exception {
540     Configuration conf = HBaseConfiguration.create();
541     // no need for a block cache
542     conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
543     int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
544     System.exit(ret);
545   }
546 }