View Javadoc

1   
2   /*
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataInput;
24  import java.io.IOException;
25  import java.io.PrintStream;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.Locale;
29  import java.util.Map;
30  import java.util.SortedMap;
31  
32  import org.apache.commons.cli.CommandLine;
33  import org.apache.commons.cli.CommandLineParser;
34  import org.apache.commons.cli.HelpFormatter;
35  import org.apache.commons.cli.Option;
36  import org.apache.commons.cli.OptionGroup;
37  import org.apache.commons.cli.Options;
38  import org.apache.commons.cli.ParseException;
39  import org.apache.commons.cli.PosixParser;
40  import org.apache.commons.logging.Log;
41  import org.apache.commons.logging.LogFactory;
42  import org.apache.hadoop.hbase.classification.InterfaceAudience;
43  import org.apache.hadoop.hbase.classification.InterfaceStability;
44  import org.apache.hadoop.conf.Configuration;
45  import org.apache.hadoop.conf.Configured;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.hbase.Cell;
49  import org.apache.hadoop.hbase.CellComparator;
50  import org.apache.hadoop.hbase.CellUtil;
51  import org.apache.hadoop.hbase.HConstants;
52  import org.apache.hadoop.hbase.TableName;
53  import org.apache.hadoop.hbase.HBaseConfiguration;
54  import org.apache.hadoop.hbase.HRegionInfo;
55  import org.apache.hadoop.hbase.KeyValue;
56  import org.apache.hadoop.hbase.KeyValueUtil;
57  import org.apache.hadoop.hbase.Tag;
58  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
59  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
60  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
61  import org.apache.hadoop.hbase.util.BloomFilter;
62  import org.apache.hadoop.hbase.util.BloomFilterFactory;
63  import org.apache.hadoop.hbase.util.ByteBloomFilter;
64  import org.apache.hadoop.hbase.util.Bytes;
65  import org.apache.hadoop.hbase.util.FSUtils;
66  import org.apache.hadoop.hbase.util.Writables;
67  import org.apache.hadoop.util.Tool;
68  import org.apache.hadoop.util.ToolRunner;
69  
70  import com.yammer.metrics.core.Histogram;
71  import com.yammer.metrics.core.Metric;
72  import com.yammer.metrics.core.MetricName;
73  import com.yammer.metrics.core.MetricPredicate;
74  import com.yammer.metrics.core.MetricsRegistry;
75  import com.yammer.metrics.reporting.ConsoleReporter;
76  
/**
 * Implements pretty-printing functionality for {@link HFile}s.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HFilePrettyPrinter extends Configured implements Tool {

  private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);

  // Command-line option definitions; built in init(), consumed by parseOptions().
  private Options options = new Options();

  // Flags set from the parsed command line (see init() for option meanings).
  private boolean verbose;             // -v: emit file and meta data delimiters
  private boolean printValue;          // -p: print key/value pairs
  private boolean printKey;            // -e: print keys (implied by -p)
  private boolean shouldPrintMeta;     // -m: print file meta data
  private boolean printBlockIndex;     // -b: print block index meta data
  private boolean printBlockHeaders;   // -h: print each block's header
  private boolean printStats;          // -s: print statistics
  private boolean checkRow;            // -k: check for out-of-order rows
  private boolean checkFamily;         // -a: check family consistency
  private boolean isSeekToRow = false; // true when -w supplied a row key

  /**
   * The row which the user wants to specify and print all the KeyValues for.
   */
  private byte[] row = null;

  // HFiles to process, collected from -f and/or -r during option parsing.
  private List<Path> files = new ArrayList<Path>();
  // Running total of key/values scanned across all processed files.
  private int count;

  private static final String FOUR_SPACES = "    ";

  /** Creates a printer with no configuration; one must be supplied via setConf(). */
  public HFilePrettyPrinter() {
    super();
    init();
  }

  /** Creates a printer using the given configuration. */
  public HFilePrettyPrinter(Configuration conf) {
    super(conf);
    init();
  }
118 
119   private void init() {
120     options.addOption("v", "verbose", false,
121         "Verbose output; emits file and meta data delimiters");
122     options.addOption("p", "printkv", false, "Print key/value pairs");
123     options.addOption("e", "printkey", false, "Print keys");
124     options.addOption("m", "printmeta", false, "Print meta data of file");
125     options.addOption("b", "printblocks", false, "Print block index meta data");
126     options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
127     options.addOption("k", "checkrow", false,
128         "Enable row order check; looks for out-of-order keys");
129     options.addOption("a", "checkfamily", false, "Enable family check");
130     options.addOption("w", "seekToRow", true,
131       "Seek to this row and print all the kvs for this row only");
132     options.addOption("s", "stats", false, "Print statistics");
133 
134     OptionGroup files = new OptionGroup();
135     files.addOption(new Option("f", "file", true,
136       "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
137     files.addOption(new Option("r", "region", true,
138       "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
139     options.addOptionGroup(files);
140   }
141 
142   public boolean parseOptions(String args[]) throws ParseException,
143       IOException {
144     if (args.length == 0) {
145       HelpFormatter formatter = new HelpFormatter();
146       formatter.printHelp("HFile", options, true);
147       return false;
148     }
149     CommandLineParser parser = new PosixParser();
150     CommandLine cmd = parser.parse(options, args);
151 
152     verbose = cmd.hasOption("v");
153     printValue = cmd.hasOption("p");
154     printKey = cmd.hasOption("e") || printValue;
155     shouldPrintMeta = cmd.hasOption("m");
156     printBlockIndex = cmd.hasOption("b");
157     printBlockHeaders = cmd.hasOption("h");
158     printStats = cmd.hasOption("s");
159     checkRow = cmd.hasOption("k");
160     checkFamily = cmd.hasOption("a");
161 
162     if (cmd.hasOption("f")) {
163       files.add(new Path(cmd.getOptionValue("f")));
164     }
165 
166     if (cmd.hasOption("w")) {
167       String key = cmd.getOptionValue("w");
168       if (key != null && key.length() != 0) {
169         row = key.getBytes();
170         isSeekToRow = true;
171       } else {
172         System.err.println("Invalid row is specified.");
173         System.exit(-1);
174       }
175     }
176 
177     if (cmd.hasOption("r")) {
178       String regionName = cmd.getOptionValue("r");
179       byte[] rn = Bytes.toBytes(regionName);
180       byte[][] hri = HRegionInfo.parseRegionName(rn);
181       Path rootDir = FSUtils.getRootDir(getConf());
182       Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
183       String enc = HRegionInfo.encodeRegionName(rn);
184       Path regionDir = new Path(tableDir, enc);
185       if (verbose)
186         System.out.println("region dir -> " + regionDir);
187       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
188           regionDir);
189       if (verbose)
190         System.out.println("Number of region files found -> "
191             + regionFiles.size());
192       if (verbose) {
193         int i = 1;
194         for (Path p : regionFiles) {
195           if (verbose)
196             System.out.println("Found file[" + i++ + "] -> " + p);
197         }
198       }
199       files.addAll(regionFiles);
200     }
201 
202     return true;
203   }
204 
205   /**
206    * Runs the command-line pretty-printer, and returns the desired command
207    * exit code (zero for success, non-zero for failure).
208    */
209   public int run(String[] args) {
210     if (getConf() == null) {
211       throw new RuntimeException("A Configuration instance must be provided.");
212     }
213     try {
214       FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
215       if (!parseOptions(args))
216         return 1;
217     } catch (IOException ex) {
218       LOG.error("Error parsing command-line options", ex);
219       return 1;
220     } catch (ParseException ex) {
221       LOG.error("Error parsing command-line options", ex);
222       return 1;
223     }
224 
225     // iterate over all files found
226     for (Path fileName : files) {
227       try {
228         processFile(fileName);
229       } catch (IOException ex) {
230         LOG.error("Error reading " + fileName, ex);
231         System.exit(-2);
232       }
233     }
234 
235     if (verbose || printKey) {
236       System.out.println("Scanned kv count -> " + count);
237     }
238 
239     return 0;
240   }
241 
242   private void processFile(Path file) throws IOException {
243     if (verbose)
244       System.out.println("Scanning -> " + file);
245     FileSystem fs = file.getFileSystem(getConf());
246     if (!fs.exists(file)) {
247       System.err.println("ERROR, file doesnt exist: " + file);
248       System.exit(-2);
249     }
250 
251     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());
252 
253     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
254 
255     KeyValueStatsCollector fileStats = null;
256 
257     if (verbose || printKey || checkRow || checkFamily || printStats) {
258       // scan over file and read key/value's and check if requested
259       HFileScanner scanner = reader.getScanner(false, false, false);
260       fileStats = new KeyValueStatsCollector();
261       boolean shouldScanKeysValues = false;
262       if (this.isSeekToRow) {
263         // seek to the first kv on this row
264         shouldScanKeysValues = 
265           (scanner.seekTo(KeyValueUtil.createFirstOnRow(this.row).getKey()) != -1);
266       } else {
267         shouldScanKeysValues = scanner.seekTo();
268       }
269       if (shouldScanKeysValues)
270         scanKeysValues(file, fileStats, scanner, row);
271     }
272 
273     // print meta data
274     if (shouldPrintMeta) {
275       printMeta(reader, fileInfo);
276     }
277 
278     if (printBlockIndex) {
279       System.out.println("Block Index:");
280       System.out.println(reader.getDataBlockIndexReader());
281     }
282 
283     if (printBlockHeaders) {
284       System.out.println("Block Headers:");
285       /*
286        * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and
287        * TestLazyDataBlockDecompression. Refactor?
288        */
289       FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
290       long fileSize = fs.getFileStatus(file).getLen();
291       FixedFileTrailer trailer =
292         FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
293       long offset = trailer.getFirstDataBlockOffset(),
294         max = trailer.getLastDataBlockOffset();
295       HFileBlock block;
296       while (offset <= max) {
297         block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
298           /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
299         offset += block.getOnDiskSizeWithHeader();
300         System.out.println(block);
301       }
302     }
303 
304     if (printStats) {
305       fileStats.finish();
306       System.out.println("Stats:\n" + fileStats);
307     }
308 
309     reader.close();
310   }
311 
  /**
   * Iterates over all cells reachable from the scanner's current position,
   * optionally restricting processing to a single row, dumping keys/values,
   * collecting statistics, and checking row ordering / family consistency as
   * configured.
   *
   * @param file      the file being scanned (used in warning messages)
   * @param fileStats collector updated for each cell when stats are enabled
   * @param scanner   scanner already positioned at the first cell to examine
   * @param row       if non-null and non-empty, only cells of this row are processed
   * @throws IOException if advancing the scanner fails
   */
  private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
      HFileScanner scanner,  byte[] row) throws IOException {
    Cell pCell = null;
    do {
      Cell cell = scanner.getKeyValue();
      if (row != null && row.length != 0) {
        int result = CellComparator.compareRows(cell.getRowArray(), cell.getRowOffset(),
            cell.getRowLength(), row, 0, row.length);
        if (result > 0) {
          // Past the requested row; cells are sorted, so we are done.
          break;
        } else if (result < 0) {
          // Before the requested row; keep scanning forward.
          continue;
        }
      }
      // collect stats
      if (printStats) {
        fileStats.collect(cell);
      }
      // dump key value
      if (printKey) {
        System.out.print("K: " + cell);
        if (printValue) {
          System.out.print(" V: "
              + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(),
                  cell.getValueLength()));
          // Also dump any tags attached to the cell, indexed T[0], T[1], ...
          int i = 0;
          List<Tag> tags = Tag.asList(cell.getTagsArray(), cell.getTagsOffset(),
              cell.getTagsLength());
          for (Tag tag : tags) {
            System.out.print(String.format(" T[%d]: %s", i++,
                Bytes.toStringBinary(tag.getBuffer(), tag.getTagOffset(), tag.getTagLength())));
          }
        }
        System.out.println();
      }
      // check if rows are in order
      if (checkRow && pCell != null) {
        if (CellComparator.compareRows(pCell, cell) > 0) {
          System.err.println("WARNING, previous row is greater then"
              + " current row\n\tfilename -> " + file + "\n\tprevious -> "
              + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent  -> "
              + CellUtil.getCellKeyAsString(cell));
        }
      }
      // check if families are consistent
      if (checkFamily) {
        String fam = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(),
            cell.getFamilyLength());
        // Store file paths normally contain the family name; warn if this
        // one does not.
        if (!file.toString().contains(fam)) {
          System.err.println("WARNING, filename does not match kv family,"
              + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
              + CellUtil.getCellKeyAsString(cell));
        }
        if (pCell != null && CellComparator.compareFamilies(pCell, cell) != 0) {
          System.err.println("WARNING, previous kv has different family"
              + " compared to current key\n\tfilename -> " + file
              + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell)
              + "\n\tcurrent  -> " + CellUtil.getCellKeyAsString(cell));
        }
      }
      pCell = cell;
      ++count;
    } while (scanner.next());
  }
376 
377   /**
378    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
379    * with a four-space indentation.
380    */
381   private static String asSeparateLines(String keyValueStr) {
382     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
383                                   ",\n" + FOUR_SPACES + "$1");
384   }
385 
  /**
   * Prints index size, trailer, file-info entries (with special decoding for
   * well-known keys), the mid-key, and general/delete-family bloom filter
   * details for the given reader.
   *
   * @param reader   open reader for the file being described
   * @param fileInfo file-info map previously loaded from the reader
   * @throws IOException if bloom filter metadata cannot be read
   */
  private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
      throws IOException {
    System.out.println("Block index size as per heapsize: "
        + reader.indexSize());
    System.out.println(asSeparateLines(reader.toString()));
    System.out.println("Trailer:\n    "
        + asSeparateLines(reader.getTrailer().toString()));
    System.out.println("Fileinfo:");
    for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
      System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
      if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
        // Max sequence id is serialized as a long.
        long seqid = Bytes.toLong(e.getValue());
        System.out.println(seqid);
      } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
        // Time range is a serialized TimeRangeTracker Writable.
        TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
        Writables.copyWritable(e.getValue(), timeRangeTracker);
        System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
            + timeRangeTracker.getMaximumTimestamp());
      } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
          || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
        // Average key/value lengths are serialized as ints.
        System.out.println(Bytes.toInt(e.getValue()));
      } else {
        System.out.println(Bytes.toStringBinary(e.getValue()));
      }
    }

    try {
      System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
    } catch (Exception e) {
      // midkey() may fail (e.g. no data blocks); report instead of aborting.
      System.out.println ("Unable to retrieve the midkey");
    }

    // Printing general bloom information
    DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
    BloomFilter bloomFilter = null;
    if (bloomMeta != null)
      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

    System.out.println("Bloom filter:");
    if (bloomFilter != null) {
      // Split the single-line stats record onto indented lines.
      System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
          ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
    } else {
      System.out.println(FOUR_SPACES + "Not present");
    }

    // Printing delete bloom information
    bloomMeta = reader.getDeleteBloomFilterMetadata();
    bloomFilter = null;
    if (bloomMeta != null)
      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

    System.out.println("Delete Family Bloom filter:");
    if (bloomFilter != null) {
      System.out.println(FOUR_SPACES
          + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
              "\n" + FOUR_SPACES));
    } else {
      System.out.println(FOUR_SPACES + "Not present");
    }
  }
447 
  /**
   * Accumulates per-cell and per-row statistics (key/value lengths, row sizes)
   * in Yammer metrics histograms and renders them via {@link SimpleReporter}.
   * Call {@link #collect(Cell)} for every cell in order, then {@link #finish()}
   * once before reading {@link #toString()}.
   */
  private static class KeyValueStatsCollector {
    private final MetricsRegistry metricsRegistry = new MetricsRegistry();
    private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
    // The reporter writes into metricsOutput so toString() can return the dump.
    private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
    Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
    Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
    Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
    Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");

    // Running totals for the row currently being scanned.
    long curRowBytes = 0;
    long curRowCols = 0;

    // Row key of the largest row (by serialized bytes) seen so far.
    byte[] biggestRow = null;

    private Cell prevCell = null;
    private long maxRowBytes = 0;
    private long curRowKeyLength;

    /**
     * Records one cell. When the row changes relative to the previous cell,
     * the finished row's totals are flushed to the histograms first.
     */
    public void collect(Cell cell) {
      valLen.update(cell.getValueLength());
      if (prevCell != null &&
          KeyValue.COMPARATOR.compareRows(prevCell, cell) != 0) {
        // new row
        collectRow();
      }
      curRowBytes += KeyValueUtil.length(cell);
      // NOTE(review): only the LAST cell's key length in each row reaches the
      // keyLen histogram (updated once per row in collectRow) -- confirm this
      // is the intended sampling.
      curRowKeyLength = KeyValueUtil.keyLength(cell);
      curRowCols++;
      prevCell = cell;
    }

    /** Flushes the current row's totals into the histograms and resets them. */
    private void collectRow() {
      rowSizeBytes.update(curRowBytes);
      rowSizeCols.update(curRowCols);
      keyLen.update(curRowKeyLength);

      if (curRowBytes > maxRowBytes && prevCell != null) {
        biggestRow = prevCell.getRow();
        maxRowBytes = curRowBytes;
      }

      curRowBytes = 0;
      curRowCols = 0;
    }

    /** Must be called after the last cell so the final row is counted. */
    public void finish() {
      if (curRowCols > 0) {
        collectRow();
      }
    }

    @Override
    public String toString() {
      if (prevCell == null)
        return "no data available for statistics";

      // Dump the metrics to the output stream
      simpleReporter.shutdown();
      simpleReporter.run();
      metricsRegistry.shutdown();

      return
              metricsOutput.toString() +
                      "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
    }
  }
514 
  /**
   * A ConsoleReporter variant that prints every registered metric to the
   * supplied stream and appends the sample count for histograms.
   */
  private static class SimpleReporter extends ConsoleReporter {
    private final PrintStream out;

    public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
      super(metricsRegistry, out, MetricPredicate.ALL);
      this.out = out;
    }

    @Override
    public void run() {
      // Walk all metrics grouped by scope; exceptions are dumped to the
      // stream rather than aborting the report.
      for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
              MetricPredicate.ALL).entrySet()) {
        try {
          for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
            out.print("   " + subEntry.getKey().getName());
            out.println(':');

            subEntry.getValue().processWith(this, subEntry.getKey(), out);
          }
        } catch (Exception e) {
          e.printStackTrace(out);
        }
      }
    }

    @Override
    public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
      super.processHistogram(name, histogram, stream);
      // The parent output omits the sample count; append it here.
      stream.printf(Locale.getDefault(), "             count = %d%n", histogram.count());
    }
  }
546 
547   public static void main(String[] args) throws Exception {
548     Configuration conf = HBaseConfiguration.create();
549     // no need for a block cache
550     conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
551     int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
552     System.exit(ret);
553   }
554 }