View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.ByteArrayOutputStream;
22  import java.io.DataInput;
23  import java.io.IOException;
24  import java.io.PrintStream;
25  import java.util.ArrayList;
26  import java.util.List;
27  import java.util.Locale;
28  import java.util.Map;
29  import java.util.SortedMap;
30  
31  import org.apache.commons.cli.CommandLine;
32  import org.apache.commons.cli.CommandLineParser;
33  import org.apache.commons.cli.HelpFormatter;
34  import org.apache.commons.cli.Option;
35  import org.apache.commons.cli.OptionGroup;
36  import org.apache.commons.cli.Options;
37  import org.apache.commons.cli.ParseException;
38  import org.apache.commons.cli.PosixParser;
39  import org.apache.commons.logging.Log;
40  import org.apache.commons.logging.LogFactory;
41  import org.apache.hadoop.hbase.classification.InterfaceAudience;
42  import org.apache.hadoop.hbase.classification.InterfaceStability;
43  import org.apache.hadoop.conf.Configuration;
44  import org.apache.hadoop.conf.Configured;
45  import org.apache.hadoop.fs.FileSystem;
46  import org.apache.hadoop.fs.Path;
47  import org.apache.hadoop.hbase.Cell;
48  import org.apache.hadoop.hbase.CellComparator;
49  import org.apache.hadoop.hbase.CellUtil;
50  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
51  import org.apache.hadoop.hbase.HConstants;
52  import org.apache.hadoop.hbase.TableName;
53  import org.apache.hadoop.hbase.HBaseConfiguration;
54  import org.apache.hadoop.hbase.HRegionInfo;
55  import org.apache.hadoop.hbase.KeyValue;
56  import org.apache.hadoop.hbase.KeyValueUtil;
57  import org.apache.hadoop.hbase.Tag;
58  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
59  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
60  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
61  import org.apache.hadoop.hbase.util.BloomFilter;
62  import org.apache.hadoop.hbase.util.BloomFilterFactory;
63  import org.apache.hadoop.hbase.util.ByteBloomFilter;
64  import org.apache.hadoop.hbase.util.Bytes;
65  import org.apache.hadoop.hbase.util.FSUtils;
66  import org.apache.hadoop.hbase.util.Writables;
67  import org.apache.hadoop.util.Tool;
68  import org.apache.hadoop.util.ToolRunner;
69  
70  import com.yammer.metrics.core.Histogram;
71  import com.yammer.metrics.core.Metric;
72  import com.yammer.metrics.core.MetricName;
73  import com.yammer.metrics.core.MetricPredicate;
74  import com.yammer.metrics.core.MetricsRegistry;
75  import com.yammer.metrics.reporting.ConsoleReporter;
76  
77  /**
78   * Implements pretty-printing functionality for {@link HFile}s.
79   */
80  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
81  @InterfaceStability.Evolving
82  public class HFilePrettyPrinter extends Configured implements Tool {
83  
84    private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
85  
86    private Options options = new Options();
87  
88    private boolean verbose;
89    private boolean printValue;
90    private boolean printKey;
91    private boolean shouldPrintMeta;
92    private boolean printBlockIndex;
93    private boolean printBlockHeaders;
94    private boolean printStats;
95    private boolean checkRow;
96    private boolean checkFamily;
97    private boolean isSeekToRow = false;
98  
99    private PrintStream out = System.out;
100   private PrintStream err = System.err;
101 
102   /**
103    * The row which the user wants to specify and print all the KeyValues for.
104    */
105   private byte[] row = null;
106 
107   private List<Path> files = new ArrayList<Path>();
108   private int count;
109 
110   private static final String FOUR_SPACES = "    ";
111 
112   public HFilePrettyPrinter() {
113     super();
114     init();
115   }
116 
117   public HFilePrettyPrinter(Configuration conf) {
118     super(conf);
119     init();
120   }
121 
122   private void init() {
123     options.addOption("v", "verbose", false,
124         "Verbose output; emits file and meta data delimiters");
125     options.addOption("p", "printkv", false, "Print key/value pairs");
126     options.addOption("e", "printkey", false, "Print keys");
127     options.addOption("m", "printmeta", false, "Print meta data of file");
128     options.addOption("b", "printblocks", false, "Print block index meta data");
129     options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
130     options.addOption("k", "checkrow", false,
131         "Enable row order check; looks for out-of-order keys");
132     options.addOption("a", "checkfamily", false, "Enable family check");
133     options.addOption("w", "seekToRow", true,
134       "Seek to this row and print all the kvs for this row only");
135     options.addOption("s", "stats", false, "Print statistics");
136 
137     OptionGroup files = new OptionGroup();
138     files.addOption(new Option("f", "file", true,
139       "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
140     files.addOption(new Option("r", "region", true,
141       "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
142     options.addOptionGroup(files);
143   }
144 
145   public void setPrintStreams(PrintStream out, PrintStream err) {
146     this.out = out;
147     this.err = err;
148   }
149 
150   public boolean parseOptions(String args[]) throws ParseException,
151       IOException {
152     if (args.length == 0) {
153       HelpFormatter formatter = new HelpFormatter();
154       formatter.printHelp("HFile", options, true);
155       return false;
156     }
157     CommandLineParser parser = new PosixParser();
158     CommandLine cmd = parser.parse(options, args);
159 
160     verbose = cmd.hasOption("v");
161     printValue = cmd.hasOption("p");
162     printKey = cmd.hasOption("e") || printValue;
163     shouldPrintMeta = cmd.hasOption("m");
164     printBlockIndex = cmd.hasOption("b");
165     printBlockHeaders = cmd.hasOption("h");
166     printStats = cmd.hasOption("s");
167     checkRow = cmd.hasOption("k");
168     checkFamily = cmd.hasOption("a");
169 
170     if (cmd.hasOption("f")) {
171       files.add(new Path(cmd.getOptionValue("f")));
172     }
173 
174     if (cmd.hasOption("w")) {
175       String key = cmd.getOptionValue("w");
176       if (key != null && key.length() != 0) {
177         row = Bytes.toBytesBinary(key);
178         isSeekToRow = true;
179       } else {
180         err.println("Invalid row is specified.");
181         System.exit(-1);
182       }
183     }
184 
185     if (cmd.hasOption("r")) {
186       String regionName = cmd.getOptionValue("r");
187       byte[] rn = Bytes.toBytes(regionName);
188       byte[][] hri = HRegionInfo.parseRegionName(rn);
189       Path rootDir = FSUtils.getRootDir(getConf());
190       Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
191       String enc = HRegionInfo.encodeRegionName(rn);
192       Path regionDir = new Path(tableDir, enc);
193       if (verbose)
194         out.println("region dir -> " + regionDir);
195       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
196           regionDir);
197       if (verbose)
198         out.println("Number of region files found -> "
199             + regionFiles.size());
200       if (verbose) {
201         int i = 1;
202         for (Path p : regionFiles) {
203           if (verbose)
204             out.println("Found file[" + i++ + "] -> " + p);
205         }
206       }
207       files.addAll(regionFiles);
208     }
209 
210     return true;
211   }
212 
213   /**
214    * Runs the command-line pretty-printer, and returns the desired command
215    * exit code (zero for success, non-zero for failure).
216    */
217   @Override
218   public int run(String[] args) {
219     if (getConf() == null) {
220       throw new RuntimeException("A Configuration instance must be provided.");
221     }
222     try {
223       FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
224       if (!parseOptions(args))
225         return 1;
226     } catch (IOException ex) {
227       LOG.error("Error parsing command-line options", ex);
228       return 1;
229     } catch (ParseException ex) {
230       LOG.error("Error parsing command-line options", ex);
231       return 1;
232     }
233 
234     // iterate over all files found
235     for (Path fileName : files) {
236       try {
237         int exitCode = processFile(fileName);
238         if (exitCode != 0) {
239           return exitCode;
240         }
241       } catch (IOException ex) {
242         LOG.error("Error reading " + fileName, ex);
243         return -2;
244       }
245     }
246 
247     if (verbose || printKey) {
248       out.println("Scanned kv count -> " + count);
249     }
250 
251     return 0;
252   }
253 
254   public int processFile(Path file) throws IOException {
255     if (verbose)
256       out.println("Scanning -> " + file);
257 
258     Path rootPath = FSUtils.getRootDir(getConf());
259     String rootString = rootPath + rootPath.SEPARATOR;
260     if (!file.toString().startsWith(rootString)) {
261       // First we see if fully-qualified URI matches the root dir. It might
262       // also be an absolute path in the same filesystem, so we prepend the FS
263       // of the root dir and see if that fully-qualified URI matches.
264       FileSystem rootFS = rootPath.getFileSystem(getConf());
265       String qualifiedFile = rootFS.getUri().toString() + file.toString();
266       if (!qualifiedFile.startsWith(rootString)) {
267         err.println("ERROR, file (" + file +
268             ") is not in HBase's root directory (" + rootString + ")");
269         return -2;
270       }
271     }
272 
273     FileSystem fs = file.getFileSystem(getConf());
274     if (!fs.exists(file)) {
275       err.println("ERROR, file doesnt exist: " + file);
276       return -2;
277     }
278 
279     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());
280 
281     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
282 
283     KeyValueStatsCollector fileStats = null;
284 
285     if (verbose || printKey || checkRow || checkFamily || printStats) {
286       // scan over file and read key/value's and check if requested
287       HFileScanner scanner = reader.getScanner(false, false, false);
288       fileStats = new KeyValueStatsCollector();
289       boolean shouldScanKeysValues = false;
290       if (this.isSeekToRow) {
291         // seek to the first kv on this row
292         shouldScanKeysValues =
293           (scanner.seekTo(KeyValueUtil.createFirstOnRow(this.row).getKey()) != -1);
294       } else {
295         shouldScanKeysValues = scanner.seekTo();
296       }
297       if (shouldScanKeysValues)
298         scanKeysValues(file, fileStats, scanner, row);
299     }
300 
301     // print meta data
302     if (shouldPrintMeta) {
303       printMeta(reader, fileInfo);
304     }
305 
306     if (printBlockIndex) {
307       out.println("Block Index:");
308       out.println(reader.getDataBlockIndexReader());
309     }
310 
311     if (printBlockHeaders) {
312       out.println("Block Headers:");
313       /*
314        * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and
315        * TestLazyDataBlockDecompression. Refactor?
316        */
317       FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
318       long fileSize = fs.getFileStatus(file).getLen();
319       FixedFileTrailer trailer =
320         FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
321       long offset = trailer.getFirstDataBlockOffset(),
322         max = trailer.getLastDataBlockOffset();
323       HFileBlock block;
324       while (offset <= max) {
325         block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
326           /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
327         offset += block.getOnDiskSizeWithHeader();
328         out.println(block);
329       }
330     }
331 
332     if (printStats) {
333       fileStats.finish();
334       out.println("Stats:\n" + fileStats);
335     }
336 
337     reader.close();
338     return 0;
339   }
340 
341   private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
342       HFileScanner scanner,  byte[] row) throws IOException {
343     Cell pCell = null;
344     do {
345       Cell cell = scanner.getKeyValue();
346       if (row != null && row.length != 0) {
347         int result = CellComparator.compareRows(cell.getRowArray(), cell.getRowOffset(),
348             cell.getRowLength(), row, 0, row.length);
349         if (result > 0) {
350           break;
351         } else if (result < 0) {
352           continue;
353         }
354       }
355       // collect stats
356       if (printStats) {
357         fileStats.collect(cell);
358       }
359       // dump key value
360       if (printKey) {
361         out.print("K: " + cell);
362         if (printValue) {
363           out.print(" V: "
364               + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(),
365                   cell.getValueLength()));
366           int i = 0;
367           List<Tag> tags = Tag.asList(cell.getTagsArray(), cell.getTagsOffset(),
368               cell.getTagsLength());
369           for (Tag tag : tags) {
370             out.print(String.format(" T[%d]: %s", i++,
371                 Bytes.toStringBinary(tag.getBuffer(), tag.getTagOffset(), tag.getTagLength())));
372           }
373         }
374         out.println();
375       }
376       // check if rows are in order
377       if (checkRow && pCell != null) {
378         if (CellComparator.compareRows(pCell, cell) > 0) {
379           err.println("WARNING, previous row is greater then"
380               + " current row\n\tfilename -> " + file + "\n\tprevious -> "
381               + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent  -> "
382               + CellUtil.getCellKeyAsString(cell));
383         }
384       }
385       // check if families are consistent
386       if (checkFamily) {
387         String fam = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(),
388             cell.getFamilyLength());
389         if (!file.toString().contains(fam)) {
390           err.println("WARNING, filename does not match kv family,"
391               + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
392               + CellUtil.getCellKeyAsString(cell));
393         }
394         if (pCell != null && CellComparator.compareFamilies(pCell, cell) != 0) {
395           err.println("WARNING, previous kv has different family"
396               + " compared to current key\n\tfilename -> " + file
397               + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell)
398               + "\n\tcurrent  -> " + CellUtil.getCellKeyAsString(cell));
399         }
400       }
401       pCell = cell;
402       ++count;
403     } while (scanner.next());
404   }
405 
406   /**
407    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
408    * with a four-space indentation.
409    */
410   private static String asSeparateLines(String keyValueStr) {
411     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
412                                   ",\n" + FOUR_SPACES + "$1");
413   }
414 
415   private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
416       throws IOException {
417     out.println("Block index size as per heapsize: "
418         + reader.indexSize());
419     out.println(asSeparateLines(reader.toString()));
420     out.println("Trailer:\n    "
421         + asSeparateLines(reader.getTrailer().toString()));
422     out.println("Fileinfo:");
423     for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
424       out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
425       if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
426         long seqid = Bytes.toLong(e.getValue());
427         out.println(seqid);
428       } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
429 
430         TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
431         Writables.copyWritable(e.getValue(), timeRangeTracker);
432         out.println(timeRangeTracker.getMinimumTimestamp() + "...."
433             + timeRangeTracker.getMaximumTimestamp());
434       } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
435           || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
436         out.println(Bytes.toInt(e.getValue()));
437       } else {
438         out.println(Bytes.toStringBinary(e.getValue()));
439       }
440     }
441 
442     try {
443 
444       out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
445     } catch (Exception e) {
446       out.println ("Unable to retrieve the midkey");
447     }
448 
449     // Printing general bloom information
450     DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
451     BloomFilter bloomFilter = null;
452     if (bloomMeta != null)
453       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
454 
455     out.println("Bloom filter:");
456     if (bloomFilter != null) {
457       out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
458           ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
459     } else {
460       out.println(FOUR_SPACES + "Not present");
461     }
462 
463     // Printing delete bloom information
464     bloomMeta = reader.getDeleteBloomFilterMetadata();
465     bloomFilter = null;
466     if (bloomMeta != null)
467       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
468 
469     out.println("Delete Family Bloom filter:");
470     if (bloomFilter != null) {
471       out.println(FOUR_SPACES
472           + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
473               "\n" + FOUR_SPACES));
474     } else {
475       out.println(FOUR_SPACES + "Not present");
476     }
477   }
478 
479   private static class KeyValueStatsCollector {
480     private final MetricsRegistry metricsRegistry = new MetricsRegistry();
481     private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
482     private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
483     Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
484     Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
485     Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
486     Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
487 
488     long curRowBytes = 0;
489     long curRowCols = 0;
490 
491     byte[] biggestRow = null;
492 
493     private Cell prevCell = null;
494     private long maxRowBytes = 0;
495     private long curRowKeyLength;
496 
497     public void collect(Cell cell) {
498       valLen.update(cell.getValueLength());
499       if (prevCell != null &&
500           KeyValue.COMPARATOR.compareRows(prevCell, cell) != 0) {
501         // new row
502         collectRow();
503       }
504       curRowBytes += KeyValueUtil.length(cell);
505       curRowKeyLength = KeyValueUtil.keyLength(cell);
506       curRowCols++;
507       prevCell = cell;
508     }
509 
510     private void collectRow() {
511       rowSizeBytes.update(curRowBytes);
512       rowSizeCols.update(curRowCols);
513       keyLen.update(curRowKeyLength);
514 
515       if (curRowBytes > maxRowBytes && prevCell != null) {
516         biggestRow = prevCell.getRow();
517         maxRowBytes = curRowBytes;
518       }
519 
520       curRowBytes = 0;
521       curRowCols = 0;
522     }
523 
524     public void finish() {
525       if (curRowCols > 0) {
526         collectRow();
527       }
528     }
529 
530     @Override
531     public String toString() {
532       if (prevCell == null)
533         return "no data available for statistics";
534 
535       // Dump the metrics to the output stream
536       simpleReporter.shutdown();
537       simpleReporter.run();
538       metricsRegistry.shutdown();
539 
540       return
541               metricsOutput.toString() +
542                       "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
543     }
544   }
545 
546   private static class SimpleReporter extends ConsoleReporter {
547     private final PrintStream out;
548 
549     public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
550       super(metricsRegistry, out, MetricPredicate.ALL);
551       this.out = out;
552     }
553 
554     @Override
555     public void run() {
556       for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
557               MetricPredicate.ALL).entrySet()) {
558         try {
559           for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
560             out.print("   " + subEntry.getKey().getName());
561             out.println(':');
562 
563             subEntry.getValue().processWith(this, subEntry.getKey(), out);
564           }
565         } catch (Exception e) {
566           e.printStackTrace(out);
567         }
568       }
569     }
570 
571     @Override
572     public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
573       super.processHistogram(name, histogram, stream);
574       stream.printf(Locale.getDefault(), "             count = %d%n", histogram.count());
575     }
576   }
577 
578   public static void main(String[] args) throws Exception {
579     Configuration conf = HBaseConfiguration.create();
580     // no need for a block cache
581     conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
582     int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
583     System.exit(ret);
584   }
585 }