
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.SortedMap;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.BloomFilter;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.ByteBloomFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.yammer.metrics.core.Histogram;
import com.yammer.metrics.core.Metric;
import com.yammer.metrics.core.MetricName;
import com.yammer.metrics.core.MetricPredicate;
import com.yammer.metrics.core.MetricsRegistry;
import com.yammer.metrics.reporting.ConsoleReporter;

/**
 * Implements pretty-printing functionality for {@link HFile}s.
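 * <p>
 * The class can be driven from the command line via its {@link #main(String[])} method.
 * A sketch of a typical invocation (the HDFS path below is a made-up example; point
 * {@code -f} at a real HFile on your cluster):
 * <pre>
 * hbase org.apache.hadoop.hbase.io.hfile.HFilePrettyPrinter -v -p -m \
 *     -f hdfs://namenode:8020/hbase/data/default/mytable/region/family/hfile
 * </pre>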
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
@InterfaceStability.Evolving
public class HFilePrettyPrinter extends Configured implements Tool {

  private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);

  private Options options = new Options();

  private boolean verbose;
  private boolean printValue;
  private boolean printKey;
  private boolean shouldPrintMeta;
  private boolean printBlockIndex;
  private boolean printBlockHeaders;
  private boolean printStats;
  private boolean checkRow;
  private boolean checkFamily;
  private boolean isSeekToRow = false;

  /**
   * The row for which the user wants to print all the KeyValues.
   */
  private byte[] row = null;

  private List<Path> files = new ArrayList<Path>();
  private int count;

  private static final String FOUR_SPACES = "    ";

  public HFilePrettyPrinter() {
    super();
    init();
  }

  public HFilePrettyPrinter(Configuration conf) {
    super(conf);
    init();
  }

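  /**
   * Registers the supported command-line options; the {@code -f} (file) and
   * {@code -r} (region) options form a mutually exclusive group.
   */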
  private void init() {
    options.addOption("v", "verbose", false,
        "Verbose output; emits file and meta data delimiters");
    options.addOption("p", "printkv", false, "Print key/value pairs");
    options.addOption("e", "printkey", false, "Print keys");
    options.addOption("m", "printmeta", false, "Print meta data of file");
    options.addOption("b", "printblocks", false, "Print block index meta data");
    options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
    options.addOption("k", "checkrow", false,
        "Enable row order check; looks for out-of-order keys");
    options.addOption("a", "checkfamily", false, "Enable family check");
    options.addOption("w", "seekToRow", true,
      "Seek to this row and print all the kvs for this row only");
    options.addOption("s", "stats", false, "Print statistics");

    OptionGroup files = new OptionGroup();
    files.addOption(new Option("f", "file", true,
      "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
    files.addOption(new Option("r", "region", true,
      "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
    options.addOptionGroup(files);
  }

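  /**
   * Parses the command-line arguments into the flags and file list used by
   * {@link #run(String[])}. Prints usage and returns {@code false} when no
   * arguments are given; otherwise returns {@code true}.
   */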
  public boolean parseOptions(String args[]) throws ParseException,
      IOException {
    if (args.length == 0) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("HFile", options, true);
      return false;
    }
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = parser.parse(options, args);

    verbose = cmd.hasOption("v");
    printValue = cmd.hasOption("p");
    printKey = cmd.hasOption("e") || printValue;
    shouldPrintMeta = cmd.hasOption("m");
    printBlockIndex = cmd.hasOption("b");
    printBlockHeaders = cmd.hasOption("h");
    printStats = cmd.hasOption("s");
    checkRow = cmd.hasOption("k");
    checkFamily = cmd.hasOption("a");

    if (cmd.hasOption("f")) {
      files.add(new Path(cmd.getOptionValue("f")));
    }

    if (cmd.hasOption("w")) {
      String key = cmd.getOptionValue("w");
      if (key != null && key.length() != 0) {
        row = Bytes.toBytesBinary(key);
        isSeekToRow = true;
      } else {
        System.err.println("Invalid row specified.");
        System.exit(-1);
      }
    }

    if (cmd.hasOption("r")) {
      String regionName = cmd.getOptionValue("r");
      byte[] rn = Bytes.toBytes(regionName);
      byte[][] hri = HRegionInfo.parseRegionName(rn);
      Path rootDir = FSUtils.getRootDir(getConf());
      Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
      String enc = HRegionInfo.encodeRegionName(rn);
      Path regionDir = new Path(tableDir, enc);
      if (verbose)
        System.out.println("region dir -> " + regionDir);
      List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
          regionDir);
      if (verbose)
        System.out.println("Number of region files found -> "
            + regionFiles.size());
      if (verbose) {
        int i = 1;
        for (Path p : regionFiles) {
          System.out.println("Found file[" + i++ + "] -> " + p);
        }
      }
      files.addAll(regionFiles);
    }

    return true;
  }

  /**
   * Runs the command-line pretty-printer, and returns the desired command
   * exit code (zero for success, non-zero for failure).
   */
  @Override
  public int run(String[] args) {
    if (getConf() == null) {
      throw new RuntimeException("A Configuration instance must be provided.");
    }
    try {
      FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
      if (!parseOptions(args))
        return 1;
    } catch (IOException ex) {
      LOG.error("Error parsing command-line options", ex);
      return 1;
    } catch (ParseException ex) {
      LOG.error("Error parsing command-line options", ex);
      return 1;
    }

    // iterate over all files found
    for (Path fileName : files) {
      try {
        processFile(fileName);
      } catch (IOException ex) {
        LOG.error("Error reading " + fileName, ex);
        System.exit(-2);
      }
    }

    if (verbose || printKey) {
      System.out.println("Scanned kv count -> " + count);
    }

    return 0;
  }

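  /**
   * Opens a single HFile and applies whichever actions were requested on the
   * command line: scanning key/values, printing metadata, the block index,
   * block headers, and/or statistics. Exits the JVM if the file does not exist.
   */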
  private void processFile(Path file) throws IOException {
    if (verbose)
      System.out.println("Scanning -> " + file);
    FileSystem fs = file.getFileSystem(getConf());
    if (!fs.exists(file)) {
      System.err.println("ERROR, file doesn't exist: " + file);
      System.exit(-2);
    }

    HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

    KeyValueStatsCollector fileStats = null;

    if (verbose || printKey || checkRow || checkFamily || printStats) {
      // scan over file and read key/values and check if requested
      HFileScanner scanner = reader.getScanner(false, false, false);
      fileStats = new KeyValueStatsCollector();
      boolean shouldScanKeysValues = false;
      if (this.isSeekToRow) {
        // seek to the first kv on this row
        shouldScanKeysValues =
          (scanner.seekTo(KeyValueUtil.createFirstOnRow(this.row).getKey()) != -1);
      } else {
        shouldScanKeysValues = scanner.seekTo();
      }
      if (shouldScanKeysValues)
        scanKeysValues(file, fileStats, scanner, row);
    }

    // print meta data
    if (shouldPrintMeta) {
      printMeta(reader, fileInfo);
    }

    if (printBlockIndex) {
      System.out.println("Block Index:");
      System.out.println(reader.getDataBlockIndexReader());
    }

    if (printBlockHeaders) {
      System.out.println("Block Headers:");
      /*
       * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and
       * TestLazyDataBlockDecompression. Refactor?
       */
      FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
      long fileSize = fs.getFileStatus(file).getLen();
      FixedFileTrailer trailer =
        FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
      long offset = trailer.getFirstDataBlockOffset(),
        max = trailer.getLastDataBlockOffset();
      HFileBlock block;
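      // Walk the data blocks in file order, from the first to the last data block offset
      // recorded in the trailer; each block's on-disk size tells us where the next one starts.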
      while (offset <= max) {
        block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
          /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
        offset += block.getOnDiskSizeWithHeader();
        System.out.println(block);
      }
    }

    if (printStats) {
      fileStats.finish();
      System.out.println("Stats:\n" + fileStats);
    }

    reader.close();
  }

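  /**
   * Iterates over the cells visible through {@code scanner}, optionally restricting
   * output to {@code row}, and applies the requested per-cell actions: stats
   * collection, key/value printing, row-order checking, and family consistency checking.
   */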
  private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
      HFileScanner scanner, byte[] row) throws IOException {
    Cell pCell = null;
    do {
      Cell cell = scanner.getKeyValue();
      if (row != null && row.length != 0) {
        int result = CellComparator.compareRows(cell.getRowArray(), cell.getRowOffset(),
            cell.getRowLength(), row, 0, row.length);
        if (result > 0) {
          break;
        } else if (result < 0) {
          continue;
        }
      }
      // collect stats
      if (printStats) {
        fileStats.collect(cell);
      }
      // dump key value
      if (printKey) {
        System.out.print("K: " + cell);
        if (printValue) {
          System.out.print(" V: "
              + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(),
                  cell.getValueLength()));
          int i = 0;
          List<Tag> tags = Tag.asList(cell.getTagsArray(), cell.getTagsOffset(),
              cell.getTagsLength());
          for (Tag tag : tags) {
            System.out.print(String.format(" T[%d]: %s", i++,
                Bytes.toStringBinary(tag.getBuffer(), tag.getTagOffset(), tag.getTagLength())));
          }
        }
        System.out.println();
      }
      // check if rows are in order
      if (checkRow && pCell != null) {
        if (CellComparator.compareRows(pCell, cell) > 0) {
          System.err.println("WARNING, previous row is greater than"
              + " current row\n\tfilename -> " + file + "\n\tprevious -> "
              + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent  -> "
              + CellUtil.getCellKeyAsString(cell));
        }
      }
      // check if families are consistent
      if (checkFamily) {
        String fam = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(),
            cell.getFamilyLength());
        if (!file.toString().contains(fam)) {
          System.err.println("WARNING, filename does not match kv family,"
              + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
              + CellUtil.getCellKeyAsString(cell));
        }
        if (pCell != null && CellComparator.compareFamilies(pCell, cell) != 0) {
          System.err.println("WARNING, previous kv has different family"
              + " compared to current key\n\tfilename -> " + file
              + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell)
              + "\n\tcurrent  -> " + CellUtil.getCellKeyAsString(cell));
        }
      }
      pCell = cell;
      ++count;
    } while (scanner.next());
  }

  /**
   * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
   * with a four-space indentation.
   */
  private static String asSeparateLines(String keyValueStr) {
    return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
                                  ",\n" + FOUR_SPACES + "$1");
  }

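  /**
   * Prints the reader summary and trailer, the file-info entries (with special decoding
   * for the max sequence id, time range, and average key/value lengths), the mid-key,
   * and the general and delete-family Bloom filter details, if present.
   */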
  private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
      throws IOException {
    System.out.println("Block index size as per heapsize: "
        + reader.indexSize());
    System.out.println(asSeparateLines(reader.toString()));
    System.out.println("Trailer:\n    "
        + asSeparateLines(reader.getTrailer().toString()));
    System.out.println("Fileinfo:");
    for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
      System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
      if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
        long seqid = Bytes.toLong(e.getValue());
        System.out.println(seqid);
      } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
        TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
        Writables.copyWritable(e.getValue(), timeRangeTracker);
        System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
            + timeRangeTracker.getMaximumTimestamp());
      } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
          || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
        System.out.println(Bytes.toInt(e.getValue()));
      } else {
        System.out.println(Bytes.toStringBinary(e.getValue()));
      }
    }

    try {
      System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
    } catch (Exception e) {
      System.out.println("Unable to retrieve the midkey");
    }

    // Printing general bloom information
    DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
    BloomFilter bloomFilter = null;
    if (bloomMeta != null)
      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

    System.out.println("Bloom filter:");
    if (bloomFilter != null) {
      System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
          ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
    } else {
      System.out.println(FOUR_SPACES + "Not present");
    }

    // Printing delete bloom information
    bloomMeta = reader.getDeleteBloomFilterMetadata();
    bloomFilter = null;
    if (bloomMeta != null)
      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

    System.out.println("Delete Family Bloom filter:");
    if (bloomFilter != null) {
      System.out.println(FOUR_SPACES
          + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
              "\n" + FOUR_SPACES));
    } else {
      System.out.println(FOUR_SPACES + "Not present");
    }
  }

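  /**
   * Accumulates per-cell and per-row statistics (key/value lengths, row sizes in bytes
   * and columns, and the key of the biggest row) in Yammer metrics histograms; the
   * formatted report is produced by {@link #toString()}.
   */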
  private static class KeyValueStatsCollector {
    private final MetricsRegistry metricsRegistry = new MetricsRegistry();
    private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
    private final SimpleReporter simpleReporter =
        new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
    Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
    Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
    Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
    Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");

    long curRowBytes = 0;
    long curRowCols = 0;

    byte[] biggestRow = null;

    private Cell prevCell = null;
    private long maxRowBytes = 0;
    private long curRowKeyLength;

    public void collect(Cell cell) {
      valLen.update(cell.getValueLength());
      if (prevCell != null &&
          KeyValue.COMPARATOR.compareRows(prevCell, cell) != 0) {
        // new row
        collectRow();
      }
      curRowBytes += KeyValueUtil.length(cell);
      curRowKeyLength = KeyValueUtil.keyLength(cell);
      curRowCols++;
      prevCell = cell;
    }

    private void collectRow() {
      rowSizeBytes.update(curRowBytes);
      rowSizeCols.update(curRowCols);
      keyLen.update(curRowKeyLength);

      if (curRowBytes > maxRowBytes && prevCell != null) {
        biggestRow = prevCell.getRow();
        maxRowBytes = curRowBytes;
      }

      curRowBytes = 0;
      curRowCols = 0;
    }

    public void finish() {
      if (curRowCols > 0) {
        collectRow();
      }
    }

    @Override
    public String toString() {
      if (prevCell == null)
        return "no data available for statistics";

      // Dump the metrics to the output stream
      simpleReporter.shutdown();
      simpleReporter.run();
      metricsRegistry.shutdown();

      return metricsOutput.toString() +
          "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
    }
  }

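  /**
   * A stripped-down {@link ConsoleReporter} that writes each metric to the supplied
   * stream and appends the sample count after every histogram.
   */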
  private static class SimpleReporter extends ConsoleReporter {
    private final PrintStream out;

    public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
      super(metricsRegistry, out, MetricPredicate.ALL);
      this.out = out;
    }

    @Override
    public void run() {
      for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
              MetricPredicate.ALL).entrySet()) {
        try {
          for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
            out.print("   " + subEntry.getKey().getName());
            out.println(':');

            subEntry.getValue().processWith(this, subEntry.getKey(), out);
          }
        } catch (Exception e) {
          e.printStackTrace(out);
        }
      }
    }

    @Override
    public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
      super.processHistogram(name, histogram, stream);
      stream.printf(Locale.getDefault(), "             count = %d%n", histogram.count());
    }
  }

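  /**
   * Command-line entry point; disables the block cache and hands the arguments to
   * {@link ToolRunner}. A minimal programmatic sketch mirroring this method (the
   * {@code "-m"}/{@code "-f"} arguments and the path are illustrative only):
   * <pre>
   * Configuration conf = HBaseConfiguration.create();
   * conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
   * int exitCode = ToolRunner.run(conf, new HFilePrettyPrinter(),
   *     new String[] { "-m", "-f", "hdfs://namenode:8020/path/to/hfile" });
   * </pre>
   */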
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // no need for a block cache
    conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
    int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
    System.exit(ret);
  }
}