View Javadoc

1   
2   /*
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataInput;
24  import java.io.IOException;
25  import java.io.PrintStream;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.Locale;
29  import java.util.Map;
30  import java.util.SortedMap;
31  
32  import org.apache.commons.cli.CommandLine;
33  import org.apache.commons.cli.CommandLineParser;
34  import org.apache.commons.cli.HelpFormatter;
35  import org.apache.commons.cli.Options;
36  import org.apache.commons.cli.ParseException;
37  import org.apache.commons.cli.PosixParser;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.classification.InterfaceAudience;
41  import org.apache.hadoop.classification.InterfaceStability;
42  import org.apache.hadoop.conf.Configuration;
43  import org.apache.hadoop.fs.FileSystem;
44  import org.apache.hadoop.fs.Path;
45  import org.apache.hadoop.hbase.HBaseConfiguration;
46  import org.apache.hadoop.hbase.HRegionInfo;
47  import org.apache.hadoop.hbase.KeyValue;
48  import org.apache.hadoop.hbase.KeyValueUtil;
49  import org.apache.hadoop.hbase.TableName;
50  import org.apache.hadoop.hbase.Tag;
51  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
52  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
53  import org.apache.hadoop.hbase.util.BloomFilter;
54  import org.apache.hadoop.hbase.util.BloomFilterFactory;
55  import org.apache.hadoop.hbase.util.ByteBloomFilter;
56  import org.apache.hadoop.hbase.util.Bytes;
57  import org.apache.hadoop.hbase.util.FSUtils;
58  import org.apache.hadoop.hbase.util.Writables;
59  
60  import com.yammer.metrics.core.Histogram;
61  import com.yammer.metrics.core.Metric;
62  import com.yammer.metrics.core.MetricName;
63  import com.yammer.metrics.core.MetricPredicate;
64  import com.yammer.metrics.core.MetricsRegistry;
65  import com.yammer.metrics.reporting.ConsoleReporter;
66  
/**
 * Implements pretty-printing functionality for {@link HFile}s.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HFilePrettyPrinter {

  private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);

  /** Command-line options recognized by this tool; registered in the constructor. */
  private Options options = new Options();

  // Flags populated from the parsed command line in parseOptions().
  private boolean verbose;          // -v: emit file and meta data delimiters
  private boolean printValue;       // -p: print key/value pairs
  private boolean printKey;         // -e: print keys (implied by -p)
  private boolean shouldPrintMeta;  // -m: print file meta data
  private boolean printBlocks;      // -b: print block index meta data
  private boolean printStats;       // -s: print key/value statistics
  private boolean checkRow;         // -k: warn about out-of-order rows
  private boolean checkFamily;      // -a: check family/filename consistency
  private boolean isSeekToRow = false; // true when -w supplied a row to seek to

  /**
   * The row which the user wants to specify and print all the KeyValues for.
   */
  private byte[] row = null;
  /** Cluster configuration; assigned by run() before files are processed. */
  private Configuration conf;

  /** Files to scan, gathered from the -f and -r options. */
  private List<Path> files = new ArrayList<Path>();
  /** Running count of KeyValues scanned across all files. */
  private int count;

  /** Indentation unit used when printing meta data entries. */
  private static final String FOUR_SPACES = "    ";
98  
  /**
   * Registers every command-line option the tool understands. The options are
   * parsed later by {@link #parseOptions(String[])}.
   */
  public HFilePrettyPrinter() {
    options.addOption("v", "verbose", false,
        "Verbose output; emits file and meta data delimiters");
    options.addOption("p", "printkv", false, "Print key/value pairs");
    options.addOption("e", "printkey", false, "Print keys");
    options.addOption("m", "printmeta", false, "Print meta data of file");
    options.addOption("b", "printblocks", false, "Print block index meta data");
    options.addOption("k", "checkrow", false,
        "Enable row order check; looks for out-of-order keys");
    options.addOption("a", "checkfamily", false, "Enable family check");
    options.addOption("f", "file", true,
        "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34");
    options.addOption("w", "seekToRow", true,
      "Seek to this row and print all the kvs for this row only");
    options.addOption("r", "region", true,
        "Region to scan. Pass region name; e.g. 'hbase:meta,,1'");
    options.addOption("s", "stats", false, "Print statistics");
  }
117 
118   public boolean parseOptions(String args[]) throws ParseException,
119       IOException {
120     if (args.length == 0) {
121       HelpFormatter formatter = new HelpFormatter();
122       formatter.printHelp("HFile", options, true);
123       return false;
124     }
125     CommandLineParser parser = new PosixParser();
126     CommandLine cmd = parser.parse(options, args);
127 
128     verbose = cmd.hasOption("v");
129     printValue = cmd.hasOption("p");
130     printKey = cmd.hasOption("e") || printValue;
131     shouldPrintMeta = cmd.hasOption("m");
132     printBlocks = cmd.hasOption("b");
133     printStats = cmd.hasOption("s");
134     checkRow = cmd.hasOption("k");
135     checkFamily = cmd.hasOption("a");
136 
137     if (cmd.hasOption("f")) {
138       files.add(new Path(cmd.getOptionValue("f")));
139     }
140 
141     if (cmd.hasOption("w")) {
142       String key = cmd.getOptionValue("w");
143       if (key != null && key.length() != 0) {
144         row = key.getBytes();
145         isSeekToRow = true;
146       } else {
147         System.err.println("Invalid row is specified.");
148         System.exit(-1);
149       }
150     }
151 
152     if (cmd.hasOption("r")) {
153       String regionName = cmd.getOptionValue("r");
154       byte[] rn = Bytes.toBytes(regionName);
155       byte[][] hri = HRegionInfo.parseRegionName(rn);
156       Path rootDir = FSUtils.getRootDir(conf);
157       Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
158       String enc = HRegionInfo.encodeRegionName(rn);
159       Path regionDir = new Path(tableDir, enc);
160       if (verbose)
161         System.out.println("region dir -> " + regionDir);
162       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
163           regionDir);
164       if (verbose)
165         System.out.println("Number of region files found -> "
166             + regionFiles.size());
167       if (verbose) {
168         int i = 1;
169         for (Path p : regionFiles) {
170           if (verbose)
171             System.out.println("Found file[" + i++ + "] -> " + p);
172         }
173       }
174       files.addAll(regionFiles);
175     }
176 
177     return true;
178   }
179 
180   /**
181    * Runs the command-line pretty-printer, and returns the desired command
182    * exit code (zero for success, non-zero for failure).
183    */
184   public int run(String[] args) {
185     conf = HBaseConfiguration.create();
186     try {
187       FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
188       if (!parseOptions(args))
189         return 1;
190     } catch (IOException ex) {
191       LOG.error("Error parsing command-line options", ex);
192       return 1;
193     } catch (ParseException ex) {
194       LOG.error("Error parsing command-line options", ex);
195       return 1;
196     }
197 
198     // iterate over all files found
199     for (Path fileName : files) {
200       try {
201         processFile(fileName);
202       } catch (IOException ex) {
203         LOG.error("Error reading " + fileName, ex);
204       }
205     }
206 
207     if (verbose || printKey) {
208       System.out.println("Scanned kv count -> " + count);
209     }
210 
211     return 0;
212   }
213 
214   private void processFile(Path file) throws IOException {
215     if (verbose)
216       System.out.println("Scanning -> " + file);
217     FileSystem fs = file.getFileSystem(conf);
218     if (!fs.exists(file)) {
219       System.err.println("ERROR, file doesnt exist: " + file);
220     }
221 
222     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf), conf);
223 
224     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
225 
226     KeyValueStatsCollector fileStats = null;
227 
228     if (verbose || printKey || checkRow || checkFamily || printStats) {
229       // scan over file and read key/value's and check if requested
230       HFileScanner scanner = reader.getScanner(false, false, false);
231       fileStats = new KeyValueStatsCollector();
232       boolean shouldScanKeysValues = false;
233       if (this.isSeekToRow) {
234         // seek to the first kv on this row
235         shouldScanKeysValues = 
236           (scanner.seekTo(KeyValueUtil.createFirstOnRow(this.row).getKey()) != -1);
237       } else {
238         shouldScanKeysValues = scanner.seekTo();
239       }
240       if (shouldScanKeysValues)
241         scanKeysValues(file, fileStats, scanner, row);
242     }
243 
244     // print meta data
245     if (shouldPrintMeta) {
246       printMeta(reader, fileInfo);
247     }
248 
249     if (printBlocks) {
250       System.out.println("Block Index:");
251       System.out.println(reader.getDataBlockIndexReader());
252     }
253 
254     if (printStats) {
255       fileStats.finish();
256       System.out.println("Stats:\n" + fileStats);
257     }
258 
259     reader.close();
260   }
261 
  /**
   * Iterates the scanner from its current position to the end of the file
   * (or past the requested row), applying whichever print/check/stats actions
   * were enabled on the command line. Increments {@link #count} per KeyValue.
   *
   * @param file      file being scanned; used in warnings and the family check
   * @param fileStats collector fed when -s (printStats) is enabled
   * @param scanner   scanner already positioned at the first KeyValue to read
   * @param row       if non-null and non-empty, restrict output to this row
   * @throws IOException if advancing the scanner fails
   */
  private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
      HFileScanner scanner,  byte[] row) throws IOException {
    KeyValue pkv = null;
    do {
      KeyValue kv = KeyValueUtil.ensureKeyValue(scanner.getKeyValue());
      if (row != null && row.length != 0) {
        // Single-row mode: stop once past the row; 'continue' jumps to the
        // loop condition, skipping cells that sort before the row.
        int result = Bytes.compareTo(kv.getRow(), row);
        if (result > 0) {
          break;
        } else if (result < 0) {
          continue;
        }
      }
      // collect stats
      if (printStats) {
        fileStats.collect(kv);
      }
      // dump key value
      if (printKey) {
        System.out.print("K: " + kv);
        if (printValue) {
          System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
          int i = 0;
          List<Tag> tags = kv.getTags();
          for (Tag tag : tags) {
            System.out
                .print(String.format(" T[%d]: %s", i++, Bytes.toStringBinary(tag.getValue())));
          }
        }
        System.out.println();
      }
      // check if rows are in order
      if (checkRow && pkv != null) {
        if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
          System.err.println("WARNING, previous row is greater then"
              + " current row\n\tfilename -> " + file + "\n\tprevious -> "
              + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
              + Bytes.toStringBinary(kv.getKey()));
        }
      }
      // check if families are consistent
      if (checkFamily) {
        // Store file paths conventionally contain the family name; warn if
        // this cell's family does not appear in the path.
        String fam = Bytes.toString(kv.getFamily());
        if (!file.toString().contains(fam)) {
          System.err.println("WARNING, filename does not match kv family,"
              + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
              + Bytes.toStringBinary(kv.getKey()));
        }
        if (pkv != null
            && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
          System.err.println("WARNING, previous kv has different family"
              + " compared to current key\n\tfilename -> " + file
              + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
              + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
        }
      }
      pkv = kv;
      ++count;
    } while (scanner.next());
  }
322 
323   /**
324    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
325    * with a four-space indentation.
326    */
327   private static String asSeparateLines(String keyValueStr) {
328     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
329                                   ",\n" + FOUR_SPACES + "$1");
330   }
331 
  /**
   * Prints the reader's summary, trailer, file-info entries, mid-key, and
   * Bloom filter meta data to stdout (the -m option).
   *
   * @param reader   open reader for the file being described
   * @param fileInfo file-info map previously loaded from the reader
   * @throws IOException if Bloom filter meta data cannot be read
   */
  private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
      throws IOException {
    System.out.println("Block index size as per heapsize: "
        + reader.indexSize());
    System.out.println(asSeparateLines(reader.toString()));
    System.out.println("Trailer:\n    "
        + asSeparateLines(reader.getTrailer().toString()));
    System.out.println("Fileinfo:");
    for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
      System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
      // Well-known keys get decoded into a human-readable value; everything
      // else is dumped as a binary-escaped string.
      if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
        long seqid = Bytes.toLong(e.getValue());
        System.out.println(seqid);
      } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
        TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
        Writables.copyWritable(e.getValue(), timeRangeTracker);
        System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
            + timeRangeTracker.getMaximumTimestamp());
      } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
          || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
        System.out.println(Bytes.toInt(e.getValue()));
      } else {
        System.out.println(Bytes.toStringBinary(e.getValue()));
      }
    }

    try {
      System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
    } catch (Exception e) {
      // NOTE(review): broad catch — any failure computing the midkey is
      // reported as unavailable rather than aborting the meta dump.
      System.out.println ("Unable to retrieve the midkey");
    }

    // Printing general bloom information
    DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
    BloomFilter bloomFilter = null;
    if (bloomMeta != null)
      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

    System.out.println("Bloom filter:");
    if (bloomFilter != null) {
      // Split the single-line bloom summary on its record separator so each
      // stat lands on its own indented line.
      System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
          ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
    } else {
      System.out.println(FOUR_SPACES + "Not present");
    }

    // Printing delete bloom information
    bloomMeta = reader.getDeleteBloomFilterMetadata();
    bloomFilter = null;
    if (bloomMeta != null)
      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

    System.out.println("Delete Family Bloom filter:");
    if (bloomFilter != null) {
      System.out.println(FOUR_SPACES
          + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
              "\n" + FOUR_SPACES));
    } else {
      System.out.println(FOUR_SPACES + "Not present");
    }
  }
393 
394   private static class KeyValueStatsCollector {
395     private final MetricsRegistry metricsRegistry = new MetricsRegistry();
396     private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
397     private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
398     Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
399     Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
400     Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
401     Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
402 
403     long curRowBytes = 0;
404     long curRowCols = 0;
405 
406     byte[] biggestRow = null;
407 
408     private KeyValue prevKV = null;
409     private long maxRowBytes = 0;
410     private long curRowKeyLength;
411 
412     public void collect(KeyValue kv) {
413       valLen.update(kv.getValueLength());
414       if (prevKV != null &&
415           KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
416         // new row
417         collectRow();
418       }
419       curRowBytes += kv.getLength();
420       curRowKeyLength = kv.getKeyLength();
421       curRowCols++;
422       prevKV = kv;
423     }
424 
425     private void collectRow() {
426       rowSizeBytes.update(curRowBytes);
427       rowSizeCols.update(curRowCols);
428       keyLen.update(curRowKeyLength);
429 
430       if (curRowBytes > maxRowBytes && prevKV != null) {
431         biggestRow = prevKV.getRow();
432         maxRowBytes = curRowBytes;
433       }
434 
435       curRowBytes = 0;
436       curRowCols = 0;
437     }
438 
439     public void finish() {
440       if (curRowCols > 0) {
441         collectRow();
442       }
443     }
444 
445     @Override
446     public String toString() {
447       if (prevKV == null)
448         return "no data available for statistics";
449 
450       // Dump the metrics to the output stream
451       simpleReporter.shutdown();
452       simpleReporter.run();
453       metricsRegistry.shutdown();
454 
455       return
456               metricsOutput.toString() +
457                       "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
458     }
459   }
460 
461   private static class SimpleReporter extends ConsoleReporter {
462     private final PrintStream out;
463 
464     public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
465       super(metricsRegistry, out, MetricPredicate.ALL);
466       this.out = out;
467     }
468 
469     @Override
470     public void run() {
471       for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
472               MetricPredicate.ALL).entrySet()) {
473         try {
474           for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
475             out.print("   " + subEntry.getKey().getName());
476             out.println(':');
477 
478             subEntry.getValue().processWith(this, subEntry.getKey(), out);
479           }
480         } catch (Exception e) {
481           e.printStackTrace(out);
482         }
483       }
484     }
485 
486     @Override
487     public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
488       super.processHistogram(name, histogram, stream);
489       stream.printf(Locale.getDefault(), "             count = %d\n", histogram.count());
490     }
491   }
492 }