View Javadoc

1   
2   /*
3    * Copyright 2011 The Apache Software Foundation
4    *
5    * Licensed to the Apache Software Foundation (ASF) under one
6    * or more contributor license agreements.  See the NOTICE file
7    * distributed with this work for additional information
8    * regarding copyright ownership.  The ASF licenses this file
9    * to you under the Apache License, Version 2.0 (the
10   * "License"); you may not use this file except in compliance
11   * with the License.  You may obtain a copy of the License at
12   *
13   *     http://www.apache.org/licenses/LICENSE-2.0
14   *
15   * Unless required by applicable law or agreed to in writing, software
16   * distributed under the License is distributed on an "AS IS" BASIS,
17   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18   * See the License for the specific language governing permissions and
19   * limitations under the License.
20   */
21  package org.apache.hadoop.hbase.io.hfile;
22  
23  import java.io.ByteArrayOutputStream;
24  import java.io.DataInput;
25  import java.io.IOException;
26  import java.io.PrintStream;
27  import java.util.ArrayList;
28  import java.util.List;
29  import java.util.Locale;
30  import java.util.Map;
31  import java.util.SortedMap;
32  
33  import com.yammer.metrics.core.*;
34  import com.yammer.metrics.reporting.ConsoleReporter;
35  
36  import org.apache.commons.cli.CommandLine;
37  import org.apache.commons.cli.CommandLineParser;
38  import org.apache.commons.cli.HelpFormatter;
39  import org.apache.commons.cli.Options;
40  import org.apache.commons.cli.ParseException;
41  import org.apache.commons.cli.PosixParser;
42  import org.apache.commons.logging.Log;
43  import org.apache.commons.logging.LogFactory;
44  import org.apache.hadoop.conf.Configuration;
45  import org.apache.hadoop.fs.FileSystem;
46  import org.apache.hadoop.fs.Path;
47  import org.apache.hadoop.hbase.HBaseConfiguration;
48  import org.apache.hadoop.hbase.HRegionInfo;
49  import org.apache.hadoop.hbase.KeyValue;
50  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
51  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
52  import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
53  import org.apache.hadoop.hbase.util.BloomFilter;
54  import org.apache.hadoop.hbase.util.BloomFilterFactory;
55  import org.apache.hadoop.hbase.util.ByteBloomFilter;
56  import org.apache.hadoop.hbase.util.Bytes;
57  import org.apache.hadoop.hbase.util.FSUtils;
58  import org.apache.hadoop.hbase.util.Writables;
59  
60  /**
61   * Implements pretty-printing functionality for {@link HFile}s.
62   */
63  public class HFilePrettyPrinter {
64  
65    private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
66  
67    private Options options = new Options();
68  
69    private boolean verbose;
70    private boolean printValue;
71    private boolean printKey;
72    private boolean shouldPrintMeta;
73    private boolean printBlocks;
74    private boolean printStats;
75    private boolean checkRow;
76    private boolean checkFamily;
77    private boolean isSeekToRow = false;
78  
79    /**
80     * The row which the user wants to specify and print all the KeyValues for.
81     */
82    private byte[] row = null;
83    private Configuration conf;
84  
85    private List<Path> files = new ArrayList<Path>();
86    private int count;
87  
88    private static final String FOUR_SPACES = "    ";
89  
90    public HFilePrettyPrinter() {
91      options.addOption("v", "verbose", false,
92          "Verbose output; emits file and meta data delimiters");
93      options.addOption("p", "printkv", false, "Print key/value pairs");
94      options.addOption("e", "printkey", false, "Print keys");
95      options.addOption("m", "printmeta", false, "Print meta data of file");
96      options.addOption("b", "printblocks", false, "Print block index meta data");
97      options.addOption("k", "checkrow", false,
98          "Enable row order check; looks for out-of-order keys");
99      options.addOption("a", "checkfamily", false, "Enable family check");
100     options.addOption("f", "file", true,
101         "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34");
102     options.addOption("w", "seekToRow", true,
103       "Seek to this row and print all the kvs for this row only");
104     options.addOption("r", "region", true,
105         "Region to scan. Pass region name; e.g. '.META.,,1'");
106     options.addOption("s", "stats", false, "Print statistics");
107   }
108 
109   public boolean parseOptions(String args[]) throws ParseException,
110       IOException {
111     if (args.length == 0) {
112       HelpFormatter formatter = new HelpFormatter();
113       formatter.printHelp("HFile", options, true);
114       return false;
115     }
116     CommandLineParser parser = new PosixParser();
117     CommandLine cmd = parser.parse(options, args);
118 
119     verbose = cmd.hasOption("v");
120     printValue = cmd.hasOption("p");
121     printKey = cmd.hasOption("e") || printValue;
122     shouldPrintMeta = cmd.hasOption("m");
123     printBlocks = cmd.hasOption("b");
124     printStats = cmd.hasOption("s");
125     checkRow = cmd.hasOption("k");
126     checkFamily = cmd.hasOption("a");
127 
128     if (cmd.hasOption("f")) {
129       files.add(new Path(cmd.getOptionValue("f")));
130     }
131 
132     if (cmd.hasOption("w")) {
133       String key = cmd.getOptionValue("w");
134       if (key != null && key.length() != 0) {
135         row = key.getBytes();
136         isSeekToRow = true;
137       } else {
138         System.err.println("Invalid row is specified.");
139         System.exit(-1);
140       }
141     }
142 
143     if (cmd.hasOption("r")) {
144       String regionName = cmd.getOptionValue("r");
145       byte[] rn = Bytes.toBytes(regionName);
146       byte[][] hri = HRegionInfo.parseRegionName(rn);
147       Path rootDir = FSUtils.getRootDir(conf);
148       Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
149       String enc = HRegionInfo.encodeRegionName(rn);
150       Path regionDir = new Path(tableDir, enc);
151       if (verbose)
152         System.out.println("region dir -> " + regionDir);
153       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
154           regionDir);
155       if (verbose)
156         System.out.println("Number of region files found -> "
157             + regionFiles.size());
158       if (verbose) {
159         int i = 1;
160         for (Path p : regionFiles) {
161           if (verbose)
162             System.out.println("Found file[" + i++ + "] -> " + p);
163         }
164       }
165       files.addAll(regionFiles);
166     }
167 
168     return true;
169   }
170 
171   /**
172    * Runs the command-line pretty-printer, and returns the desired command
173    * exit code (zero for success, non-zero for failure).
174    */
175   public int run(String[] args) {
176     conf = HBaseConfiguration.create();
177     conf.set("fs.defaultFS",
178         conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
179     conf.set("fs.default.name",
180         conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
181     SchemaMetrics.configureGlobally(conf);
182     try {
183       if (!parseOptions(args))
184         return 1;
185     } catch (IOException ex) {
186       LOG.error("Error parsing command-line options", ex);
187       return 1;
188     } catch (ParseException ex) {
189       LOG.error("Error parsing command-line options", ex);
190       return 1;
191     }
192 
193     // iterate over all files found
194     for (Path fileName : files) {
195       try {
196         processFile(fileName);
197       } catch (IOException ex) {
198         LOG.error("Error reading " + fileName, ex);
199       }
200     }
201 
202     if (verbose || printKey) {
203       System.out.println("Scanned kv count -> " + count);
204     }
205 
206     return 0;
207   }
208 
209   private void processFile(Path file) throws IOException {
210     if (verbose)
211       System.out.println("Scanning -> " + file);
212     FileSystem fs = file.getFileSystem(conf);
213     if (!fs.exists(file)) {
214       System.err.println("ERROR, file doesnt exist: " + file);
215     }
216 
217     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
218 
219     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
220 
221     KeyValueStatsCollector fileStats = null;
222 
223     if (verbose || printKey || checkRow || checkFamily || printStats) {
224       // scan over file and read key/value's and check if requested
225       HFileScanner scanner = reader.getScanner(false, false, false);
226       fileStats = new KeyValueStatsCollector();
227       boolean shouldScanKeysValues = false;
228       if (this.isSeekToRow) {
229         // seek to the first kv on this row
230         shouldScanKeysValues = 
231           (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
232       } else {
233         shouldScanKeysValues = scanner.seekTo();
234       }
235       if (shouldScanKeysValues)
236         scanKeysValues(file, fileStats, scanner, row);
237     }
238 
239     // print meta data
240     if (shouldPrintMeta) {
241       printMeta(reader, fileInfo);
242     }
243 
244     if (printBlocks) {
245       System.out.println("Block Index:");
246       System.out.println(reader.getDataBlockIndexReader());
247     }
248 
249     if (printStats) {
250       fileStats.finish();
251       System.out.println("Stats:\n" + fileStats);
252     }
253 
254     reader.close();
255   }
256 
257   private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
258       HFileScanner scanner,  byte[] row) throws IOException {
259     KeyValue pkv = null;
260     do {
261       KeyValue kv = scanner.getKeyValue();
262       if (row != null && row.length != 0) {
263         int result = Bytes.compareTo(kv.getRow(), row);
264         if (result > 0) {
265           break;
266         } else if (result < 0) {
267           continue;
268         }
269       }
270       // collect stats
271       if (printStats) {
272         fileStats.collect(kv);
273       }
274       // dump key value
275       if (printKey) {
276         System.out.print("K: " + kv);
277         if (printValue) {
278           System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
279         }
280         System.out.println();
281       }
282       // check if rows are in order
283       if (checkRow && pkv != null) {
284         if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
285           System.err.println("WARNING, previous row is greater then"
286               + " current row\n\tfilename -> " + file + "\n\tprevious -> "
287               + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
288               + Bytes.toStringBinary(kv.getKey()));
289         }
290       }
291       // check if families are consistent
292       if (checkFamily) {
293         String fam = Bytes.toString(kv.getFamily());
294         if (!file.toString().contains(fam)) {
295           System.err.println("WARNING, filename does not match kv family,"
296               + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
297               + Bytes.toStringBinary(kv.getKey()));
298         }
299         if (pkv != null
300             && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
301           System.err.println("WARNING, previous kv has different family"
302               + " compared to current key\n\tfilename -> " + file
303               + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
304               + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
305         }
306       }
307       pkv = kv;
308       ++count;
309     } while (scanner.next());
310   }
311 
312   /**
313    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
314    * with a four-space indentation.
315    */
316   private static String asSeparateLines(String keyValueStr) {
317     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
318                                   ",\n" + FOUR_SPACES + "$1");
319   }
320 
321   private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
322       throws IOException {
323     System.out.println("Block index size as per heapsize: "
324         + reader.indexSize());
325     System.out.println(asSeparateLines(reader.toString()));
326     System.out.println("Trailer:\n    "
327         + asSeparateLines(reader.getTrailer().toString()));
328     System.out.println("Fileinfo:");
329     for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
330       System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
331       if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
332         long seqid = Bytes.toLong(e.getValue());
333         System.out.println(seqid);
334       } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
335         TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
336         Writables.copyWritable(e.getValue(), timeRangeTracker);
337         System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
338             + timeRangeTracker.getMaximumTimestamp());
339       } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
340           || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
341         System.out.println(Bytes.toInt(e.getValue()));
342       } else {
343         System.out.println(Bytes.toStringBinary(e.getValue()));
344       }
345     }
346 
347     try {
348       System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
349     } catch (Exception e) {
350       System.out.println("Unable to retrieve the midkey");
351     }
352 
353     // Printing general bloom information
354     DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
355     BloomFilter bloomFilter = null;
356     if (bloomMeta != null)
357       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
358 
359     System.out.println("Bloom filter:");
360     if (bloomFilter != null) {
361       System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
362           ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
363     } else {
364       System.out.println(FOUR_SPACES + "Not present");
365     }
366 
367     // Printing delete bloom information
368     bloomMeta = reader.getDeleteBloomFilterMetadata();
369     bloomFilter = null;
370     if (bloomMeta != null)
371       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
372 
373     System.out.println("Delete Family Bloom filter:");
374     if (bloomFilter != null) {
375       System.out.println(FOUR_SPACES
376           + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
377               "\n" + FOUR_SPACES));
378     } else {
379       System.out.println(FOUR_SPACES + "Not present");
380     }
381   }
382 
383   private static class KeyValueStatsCollector {
384     private final MetricsRegistry metricsRegistry = new MetricsRegistry();
385     private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
386     private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
387     Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
388     Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
389     Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
390     Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
391 
392     long curRowBytes = 0;
393     long curRowCols = 0;
394 
395     byte[] biggestRow = null;
396 
397     private KeyValue prevKV = null;
398     private long maxRowBytes = 0;
399     private long curRowKeyLength;
400 
401     public void collect(KeyValue kv) {
402       valLen.update(kv.getValueLength());
403       if (prevKV != null &&
404           KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
405         // new row
406         collectRow();
407       }
408       curRowBytes += kv.getLength();
409       curRowKeyLength = kv.getKeyLength();
410       curRowCols++;
411       prevKV = kv;
412     }
413 
414     private void collectRow() {
415       rowSizeBytes.update(curRowBytes);
416       rowSizeCols.update(curRowCols);
417       keyLen.update(curRowKeyLength);
418 
419       if (curRowBytes > maxRowBytes && prevKV != null) {
420         biggestRow = prevKV.getRow();
421         maxRowBytes = curRowBytes;
422       }
423 
424       curRowBytes = 0;
425       curRowCols = 0;
426     }
427 
428     public void finish() {
429       if (curRowCols > 0) {
430         collectRow();
431       }
432     }
433 
434     @Override
435     public String toString() {
436       if (prevKV == null)
437         return "no data available for statistics";
438 
439       // Dump the metrics to the output stream
440       simpleReporter.shutdown();
441       simpleReporter.run();
442       metricsRegistry.shutdown();
443 
444       return
445               metricsOutput.toString() +
446                       "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
447     }
448   }
449 
450   private static class SimpleReporter extends ConsoleReporter {
451     private final PrintStream out;
452 
453     public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
454       super(metricsRegistry, out, MetricPredicate.ALL);
455       this.out = out;
456     }
457 
458     @Override
459     public void run() {
460       for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
461               MetricPredicate.ALL).entrySet()) {
462         try {
463           for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
464             out.print("   " + subEntry.getKey().getName());
465             out.println(':');
466 
467             subEntry.getValue().processWith(this, subEntry.getKey(), out);
468           }
469         } catch (Exception e) {
470           e.printStackTrace(out);
471         }
472       }
473     }
474 
475     @Override
476     public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
477       super.processHistogram(name, histogram, stream);
478       stream.printf(Locale.getDefault(), "             count = %d\n", histogram.count());
479     }
480   }
481 }