001/*
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.io.hfile;
020
021import static com.codahale.metrics.MetricRegistry.name;
022
023import com.codahale.metrics.ConsoleReporter;
024import com.codahale.metrics.Counter;
025import com.codahale.metrics.Gauge;
026import com.codahale.metrics.Histogram;
027import com.codahale.metrics.Meter;
028import com.codahale.metrics.MetricFilter;
029import com.codahale.metrics.MetricRegistry;
030import com.codahale.metrics.ScheduledReporter;
031import com.codahale.metrics.Snapshot;
032import com.codahale.metrics.Timer;
033import java.io.ByteArrayOutputStream;
034import java.io.DataInput;
035import java.io.IOException;
036import java.io.PrintStream;
037import java.text.DateFormat;
038import java.util.ArrayList;
039import java.util.HashMap;
040import java.util.Iterator;
041import java.util.LinkedHashSet;
042import java.util.List;
043import java.util.Locale;
044import java.util.Map;
045import java.util.Set;
046import java.util.SortedMap;
047import java.util.TimeZone;
048import java.util.concurrent.TimeUnit;
049import org.apache.commons.lang3.StringUtils;
050import org.apache.hadoop.conf.Configuration;
051import org.apache.hadoop.conf.Configured;
052import org.apache.hadoop.fs.FileSystem;
053import org.apache.hadoop.fs.Path;
054import org.apache.hadoop.hbase.Cell;
055import org.apache.hadoop.hbase.CellComparator;
056import org.apache.hadoop.hbase.CellUtil;
057import org.apache.hadoop.hbase.HBaseConfiguration;
058import org.apache.hadoop.hbase.HBaseInterfaceAudience;
059import org.apache.hadoop.hbase.HConstants;
060import org.apache.hadoop.hbase.HRegionInfo;
061import org.apache.hadoop.hbase.KeyValue;
062import org.apache.hadoop.hbase.KeyValueUtil;
063import org.apache.hadoop.hbase.PrivateCellUtil;
064import org.apache.hadoop.hbase.TableName;
065import org.apache.hadoop.hbase.Tag;
066import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
067import org.apache.hadoop.hbase.mob.MobUtils;
068import org.apache.hadoop.hbase.regionserver.HStoreFile;
069import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
070import org.apache.hadoop.hbase.util.BloomFilter;
071import org.apache.hadoop.hbase.util.BloomFilterFactory;
072import org.apache.hadoop.hbase.util.BloomFilterUtil;
073import org.apache.hadoop.hbase.util.Bytes;
074import org.apache.hadoop.hbase.util.CommonFSUtils;
075import org.apache.hadoop.hbase.util.HFileArchiveUtil;
076import org.apache.hadoop.util.Tool;
077import org.apache.hadoop.util.ToolRunner;
078import org.apache.yetus.audience.InterfaceAudience;
079import org.apache.yetus.audience.InterfaceStability;
080import org.slf4j.Logger;
081import org.slf4j.LoggerFactory;
082
083import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
084import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
085import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
086import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
087import org.apache.hbase.thirdparty.org.apache.commons.cli.OptionGroup;
088import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
089import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
090import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser;
091
092/**
093 * Implements pretty-printing functionality for {@link HFile}s.
094 */
095@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
096@InterfaceStability.Evolving
097public class HFilePrettyPrinter extends Configured implements Tool {
098
  // Logger used by run() to report option-parsing and file-reading failures.
  private static final Logger LOG = LoggerFactory.getLogger(HFilePrettyPrinter.class);

  // Command-line option definitions; populated by init().
  private Options options = new Options();

  // The flags below are assigned from the parsed command line in parseOptions().
  // -v: emit extra progress/delimiter output.
  private boolean verbose;
  // -p: print cell values (and tags) in addition to keys.
  private boolean printValue;
  // -e: print cell keys; also forced on whenever printValue is set.
  private boolean printKey;
  // -m: print file meta data.
  private boolean shouldPrintMeta;
  // -b: print the data block index.
  private boolean printBlockIndex;
  // -h: print the header of every data block.
  private boolean printBlockHeaders;
  // -s: collect and print key/value statistics.
  private boolean printStats;
  // -k: warn when a row sorts before the previously scanned row.
  private boolean checkRow;
  // -a: warn when a cell's family does not match the file path or the previous cell.
  private boolean checkFamily;
  // Set when -w supplied a non-empty row; the scan then starts at that row.
  private boolean isSeekToRow = false;
  // -i: report mob reference cells whose mob file cannot be found.
  private boolean checkMobIntegrity = false;
  // Cache of candidate directories in which mob files are looked up; created in parseOptions()
  // only when checkMobIntegrity is enabled.
  private Map<String, List<Path>> mobFileLocations;
  // Size limits handed to evictMobFilesIfNecessary() for the per-scan sets of mob file names
  // already known to exist / known to be missing.
  private static final int FOUND_MOB_FILES_CACHE_CAPACITY = 50;
  private static final int MISSING_MOB_FILES_CACHE_CAPACITY = 20;
  // Output destinations; default to the process streams, replaceable via setPrintStreams().
  private PrintStream out = System.out;
  private PrintStream err = System.err;

  /**
   * The row which the user wants to specify and print all the KeyValues for.
   */
  private byte[] row = null;

  // Files to process, gathered from -f, -r and any remaining positional arguments.
  private List<Path> files = new ArrayList<>();
  // Number of cells visited by scanKeysValues(), accumulated across all processed files.
  private int count;

  // Indentation unit used when printing meta data.
  private static final String FOUR_SPACES = "    ";
129
130  public HFilePrettyPrinter() {
131    super();
132    init();
133  }
134
  /**
   * Creates a pretty-printer bound to the given configuration and registers the supported
   * command-line options.
   *
   * @param conf the configuration handed to the {@link Configured} base class
   */
  public HFilePrettyPrinter(Configuration conf) {
    super(conf);
    init();
  }
139
140  private void init() {
141    options.addOption("v", "verbose", false,
142        "Verbose output; emits file and meta data delimiters");
143    options.addOption("p", "printkv", false, "Print key/value pairs");
144    options.addOption("e", "printkey", false, "Print keys");
145    options.addOption("m", "printmeta", false, "Print meta data of file");
146    options.addOption("b", "printblocks", false, "Print block index meta data");
147    options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
148    options.addOption("k", "checkrow", false,
149        "Enable row order check; looks for out-of-order keys");
150    options.addOption("a", "checkfamily", false, "Enable family check");
151    options.addOption("w", "seekToRow", true,
152      "Seek to this row and print all the kvs for this row only");
153    options.addOption("s", "stats", false, "Print statistics");
154    options.addOption("i", "checkMobIntegrity", false,
155      "Print all cells whose mob files are missing");
156
157    OptionGroup files = new OptionGroup();
158    files.addOption(new Option("f", "file", true,
159      "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
160    files.addOption(new Option("r", "region", true,
161      "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
162    options.addOptionGroup(files);
163  }
164
165  public void setPrintStreams(PrintStream out, PrintStream err) {
166    this.out = out;
167    this.err = err;
168  }
169
170  public boolean parseOptions(String args[]) throws ParseException,
171      IOException {
172    if (args.length == 0) {
173      HelpFormatter formatter = new HelpFormatter();
174      formatter.printHelp("hfile", options, true);
175      return false;
176    }
177    CommandLineParser parser = new PosixParser();
178    CommandLine cmd = parser.parse(options, args);
179
180    verbose = cmd.hasOption("v");
181    printValue = cmd.hasOption("p");
182    printKey = cmd.hasOption("e") || printValue;
183    shouldPrintMeta = cmd.hasOption("m");
184    printBlockIndex = cmd.hasOption("b");
185    printBlockHeaders = cmd.hasOption("h");
186    printStats = cmd.hasOption("s");
187    checkRow = cmd.hasOption("k");
188    checkFamily = cmd.hasOption("a");
189    checkMobIntegrity = cmd.hasOption("i");
190
191    if (cmd.hasOption("f")) {
192      files.add(new Path(cmd.getOptionValue("f")));
193    }
194
195    if (cmd.hasOption("w")) {
196      String key = cmd.getOptionValue("w");
197      if (key != null && key.length() != 0) {
198        row = Bytes.toBytesBinary(key);
199        isSeekToRow = true;
200      } else {
201        err.println("Invalid row is specified.");
202        System.exit(-1);
203      }
204    }
205
206    if (cmd.hasOption("r")) {
207      String regionName = cmd.getOptionValue("r");
208      byte[] rn = Bytes.toBytes(regionName);
209      byte[][] hri = HRegionInfo.parseRegionName(rn);
210      Path rootDir = CommonFSUtils.getRootDir(getConf());
211      Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
212      String enc = HRegionInfo.encodeRegionName(rn);
213      Path regionDir = new Path(tableDir, enc);
214      if (verbose)
215        out.println("region dir -> " + regionDir);
216      List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
217          regionDir);
218      if (verbose)
219        out.println("Number of region files found -> "
220            + regionFiles.size());
221      if (verbose) {
222        int i = 1;
223        for (Path p : regionFiles) {
224          if (verbose)
225            out.println("Found file[" + i++ + "] -> " + p);
226        }
227      }
228      files.addAll(regionFiles);
229    }
230
231    if(checkMobIntegrity) {
232      if (verbose) {
233        System.out.println("checkMobIntegrity is enabled");
234      }
235      mobFileLocations = new HashMap<>();
236    }
237
238    cmd.getArgList().forEach((file) -> files.add(new Path(file)));
239
240    return true;
241  }
242
243  /**
244   * Runs the command-line pretty-printer, and returns the desired command
245   * exit code (zero for success, non-zero for failure).
246   */
247  @Override
248  public int run(String[] args) {
249    if (getConf() == null) {
250      throw new RuntimeException("A Configuration instance must be provided.");
251    }
252    try {
253      CommonFSUtils.setFsDefault(getConf(), CommonFSUtils.getRootDir(getConf()));
254      if (!parseOptions(args)) {
255        return 1;
256      }
257    } catch (IOException ex) {
258      LOG.error("Error parsing command-line options", ex);
259      return 1;
260    } catch (ParseException ex) {
261      LOG.error("Error parsing command-line options", ex);
262      return 1;
263    }
264
265    // iterate over all files found
266    for (Path fileName : files) {
267      try {
268        int exitCode = processFile(fileName, false);
269        if (exitCode != 0) {
270          return exitCode;
271        }
272      } catch (IOException ex) {
273        LOG.error("Error reading " + fileName, ex);
274        return -2;
275      }
276    }
277
278    if (verbose || printKey) {
279      out.println("Scanned kv count -> " + count);
280    }
281
282    return 0;
283  }
284
285  // HBASE-22561 introduces boolean checkRootDir for WebUI specificly
286  public int processFile(Path file, boolean checkRootDir) throws IOException {
287    if (verbose) {
288      out.println("Scanning -> " + file);
289    }
290
291    if (checkRootDir) {
292      Path rootPath = CommonFSUtils.getRootDir(getConf());
293      String rootString = rootPath + rootPath.SEPARATOR;
294      if (!file.toString().startsWith(rootString)) {
295        // First we see if fully-qualified URI matches the root dir. It might
296        // also be an absolute path in the same filesystem, so we prepend the FS
297        // of the root dir and see if that fully-qualified URI matches.
298        FileSystem rootFS = rootPath.getFileSystem(getConf());
299        String qualifiedFile = rootFS.getUri().toString() + file.toString();
300        if (!qualifiedFile.startsWith(rootString)) {
301          err.println(
302            "ERROR, file (" + file + ") is not in HBase's root directory (" + rootString + ")");
303          return -2;
304        }
305      }
306    }
307
308    FileSystem fs = file.getFileSystem(getConf());
309    if (!fs.exists(file)) {
310      err.println("ERROR, file doesnt exist: " + file);
311      return -2;
312    }
313
314    HFile.Reader reader = HFile.createReader(fs, file, CacheConfig.DISABLED, true, getConf());
315
316    Map<byte[], byte[]> fileInfo = reader.getHFileInfo();
317
318    KeyValueStatsCollector fileStats = null;
319
320    if (verbose || printKey || checkRow || checkFamily || printStats || checkMobIntegrity) {
321      // scan over file and read key/value's and check if requested
322      HFileScanner scanner = reader.getScanner(false, false, false);
323      fileStats = new KeyValueStatsCollector();
324      boolean shouldScanKeysValues = false;
325      if (this.isSeekToRow) {
326        // seek to the first kv on this row
327        shouldScanKeysValues =
328          (scanner.seekTo(PrivateCellUtil.createFirstOnRow(this.row)) != -1);
329      } else {
330        shouldScanKeysValues = scanner.seekTo();
331      }
332      if (shouldScanKeysValues)
333        scanKeysValues(file, fileStats, scanner, row);
334    }
335
336    // print meta data
337    if (shouldPrintMeta) {
338      printMeta(reader, fileInfo);
339    }
340
341    if (printBlockIndex) {
342      out.println("Block Index:");
343      out.println(reader.getDataBlockIndexReader());
344    }
345
346    if (printBlockHeaders) {
347      out.println("Block Headers:");
348      /*
349       * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and
350       * TestLazyDataBlockDecompression. Refactor?
351       */
352      FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
353      long fileSize = fs.getFileStatus(file).getLen();
354      FixedFileTrailer trailer =
355        FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
356      long offset = trailer.getFirstDataBlockOffset(),
357        max = trailer.getLastDataBlockOffset();
358      HFileBlock block;
359      while (offset <= max) {
360        block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
361          /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
362        offset += block.getOnDiskSizeWithHeader();
363        out.println(block);
364      }
365    }
366
367    if (printStats) {
368      fileStats.finish();
369      out.println("Stats:\n" + fileStats);
370    }
371
372    reader.close();
373    return 0;
374  }
375
376  private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
377      HFileScanner scanner,  byte[] row) throws IOException {
378    Cell pCell = null;
379    FileSystem fs = FileSystem.get(getConf());
380    Set<String> foundMobFiles = new LinkedHashSet<>(FOUND_MOB_FILES_CACHE_CAPACITY);
381    Set<String> missingMobFiles = new LinkedHashSet<>(MISSING_MOB_FILES_CACHE_CAPACITY);
382    do {
383      Cell cell = scanner.getCell();
384      if (row != null && row.length != 0) {
385        int result = CellComparator.getInstance().compareRows(cell, row, 0, row.length);
386        if (result > 0) {
387          break;
388        } else if (result < 0) {
389          continue;
390        }
391      }
392      // collect stats
393      if (printStats) {
394        fileStats.collect(cell);
395      }
396      // dump key value
397      if (printKey) {
398        out.print("K: " + cell);
399        if (printValue) {
400          out.print(" V: "
401              + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(),
402                  cell.getValueLength()));
403          int i = 0;
404          List<Tag> tags = PrivateCellUtil.getTags(cell);
405          for (Tag tag : tags) {
406            out.print(String.format(" T[%d]: %s", i++, tag.toString()));
407          }
408        }
409        out.println();
410      }
411      // check if rows are in order
412      if (checkRow && pCell != null) {
413        if (CellComparator.getInstance().compareRows(pCell, cell) > 0) {
414          err.println("WARNING, previous row is greater then"
415              + " current row\n\tfilename -> " + file + "\n\tprevious -> "
416              + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent  -> "
417              + CellUtil.getCellKeyAsString(cell));
418        }
419      }
420      // check if families are consistent
421      if (checkFamily) {
422        String fam = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(),
423            cell.getFamilyLength());
424        if (!file.toString().contains(fam)) {
425          err.println("WARNING, filename does not match kv family,"
426              + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
427              + CellUtil.getCellKeyAsString(cell));
428        }
429        if (pCell != null && CellComparator.getInstance().compareFamilies(pCell, cell) != 0) {
430          err.println("WARNING, previous kv has different family"
431              + " compared to current key\n\tfilename -> " + file
432              + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell)
433              + "\n\tcurrent  -> " + CellUtil.getCellKeyAsString(cell));
434        }
435      }
436      // check if mob files are missing.
437      if (checkMobIntegrity && MobUtils.isMobReferenceCell(cell)) {
438        Tag tnTag = MobUtils.getTableNameTag(cell);
439        if (tnTag == null) {
440          System.err.println("ERROR, wrong tag format in mob reference cell "
441            + CellUtil.getCellKeyAsString(cell));
442        } else if (!MobUtils.hasValidMobRefCellValue(cell)) {
443          System.err.println("ERROR, wrong value format in mob reference cell "
444            + CellUtil.getCellKeyAsString(cell));
445        } else {
446          TableName tn = TableName.valueOf(Tag.cloneValue(tnTag));
447          String mobFileName = MobUtils.getMobFileName(cell);
448          boolean exist = mobFileExists(fs, tn, mobFileName,
449            Bytes.toString(CellUtil.cloneFamily(cell)), foundMobFiles, missingMobFiles);
450          if (!exist) {
451            // report error
452            System.err.println("ERROR, the mob file [" + mobFileName
453              + "] is missing referenced by cell " + CellUtil.getCellKeyAsString(cell));
454          }
455        }
456      }
457      pCell = cell;
458      ++count;
459    } while (scanner.next());
460  }
461
462  /**
463   * Checks whether the referenced mob file exists.
464   */
465  private boolean mobFileExists(FileSystem fs, TableName tn, String mobFileName, String family,
466    Set<String> foundMobFiles, Set<String> missingMobFiles) throws IOException {
467    if (foundMobFiles.contains(mobFileName)) {
468      return true;
469    }
470    if (missingMobFiles.contains(mobFileName)) {
471      return false;
472    }
473    String tableName = tn.getNameAsString();
474    List<Path> locations = mobFileLocations.get(tableName);
475    if (locations == null) {
476      locations = new ArrayList<>(2);
477      locations.add(MobUtils.getMobFamilyPath(getConf(), tn, family));
478      locations.add(HFileArchiveUtil.getStoreArchivePath(getConf(), tn,
479        MobUtils.getMobRegionInfo(tn).getEncodedName(), family));
480      mobFileLocations.put(tn.getNameAsString(), locations);
481    }
482    boolean exist = false;
483    for (Path location : locations) {
484      Path mobFilePath = new Path(location, mobFileName);
485      if (fs.exists(mobFilePath)) {
486        exist = true;
487        break;
488      }
489    }
490    if (exist) {
491      evictMobFilesIfNecessary(foundMobFiles, FOUND_MOB_FILES_CACHE_CAPACITY);
492      foundMobFiles.add(mobFileName);
493    } else {
494      evictMobFilesIfNecessary(missingMobFiles, MISSING_MOB_FILES_CACHE_CAPACITY);
495      missingMobFiles.add(mobFileName);
496    }
497    return exist;
498  }
499
500  /**
501   * Evicts the cached mob files if the set is larger than the limit.
502   */
503  private void evictMobFilesIfNecessary(Set<String> mobFileNames, int limit) {
504    if (mobFileNames.size() < limit) {
505      return;
506    }
507    int index = 0;
508    int evict = limit / 2;
509    Iterator<String> fileNamesItr = mobFileNames.iterator();
510    while (index < evict && fileNamesItr.hasNext()) {
511      fileNamesItr.next();
512      fileNamesItr.remove();
513      index++;
514    }
515  }
516
517  /**
518   * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
519   * with a four-space indentation.
520   */
521  private static String asSeparateLines(String keyValueStr) {
522    return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
523                                  ",\n" + FOUR_SPACES + "$1");
524  }
525
526  private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
527      throws IOException {
528    out.println("Block index size as per heapsize: "
529        + reader.indexSize());
530    out.println(asSeparateLines(reader.toString()));
531    out.println("Trailer:\n    "
532        + asSeparateLines(reader.getTrailer().toString()));
533    out.println("Fileinfo:");
534    for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
535      out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
536      if (Bytes.equals(e.getKey(), HStoreFile.MAX_SEQ_ID_KEY)
537          || Bytes.equals(e.getKey(), HStoreFile.DELETE_FAMILY_COUNT)
538          || Bytes.equals(e.getKey(), HStoreFile.EARLIEST_PUT_TS)
539          || Bytes.equals(e.getKey(), HFileWriterImpl.MAX_MEMSTORE_TS_KEY)
540          || Bytes.equals(e.getKey(), HFileInfo.CREATE_TIME_TS)
541          || Bytes.equals(e.getKey(), HStoreFile.BULKLOAD_TIME_KEY)) {
542        out.println(Bytes.toLong(e.getValue()));
543      } else if (Bytes.equals(e.getKey(), HStoreFile.TIMERANGE_KEY)) {
544        TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(e.getValue());
545        out.println(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
546      } else if (Bytes.equals(e.getKey(), HFileInfo.AVG_KEY_LEN)
547          || Bytes.equals(e.getKey(), HFileInfo.AVG_VALUE_LEN)
548          || Bytes.equals(e.getKey(), HFileWriterImpl.KEY_VALUE_VERSION)
549          || Bytes.equals(e.getKey(), HFileInfo.MAX_TAGS_LEN)) {
550        out.println(Bytes.toInt(e.getValue()));
551      } else if (Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY)
552          || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED)
553          || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY)) {
554        out.println(Bytes.toBoolean(e.getValue()));
555      } else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) {
556        out.println(new KeyValue.KeyOnlyKeyValue(e.getValue()).toString());
557      } else {
558        out.println(Bytes.toStringBinary(e.getValue()));
559      }
560    }
561
562    try {
563      out.println("Mid-key: " + reader.midKey().map(CellUtil::getCellKeyAsString));
564    } catch (Exception e) {
565      out.println ("Unable to retrieve the midkey");
566    }
567
568    // Printing general bloom information
569    DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
570    BloomFilter bloomFilter = null;
571    if (bloomMeta != null)
572      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
573
574    out.println("Bloom filter:");
575    if (bloomFilter != null) {
576      out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
577          BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
578    } else {
579      out.println(FOUR_SPACES + "Not present");
580    }
581
582    // Printing delete bloom information
583    bloomMeta = reader.getDeleteBloomFilterMetadata();
584    bloomFilter = null;
585    if (bloomMeta != null)
586      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
587
588    out.println("Delete Family Bloom filter:");
589    if (bloomFilter != null) {
590      out.println(FOUR_SPACES
591          + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP,
592              "\n" + FOUR_SPACES));
593    } else {
594      out.println(FOUR_SPACES + "Not present");
595    }
596  }
597
598  private static class KeyValueStatsCollector {
599    private final MetricRegistry metricsRegistry = new MetricRegistry();
600    private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
601    private final SimpleReporter simpleReporter = SimpleReporter.forRegistry(metricsRegistry).
602        outputTo(new PrintStream(metricsOutput)).filter(MetricFilter.ALL).build();
603
604    Histogram keyLen = metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Key length"));
605    Histogram valLen = metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Val length"));
606    Histogram rowSizeBytes = metricsRegistry.histogram(
607      name(HFilePrettyPrinter.class, "Row size (bytes)"));
608    Histogram rowSizeCols = metricsRegistry.histogram(
609      name(HFilePrettyPrinter.class, "Row size (columns)"));
610
611    long curRowBytes = 0;
612    long curRowCols = 0;
613
614    byte[] biggestRow = null;
615
616    private Cell prevCell = null;
617    private long maxRowBytes = 0;
618    private long curRowKeyLength;
619
620    public void collect(Cell cell) {
621      valLen.update(cell.getValueLength());
622      if (prevCell != null &&
623          CellComparator.getInstance().compareRows(prevCell, cell) != 0) {
624        // new row
625        collectRow();
626      }
627      curRowBytes += cell.getSerializedSize();
628      curRowKeyLength = KeyValueUtil.keyLength(cell);
629      curRowCols++;
630      prevCell = cell;
631    }
632
633    private void collectRow() {
634      rowSizeBytes.update(curRowBytes);
635      rowSizeCols.update(curRowCols);
636      keyLen.update(curRowKeyLength);
637
638      if (curRowBytes > maxRowBytes && prevCell != null) {
639        biggestRow = CellUtil.cloneRow(prevCell);
640        maxRowBytes = curRowBytes;
641      }
642
643      curRowBytes = 0;
644      curRowCols = 0;
645    }
646
647    public void finish() {
648      if (curRowCols > 0) {
649        collectRow();
650      }
651    }
652
653    @Override
654    public String toString() {
655      if (prevCell == null)
656        return "no data available for statistics";
657
658      // Dump the metrics to the output stream
659      simpleReporter.stop();
660      simpleReporter.report();
661
662      return
663              metricsOutput.toString() +
664                      "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
665    }
666  }
667
668  /**
669   * Almost identical to ConsoleReporter, but extending ScheduledReporter,
670   * as extending ConsoleReporter in this version of dropwizard is now too much trouble.
671   */
672  private static class SimpleReporter extends ScheduledReporter {
673    /**
674     * Returns a new {@link Builder} for {@link ConsoleReporter}.
675     *
676     * @param registry the registry to report
677     * @return a {@link Builder} instance for a {@link ConsoleReporter}
678     */
679    public static Builder forRegistry(MetricRegistry registry) {
680      return new Builder(registry);
681    }
682
683    /**
684     * A builder for {@link SimpleReporter} instances. Defaults to using the default locale and
685     * time zone, writing to {@code System.out}, converting rates to events/second, converting
686     * durations to milliseconds, and not filtering metrics.
687     */
688    public static class Builder {
689      private final MetricRegistry registry;
690      private PrintStream output;
691      private Locale locale;
692      private TimeZone timeZone;
693      private TimeUnit rateUnit;
694      private TimeUnit durationUnit;
695      private MetricFilter filter;
696
697      private Builder(MetricRegistry registry) {
698        this.registry = registry;
699        this.output = System.out;
700        this.locale = Locale.getDefault();
701        this.timeZone = TimeZone.getDefault();
702        this.rateUnit = TimeUnit.SECONDS;
703        this.durationUnit = TimeUnit.MILLISECONDS;
704        this.filter = MetricFilter.ALL;
705      }
706
707      /**
708       * Write to the given {@link PrintStream}.
709       *
710       * @param output a {@link PrintStream} instance.
711       * @return {@code this}
712       */
713      public Builder outputTo(PrintStream output) {
714        this.output = output;
715        return this;
716      }
717
718      /**
719       * Only report metrics which match the given filter.
720       *
721       * @param filter a {@link MetricFilter}
722       * @return {@code this}
723       */
724      public Builder filter(MetricFilter filter) {
725        this.filter = filter;
726        return this;
727      }
728
729      /**
730       * Builds a {@link ConsoleReporter} with the given properties.
731       *
732       * @return a {@link ConsoleReporter}
733       */
734      public SimpleReporter build() {
735        return new SimpleReporter(registry,
736            output,
737            locale,
738            timeZone,
739            rateUnit,
740            durationUnit,
741            filter);
742      }
743    }
744
745    private final PrintStream output;
746    private final Locale locale;
747    private final DateFormat dateFormat;
748
749    private SimpleReporter(MetricRegistry registry,
750                            PrintStream output,
751                            Locale locale,
752                            TimeZone timeZone,
753                            TimeUnit rateUnit,
754                            TimeUnit durationUnit,
755                            MetricFilter filter) {
756      super(registry, "simple-reporter", filter, rateUnit, durationUnit);
757      this.output = output;
758      this.locale = locale;
759
760      this.dateFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT,
761          DateFormat.MEDIUM,
762          locale);
763      dateFormat.setTimeZone(timeZone);
764    }
765
766    @Override
767    public void report(SortedMap<String, Gauge> gauges,
768                       SortedMap<String, Counter> counters,
769                       SortedMap<String, Histogram> histograms,
770                       SortedMap<String, Meter> meters,
771                       SortedMap<String, Timer> timers) {
772      // we know we only have histograms
773      if (!histograms.isEmpty()) {
774        for (Map.Entry<String, Histogram> entry : histograms.entrySet()) {
775          output.print("   " + StringUtils.substringAfterLast(entry.getKey(), "."));
776          output.println(':');
777          printHistogram(entry.getValue());
778        }
779        output.println();
780      }
781
782      output.println();
783      output.flush();
784    }
785
786    private void printHistogram(Histogram histogram) {
787      Snapshot snapshot = histogram.getSnapshot();
788      output.printf(locale, "               min = %d%n", snapshot.getMin());
789      output.printf(locale, "               max = %d%n", snapshot.getMax());
790      output.printf(locale, "              mean = %2.2f%n", snapshot.getMean());
791      output.printf(locale, "            stddev = %2.2f%n", snapshot.getStdDev());
792      output.printf(locale, "            median = %2.2f%n", snapshot.getMedian());
793      output.printf(locale, "              75%% <= %2.2f%n", snapshot.get75thPercentile());
794      output.printf(locale, "              95%% <= %2.2f%n", snapshot.get95thPercentile());
795      output.printf(locale, "              98%% <= %2.2f%n", snapshot.get98thPercentile());
796      output.printf(locale, "              99%% <= %2.2f%n", snapshot.get99thPercentile());
797      output.printf(locale, "            99.9%% <= %2.2f%n", snapshot.get999thPercentile());
798      output.printf(locale, "             count = %d%n", histogram.getCount());
799    }
800  }
801
802  public static void main(String[] args) throws Exception {
803    Configuration conf = HBaseConfiguration.create();
804    // no need for a block cache
805    conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
806    int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
807    System.exit(ret);
808  }
809}