001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static com.codahale.metrics.MetricRegistry.name;
021
022import com.codahale.metrics.ConsoleReporter;
023import com.codahale.metrics.Counter;
024import com.codahale.metrics.Gauge;
025import com.codahale.metrics.Histogram;
026import com.codahale.metrics.Meter;
027import com.codahale.metrics.MetricFilter;
028import com.codahale.metrics.MetricRegistry;
029import com.codahale.metrics.ScheduledReporter;
030import com.codahale.metrics.Snapshot;
031import com.codahale.metrics.Timer;
032import java.io.ByteArrayOutputStream;
033import java.io.DataInput;
034import java.io.IOException;
035import java.io.PrintStream;
036import java.text.DateFormat;
037import java.util.ArrayList;
038import java.util.HashMap;
039import java.util.Iterator;
040import java.util.LinkedHashSet;
041import java.util.List;
042import java.util.Locale;
043import java.util.Map;
044import java.util.Optional;
045import java.util.Set;
046import java.util.SortedMap;
047import java.util.TimeZone;
048import java.util.concurrent.TimeUnit;
049import org.apache.commons.lang3.StringUtils;
050import org.apache.hadoop.conf.Configuration;
051import org.apache.hadoop.conf.Configured;
052import org.apache.hadoop.fs.FileSystem;
053import org.apache.hadoop.fs.Path;
054import org.apache.hadoop.hbase.Cell;
055import org.apache.hadoop.hbase.CellComparator;
056import org.apache.hadoop.hbase.CellUtil;
057import org.apache.hadoop.hbase.HBaseConfiguration;
058import org.apache.hadoop.hbase.HBaseInterfaceAudience;
059import org.apache.hadoop.hbase.HConstants;
060import org.apache.hadoop.hbase.HRegionInfo;
061import org.apache.hadoop.hbase.KeyValue;
062import org.apache.hadoop.hbase.KeyValueUtil;
063import org.apache.hadoop.hbase.PrivateCellUtil;
064import org.apache.hadoop.hbase.TableName;
065import org.apache.hadoop.hbase.Tag;
066import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
067import org.apache.hadoop.hbase.mob.MobUtils;
068import org.apache.hadoop.hbase.regionserver.HStoreFile;
069import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
070import org.apache.hadoop.hbase.util.BloomFilter;
071import org.apache.hadoop.hbase.util.BloomFilterFactory;
072import org.apache.hadoop.hbase.util.BloomFilterUtil;
073import org.apache.hadoop.hbase.util.Bytes;
074import org.apache.hadoop.hbase.util.CommonFSUtils;
075import org.apache.hadoop.hbase.util.HFileArchiveUtil;
076import org.apache.hadoop.util.Tool;
077import org.apache.hadoop.util.ToolRunner;
078import org.apache.yetus.audience.InterfaceAudience;
079import org.apache.yetus.audience.InterfaceStability;
080import org.slf4j.Logger;
081import org.slf4j.LoggerFactory;
082
083import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
084import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
085import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
086import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
087import org.apache.hbase.thirdparty.org.apache.commons.cli.OptionGroup;
088import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
089import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
090import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser;
091
092/**
093 * Implements pretty-printing functionality for {@link HFile}s.
094 */
095@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
096@InterfaceStability.Evolving
097public class HFilePrettyPrinter extends Configured implements Tool {
098
099  private static final Logger LOG = LoggerFactory.getLogger(HFilePrettyPrinter.class);
100
101  private Options options = new Options();
102
103  private boolean verbose;
104  private boolean printValue;
105  private boolean printKey;
106  private boolean shouldPrintMeta;
107  private boolean printBlockIndex;
108  private boolean printBlockHeaders;
109  private boolean printStats;
110  private boolean checkRow;
111  private boolean checkFamily;
112  private boolean isSeekToRow = false;
113  private boolean checkMobIntegrity = false;
114  private Map<String, List<Path>> mobFileLocations;
115  private static final int FOUND_MOB_FILES_CACHE_CAPACITY = 50;
116  private static final int MISSING_MOB_FILES_CACHE_CAPACITY = 20;
117  private PrintStream out = System.out;
118  private PrintStream err = System.err;
119
120  /**
121   * The row which the user wants to specify and print all the KeyValues for.
122   */
123  private byte[] row = null;
124
125  private List<Path> files = new ArrayList<>();
126  private int count;
127
128  private static final String FOUR_SPACES = "    ";
129
130  public HFilePrettyPrinter() {
131    super();
132    init();
133  }
134
135  public HFilePrettyPrinter(Configuration conf) {
136    super(conf);
137    init();
138  }
139
140  private void init() {
141    options.addOption("v", "verbose", false, "Verbose output; emits file and meta data delimiters");
142    options.addOption("p", "printkv", false, "Print key/value pairs");
143    options.addOption("e", "printkey", false, "Print keys");
144    options.addOption("m", "printmeta", false, "Print meta data of file");
145    options.addOption("b", "printblocks", false, "Print block index meta data");
146    options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
147    options.addOption("k", "checkrow", false,
148      "Enable row order check; looks for out-of-order keys");
149    options.addOption("a", "checkfamily", false, "Enable family check");
150    options.addOption("w", "seekToRow", true,
151      "Seek to this row and print all the kvs for this row only");
152    options.addOption("s", "stats", false, "Print statistics");
153    options.addOption("i", "checkMobIntegrity", false,
154      "Print all cells whose mob files are missing");
155
156    OptionGroup files = new OptionGroup();
157    files.addOption(new Option("f", "file", true,
158      "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
159    files.addOption(
160      new Option("r", "region", true, "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
161    options.addOptionGroup(files);
162  }
163
164  public void setPrintStreams(PrintStream out, PrintStream err) {
165    this.out = out;
166    this.err = err;
167  }
168
169  public boolean parseOptions(String args[]) throws ParseException, IOException {
170    if (args.length == 0) {
171      HelpFormatter formatter = new HelpFormatter();
172      formatter.printHelp("hfile", options, true);
173      return false;
174    }
175    CommandLineParser parser = new PosixParser();
176    CommandLine cmd = parser.parse(options, args);
177
178    verbose = cmd.hasOption("v");
179    printValue = cmd.hasOption("p");
180    printKey = cmd.hasOption("e") || printValue;
181    shouldPrintMeta = cmd.hasOption("m");
182    printBlockIndex = cmd.hasOption("b");
183    printBlockHeaders = cmd.hasOption("h");
184    printStats = cmd.hasOption("s");
185    checkRow = cmd.hasOption("k");
186    checkFamily = cmd.hasOption("a");
187    checkMobIntegrity = cmd.hasOption("i");
188
189    if (cmd.hasOption("f")) {
190      files.add(new Path(cmd.getOptionValue("f")));
191    }
192
193    if (cmd.hasOption("w")) {
194      String key = cmd.getOptionValue("w");
195      if (key != null && key.length() != 0) {
196        row = Bytes.toBytesBinary(key);
197        isSeekToRow = true;
198      } else {
199        err.println("Invalid row is specified.");
200        System.exit(-1);
201      }
202    }
203
204    if (cmd.hasOption("r")) {
205      String regionName = cmd.getOptionValue("r");
206      byte[] rn = Bytes.toBytes(regionName);
207      byte[][] hri = HRegionInfo.parseRegionName(rn);
208      Path rootDir = CommonFSUtils.getRootDir(getConf());
209      Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
210      String enc = HRegionInfo.encodeRegionName(rn);
211      Path regionDir = new Path(tableDir, enc);
212      if (verbose) out.println("region dir -> " + regionDir);
213      List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()), regionDir);
214      if (verbose) out.println("Number of region files found -> " + regionFiles.size());
215      if (verbose) {
216        int i = 1;
217        for (Path p : regionFiles) {
218          if (verbose) out.println("Found file[" + i++ + "] -> " + p);
219        }
220      }
221      files.addAll(regionFiles);
222    }
223
224    if (checkMobIntegrity) {
225      if (verbose) {
226        System.out.println("checkMobIntegrity is enabled");
227      }
228      mobFileLocations = new HashMap<>();
229    }
230
231    cmd.getArgList().forEach((file) -> files.add(new Path(file)));
232
233    return true;
234  }
235
236  /**
237   * Runs the command-line pretty-printer, and returns the desired command exit code (zero for
238   * success, non-zero for failure).
239   */
240  @Override
241  public int run(String[] args) {
242    if (getConf() == null) {
243      throw new RuntimeException("A Configuration instance must be provided.");
244    }
245    try {
246      CommonFSUtils.setFsDefault(getConf(), CommonFSUtils.getRootDir(getConf()));
247      if (!parseOptions(args)) {
248        return 1;
249      }
250    } catch (IOException ex) {
251      LOG.error("Error parsing command-line options", ex);
252      return 1;
253    } catch (ParseException ex) {
254      LOG.error("Error parsing command-line options", ex);
255      return 1;
256    }
257
258    // iterate over all files found
259    for (Path fileName : files) {
260      try {
261        int exitCode = processFile(fileName, false);
262        if (exitCode != 0) {
263          return exitCode;
264        }
265      } catch (IOException ex) {
266        LOG.error("Error reading " + fileName, ex);
267        return -2;
268      }
269    }
270
271    if (verbose || printKey) {
272      out.println("Scanned kv count -> " + count);
273    }
274
275    return 0;
276  }
277
278  // HBASE-22561 introduces boolean checkRootDir for WebUI specificly
279  public int processFile(Path file, boolean checkRootDir) throws IOException {
280    if (verbose) {
281      out.println("Scanning -> " + file);
282    }
283
284    if (checkRootDir) {
285      Path rootPath = CommonFSUtils.getRootDir(getConf());
286      String rootString = rootPath + Path.SEPARATOR;
287      if (!file.toString().startsWith(rootString)) {
288        // First we see if fully-qualified URI matches the root dir. It might
289        // also be an absolute path in the same filesystem, so we prepend the FS
290        // of the root dir and see if that fully-qualified URI matches.
291        FileSystem rootFS = rootPath.getFileSystem(getConf());
292        String qualifiedFile = rootFS.getUri().toString() + file.toString();
293        if (!qualifiedFile.startsWith(rootString)) {
294          err.println(
295            "ERROR, file (" + file + ") is not in HBase's root directory (" + rootString + ")");
296          return -2;
297        }
298      }
299    }
300
301    FileSystem fs = file.getFileSystem(getConf());
302    if (!fs.exists(file)) {
303      err.println("ERROR, file doesnt exist: " + file);
304      return -2;
305    }
306
307    HFile.Reader reader = HFile.createReader(fs, file, CacheConfig.DISABLED, true, getConf());
308
309    Map<byte[], byte[]> fileInfo = reader.getHFileInfo();
310
311    KeyValueStatsCollector fileStats = null;
312
313    if (verbose || printKey || checkRow || checkFamily || printStats || checkMobIntegrity) {
314      // scan over file and read key/value's and check if requested
315      HFileScanner scanner = reader.getScanner(getConf(), false, false, false);
316      fileStats = new KeyValueStatsCollector();
317      boolean shouldScanKeysValues;
318      if (this.isSeekToRow && !Bytes.equals(row, reader.getFirstRowKey().orElse(null))) {
319        // seek to the first kv on this row
320        shouldScanKeysValues = (scanner.seekTo(PrivateCellUtil.createFirstOnRow(this.row)) != -1);
321      } else {
322        shouldScanKeysValues = scanner.seekTo();
323      }
324      if (shouldScanKeysValues) {
325        scanKeysValues(file, fileStats, scanner, row);
326      }
327    }
328
329    // print meta data
330    if (shouldPrintMeta) {
331      printMeta(reader, fileInfo);
332    }
333
334    if (printBlockIndex) {
335      out.println("Block Index:");
336      out.println(reader.getDataBlockIndexReader());
337    }
338
339    if (printBlockHeaders) {
340      out.println("Block Headers:");
341      /*
342       * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and
343       * TestLazyDataBlockDecompression. Refactor?
344       */
345      FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
346      long fileSize = fs.getFileStatus(file).getLen();
347      FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
348      long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
349      HFileBlock block;
350      while (offset <= max) {
351        block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
352          /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
353        offset += block.getOnDiskSizeWithHeader();
354        out.println(block);
355      }
356    }
357
358    if (printStats) {
359      fileStats.finish();
360      out.println("Stats:\n" + fileStats);
361    }
362
363    reader.close();
364    return 0;
365  }
366
367  private void scanKeysValues(Path file, KeyValueStatsCollector fileStats, HFileScanner scanner,
368    byte[] row) throws IOException {
369    Cell pCell = null;
370    FileSystem fs = FileSystem.get(getConf());
371    Set<String> foundMobFiles = new LinkedHashSet<>(FOUND_MOB_FILES_CACHE_CAPACITY);
372    Set<String> missingMobFiles = new LinkedHashSet<>(MISSING_MOB_FILES_CACHE_CAPACITY);
373    do {
374      Cell cell = scanner.getCell();
375      if (row != null && row.length != 0) {
376        int result = CellComparator.getInstance().compareRows(cell, row, 0, row.length);
377        if (result > 0) {
378          break;
379        } else if (result < 0) {
380          continue;
381        }
382      }
383      // collect stats
384      if (printStats) {
385        fileStats.collect(cell);
386      }
387      // dump key value
388      if (printKey) {
389        out.print("K: " + cell);
390        if (printValue) {
391          out.print(" V: " + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(),
392            cell.getValueLength()));
393          int i = 0;
394          List<Tag> tags = PrivateCellUtil.getTags(cell);
395          for (Tag tag : tags) {
396            out.print(String.format(" T[%d]: %s", i++, tag.toString()));
397          }
398        }
399        out.println();
400      }
401      // check if rows are in order
402      if (checkRow && pCell != null) {
403        if (CellComparator.getInstance().compareRows(pCell, cell) > 0) {
404          err.println("WARNING, previous row is greater then" + " current row\n\tfilename -> "
405            + file + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent  -> "
406            + CellUtil.getCellKeyAsString(cell));
407        }
408      }
409      // check if families are consistent
410      if (checkFamily) {
411        String fam =
412          Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
413        if (!file.toString().contains(fam)) {
414          err.println("WARNING, filename does not match kv family," + "\n\tfilename -> " + file
415            + "\n\tkeyvalue -> " + CellUtil.getCellKeyAsString(cell));
416        }
417        if (pCell != null && CellComparator.getInstance().compareFamilies(pCell, cell) != 0) {
418          err.println(
419            "WARNING, previous kv has different family" + " compared to current key\n\tfilename -> "
420              + file + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent  -> "
421              + CellUtil.getCellKeyAsString(cell));
422        }
423      }
424      // check if mob files are missing.
425      if (checkMobIntegrity && MobUtils.isMobReferenceCell(cell)) {
426        Optional<TableName> tn = MobUtils.getTableName(cell);
427        if (!tn.isPresent()) {
428          System.err.println(
429            "ERROR, wrong tag format in mob reference cell " + CellUtil.getCellKeyAsString(cell));
430        } else if (!MobUtils.hasValidMobRefCellValue(cell)) {
431          System.err.println(
432            "ERROR, wrong value format in mob reference cell " + CellUtil.getCellKeyAsString(cell));
433        } else {
434          String mobFileName = MobUtils.getMobFileName(cell);
435          boolean exist = mobFileExists(fs, tn.get(), mobFileName,
436            Bytes.toString(CellUtil.cloneFamily(cell)), foundMobFiles, missingMobFiles);
437          if (!exist) {
438            // report error
439            System.err.println("ERROR, the mob file [" + mobFileName
440              + "] is missing referenced by cell " + CellUtil.getCellKeyAsString(cell));
441          }
442        }
443      }
444      pCell = cell;
445      ++count;
446    } while (scanner.next());
447  }
448
449  /**
450   * Checks whether the referenced mob file exists.
451   */
452  private boolean mobFileExists(FileSystem fs, TableName tn, String mobFileName, String family,
453    Set<String> foundMobFiles, Set<String> missingMobFiles) throws IOException {
454    if (foundMobFiles.contains(mobFileName)) {
455      return true;
456    }
457    if (missingMobFiles.contains(mobFileName)) {
458      return false;
459    }
460    String tableName = tn.getNameAsString();
461    List<Path> locations = mobFileLocations.get(tableName);
462    if (locations == null) {
463      locations = new ArrayList<>(2);
464      locations.add(MobUtils.getMobFamilyPath(getConf(), tn, family));
465      locations.add(HFileArchiveUtil.getStoreArchivePath(getConf(), tn,
466        MobUtils.getMobRegionInfo(tn).getEncodedName(), family));
467      mobFileLocations.put(tn.getNameAsString(), locations);
468    }
469    boolean exist = false;
470    for (Path location : locations) {
471      Path mobFilePath = new Path(location, mobFileName);
472      if (fs.exists(mobFilePath)) {
473        exist = true;
474        break;
475      }
476    }
477    if (exist) {
478      evictMobFilesIfNecessary(foundMobFiles, FOUND_MOB_FILES_CACHE_CAPACITY);
479      foundMobFiles.add(mobFileName);
480    } else {
481      evictMobFilesIfNecessary(missingMobFiles, MISSING_MOB_FILES_CACHE_CAPACITY);
482      missingMobFiles.add(mobFileName);
483    }
484    return exist;
485  }
486
487  /**
488   * Evicts the cached mob files if the set is larger than the limit.
489   */
490  private void evictMobFilesIfNecessary(Set<String> mobFileNames, int limit) {
491    if (mobFileNames.size() < limit) {
492      return;
493    }
494    int index = 0;
495    int evict = limit / 2;
496    Iterator<String> fileNamesItr = mobFileNames.iterator();
497    while (index < evict && fileNamesItr.hasNext()) {
498      fileNamesItr.next();
499      fileNamesItr.remove();
500      index++;
501    }
502  }
503
504  /**
505   * Format a string of the form "k1=v1, k2=v2, ..." into separate lines with a four-space
506   * indentation.
507   */
508  private static String asSeparateLines(String keyValueStr) {
509    return keyValueStr.replaceAll(", ([a-zA-Z]+=)", ",\n" + FOUR_SPACES + "$1");
510  }
511
512  private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo) throws IOException {
513    out.println("Block index size as per heapsize: " + reader.indexSize());
514    out.println(asSeparateLines(reader.toString()));
515    out.println("Trailer:\n    " + asSeparateLines(reader.getTrailer().toString()));
516    out.println("Fileinfo:");
517    for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
518      out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
519      if (
520        Bytes.equals(e.getKey(), HStoreFile.MAX_SEQ_ID_KEY)
521          || Bytes.equals(e.getKey(), HStoreFile.DELETE_FAMILY_COUNT)
522          || Bytes.equals(e.getKey(), HStoreFile.EARLIEST_PUT_TS)
523          || Bytes.equals(e.getKey(), HFileWriterImpl.MAX_MEMSTORE_TS_KEY)
524          || Bytes.equals(e.getKey(), HFileInfo.CREATE_TIME_TS)
525          || Bytes.equals(e.getKey(), HStoreFile.BULKLOAD_TIME_KEY)
526      ) {
527        out.println(Bytes.toLong(e.getValue()));
528      } else if (Bytes.equals(e.getKey(), HStoreFile.TIMERANGE_KEY)) {
529        TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(e.getValue());
530        out.println(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
531      } else if (
532        Bytes.equals(e.getKey(), HFileInfo.AVG_KEY_LEN)
533          || Bytes.equals(e.getKey(), HFileInfo.AVG_VALUE_LEN)
534          || Bytes.equals(e.getKey(), HFileWriterImpl.KEY_VALUE_VERSION)
535          || Bytes.equals(e.getKey(), HFileInfo.MAX_TAGS_LEN)
536      ) {
537        out.println(Bytes.toInt(e.getValue()));
538      } else if (
539        Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY)
540          || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED)
541          || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY)
542      ) {
543        out.println(Bytes.toBoolean(e.getValue()));
544      } else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) {
545        out.println(new KeyValue.KeyOnlyKeyValue(e.getValue()).toString());
546      } else {
547        out.println(Bytes.toStringBinary(e.getValue()));
548      }
549    }
550
551    try {
552      out.println("Mid-key: " + reader.midKey().map(CellUtil::getCellKeyAsString));
553    } catch (Exception e) {
554      out.println("Unable to retrieve the midkey");
555    }
556
557    // Printing general bloom information
558    DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
559    BloomFilter bloomFilter = null;
560    if (bloomMeta != null) bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
561
562    out.println("Bloom filter:");
563    if (bloomFilter != null) {
564      out.println(FOUR_SPACES
565        + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
566    } else {
567      out.println(FOUR_SPACES + "Not present");
568    }
569
570    // Printing delete bloom information
571    bloomMeta = reader.getDeleteBloomFilterMetadata();
572    bloomFilter = null;
573    if (bloomMeta != null) bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
574
575    out.println("Delete Family Bloom filter:");
576    if (bloomFilter != null) {
577      out.println(FOUR_SPACES
578        + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
579    } else {
580      out.println(FOUR_SPACES + "Not present");
581    }
582  }
583
584  private static class KeyValueStatsCollector {
585    private final MetricRegistry metricsRegistry = new MetricRegistry();
586    private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
587    private final SimpleReporter simpleReporter = SimpleReporter.forRegistry(metricsRegistry)
588      .outputTo(new PrintStream(metricsOutput)).filter(MetricFilter.ALL).build();
589
590    Histogram keyLen = metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Key length"));
591    Histogram valLen = metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Val length"));
592    Histogram rowSizeBytes =
593      metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Row size (bytes)"));
594    Histogram rowSizeCols =
595      metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Row size (columns)"));
596
597    long curRowBytes = 0;
598    long curRowCols = 0;
599
600    byte[] biggestRow = null;
601
602    private Cell prevCell = null;
603    private long maxRowBytes = 0;
604    private long curRowKeyLength;
605
606    public void collect(Cell cell) {
607      valLen.update(cell.getValueLength());
608      if (prevCell != null && CellComparator.getInstance().compareRows(prevCell, cell) != 0) {
609        // new row
610        collectRow();
611      }
612      curRowBytes += cell.getSerializedSize();
613      curRowKeyLength = KeyValueUtil.keyLength(cell);
614      curRowCols++;
615      prevCell = cell;
616    }
617
618    private void collectRow() {
619      rowSizeBytes.update(curRowBytes);
620      rowSizeCols.update(curRowCols);
621      keyLen.update(curRowKeyLength);
622
623      if (curRowBytes > maxRowBytes && prevCell != null) {
624        biggestRow = CellUtil.cloneRow(prevCell);
625        maxRowBytes = curRowBytes;
626      }
627
628      curRowBytes = 0;
629      curRowCols = 0;
630    }
631
632    public void finish() {
633      if (curRowCols > 0) {
634        collectRow();
635      }
636    }
637
638    @Override
639    public String toString() {
640      if (prevCell == null) return "no data available for statistics";
641
642      // Dump the metrics to the output stream
643      simpleReporter.stop();
644      simpleReporter.report();
645
646      return metricsOutput.toString() + "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
647    }
648  }
649
650  /**
651   * Almost identical to ConsoleReporter, but extending ScheduledReporter, as extending
652   * ConsoleReporter in this version of dropwizard is now too much trouble.
653   */
654  private static class SimpleReporter extends ScheduledReporter {
655    /**
656     * Returns a new {@link Builder} for {@link ConsoleReporter}.
657     * @param registry the registry to report
658     * @return a {@link Builder} instance for a {@link ConsoleReporter}
659     */
660    public static Builder forRegistry(MetricRegistry registry) {
661      return new Builder(registry);
662    }
663
664    /**
665     * A builder for {@link SimpleReporter} instances. Defaults to using the default locale and time
666     * zone, writing to {@code System.out}, converting rates to events/second, converting durations
667     * to milliseconds, and not filtering metrics.
668     */
669    public static class Builder {
670      private final MetricRegistry registry;
671      private PrintStream output;
672      private Locale locale;
673      private TimeZone timeZone;
674      private TimeUnit rateUnit;
675      private TimeUnit durationUnit;
676      private MetricFilter filter;
677
678      private Builder(MetricRegistry registry) {
679        this.registry = registry;
680        this.output = System.out;
681        this.locale = Locale.getDefault();
682        this.timeZone = TimeZone.getDefault();
683        this.rateUnit = TimeUnit.SECONDS;
684        this.durationUnit = TimeUnit.MILLISECONDS;
685        this.filter = MetricFilter.ALL;
686      }
687
688      /**
689       * Write to the given {@link PrintStream}.
690       * @param output a {@link PrintStream} instance.
691       * @return {@code this}
692       */
693      public Builder outputTo(PrintStream output) {
694        this.output = output;
695        return this;
696      }
697
698      /**
699       * Only report metrics which match the given filter.
700       * @param filter a {@link MetricFilter}
701       * @return {@code this}
702       */
703      public Builder filter(MetricFilter filter) {
704        this.filter = filter;
705        return this;
706      }
707
708      /**
709       * Builds a {@link ConsoleReporter} with the given properties.
710       * @return a {@link ConsoleReporter}
711       */
712      public SimpleReporter build() {
713        return new SimpleReporter(registry, output, locale, timeZone, rateUnit, durationUnit,
714          filter);
715      }
716    }
717
718    private final PrintStream output;
719    private final Locale locale;
720    private final DateFormat dateFormat;
721
722    private SimpleReporter(MetricRegistry registry, PrintStream output, Locale locale,
723      TimeZone timeZone, TimeUnit rateUnit, TimeUnit durationUnit, MetricFilter filter) {
724      super(registry, "simple-reporter", filter, rateUnit, durationUnit);
725      this.output = output;
726      this.locale = locale;
727
728      this.dateFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.MEDIUM, locale);
729      dateFormat.setTimeZone(timeZone);
730    }
731
732    @Override
733    public void report(SortedMap<String, Gauge> gauges, SortedMap<String, Counter> counters,
734      SortedMap<String, Histogram> histograms, SortedMap<String, Meter> meters,
735      SortedMap<String, Timer> timers) {
736      // we know we only have histograms
737      if (!histograms.isEmpty()) {
738        for (Map.Entry<String, Histogram> entry : histograms.entrySet()) {
739          output.print("   " + StringUtils.substringAfterLast(entry.getKey(), "."));
740          output.println(':');
741          printHistogram(entry.getValue());
742        }
743        output.println();
744      }
745
746      output.println();
747      output.flush();
748    }
749
750    private void printHistogram(Histogram histogram) {
751      Snapshot snapshot = histogram.getSnapshot();
752      output.printf(locale, "               min = %d%n", snapshot.getMin());
753      output.printf(locale, "               max = %d%n", snapshot.getMax());
754      output.printf(locale, "              mean = %2.2f%n", snapshot.getMean());
755      output.printf(locale, "            stddev = %2.2f%n", snapshot.getStdDev());
756      output.printf(locale, "            median = %2.2f%n", snapshot.getMedian());
757      output.printf(locale, "              75%% <= %2.2f%n", snapshot.get75thPercentile());
758      output.printf(locale, "              95%% <= %2.2f%n", snapshot.get95thPercentile());
759      output.printf(locale, "              98%% <= %2.2f%n", snapshot.get98thPercentile());
760      output.printf(locale, "              99%% <= %2.2f%n", snapshot.get99thPercentile());
761      output.printf(locale, "            99.9%% <= %2.2f%n", snapshot.get999thPercentile());
762      output.printf(locale, "             count = %d%n", histogram.getCount());
763    }
764  }
765
766  public static void main(String[] args) throws Exception {
767    Configuration conf = HBaseConfiguration.create();
768    // no need for a block cache
769    conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
770    int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
771    System.exit(ret);
772  }
773}