001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import static com.codahale.metrics.MetricRegistry.name; 021 022import com.codahale.metrics.ConsoleReporter; 023import com.codahale.metrics.Histogram; 024import com.codahale.metrics.MetricRegistry; 025import com.codahale.metrics.Snapshot; 026import com.codahale.metrics.UniformReservoir; 027import java.io.ByteArrayOutputStream; 028import java.io.DataInput; 029import java.io.IOException; 030import java.io.PrintStream; 031import java.text.DateFormat; 032import java.util.ArrayList; 033import java.util.Arrays; 034import java.util.HashMap; 035import java.util.Iterator; 036import java.util.LinkedHashSet; 037import java.util.List; 038import java.util.Locale; 039import java.util.Map; 040import java.util.Optional; 041import java.util.Set; 042import java.util.TimeZone; 043import java.util.concurrent.atomic.LongAdder; 044import org.apache.hadoop.conf.Configuration; 045import org.apache.hadoop.conf.Configured; 046import org.apache.hadoop.fs.FileSystem; 047import org.apache.hadoop.fs.Path; 048import org.apache.hadoop.hbase.Cell; 049import org.apache.hadoop.hbase.CellComparator; 050import org.apache.hadoop.hbase.CellUtil; 051import org.apache.hadoop.hbase.ExtendedCell; 052import org.apache.hadoop.hbase.HBaseConfiguration; 053import org.apache.hadoop.hbase.HBaseInterfaceAudience; 054import org.apache.hadoop.hbase.HConstants; 055import org.apache.hadoop.hbase.KeyValue; 056import org.apache.hadoop.hbase.KeyValueUtil; 057import org.apache.hadoop.hbase.PrivateCellUtil; 058import org.apache.hadoop.hbase.TableName; 059import org.apache.hadoop.hbase.Tag; 060import org.apache.hadoop.hbase.client.RegionInfo; 061import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 062import org.apache.hadoop.hbase.mob.MobUtils; 063import org.apache.hadoop.hbase.regionserver.HStoreFile; 064import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; 065import org.apache.hadoop.hbase.util.BloomFilter; 066import org.apache.hadoop.hbase.util.BloomFilterFactory; 067import org.apache.hadoop.hbase.util.BloomFilterUtil; 068import org.apache.hadoop.hbase.util.Bytes; 069import org.apache.hadoop.hbase.util.CommonFSUtils; 070import org.apache.hadoop.hbase.util.HFileArchiveUtil; 071import org.apache.hadoop.util.Tool; 072import org.apache.hadoop.util.ToolRunner; 073import org.apache.yetus.audience.InterfaceAudience; 074import org.apache.yetus.audience.InterfaceStability; 075import org.slf4j.Logger; 076import org.slf4j.LoggerFactory; 077 078import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 079import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser; 080import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter; 081import org.apache.hbase.thirdparty.org.apache.commons.cli.Option; 082import org.apache.hbase.thirdparty.org.apache.commons.cli.OptionGroup; 083import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; 084import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; 085import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser; 086 087/** 088 * Implements pretty-printing functionality for {@link HFile}s. 089 */ 090@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) 091@InterfaceStability.Evolving 092public class HFilePrettyPrinter extends Configured implements Tool { 093 094 private static final Logger LOG = LoggerFactory.getLogger(HFilePrettyPrinter.class); 095 096 private Options options = new Options(); 097 098 private boolean verbose; 099 private boolean printValue; 100 private boolean printKey; 101 private boolean shouldPrintMeta; 102 private boolean printBlockIndex; 103 private boolean printBlockHeaders; 104 private boolean printStats; 105 private boolean printStatRanges; 106 private boolean checkRow; 107 private boolean checkFamily; 108 private boolean isSeekToRow = false; 109 private boolean checkMobIntegrity = false; 110 private Map<String, List<Path>> mobFileLocations; 111 private static final int FOUND_MOB_FILES_CACHE_CAPACITY = 50; 112 private static final int MISSING_MOB_FILES_CACHE_CAPACITY = 20; 113 private PrintStream out = System.out; 114 private PrintStream err = System.err; 115 116 /** 117 * The row which the user wants to specify and print all the KeyValues for. 118 */ 119 private byte[] row = null; 120 121 private List<Path> files = new ArrayList<>(); 122 private int count; 123 124 private static final String FOUR_SPACES = " "; 125 126 public HFilePrettyPrinter() { 127 super(); 128 init(); 129 } 130 131 public HFilePrettyPrinter(Configuration conf) { 132 super(conf); 133 init(); 134 } 135 136 private void init() { 137 options.addOption("v", "verbose", false, "Verbose output; emits file and meta data delimiters"); 138 options.addOption("p", "printkv", false, "Print key/value pairs"); 139 options.addOption("e", "printkey", false, "Print keys"); 140 options.addOption("m", "printmeta", false, "Print meta data of file"); 141 options.addOption("b", "printblocks", false, "Print block index meta data"); 142 options.addOption("h", "printblockheaders", false, "Print block headers for each block."); 143 options.addOption("k", "checkrow", false, 144 "Enable row order check; looks for out-of-order keys"); 145 options.addOption("a", "checkfamily", false, "Enable family check"); 146 options.addOption("w", "seekToRow", true, 147 "Seek to this row and print all the kvs for this row only"); 148 options.addOption("s", "stats", false, "Print statistics"); 149 options.addOption("d", "details", false, 150 "Print detailed statistics, including counts by range"); 151 options.addOption("i", "checkMobIntegrity", false, 152 "Print all cells whose mob files are missing"); 153 154 OptionGroup files = new OptionGroup(); 155 files.addOption(new Option("f", "file", true, 156 "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34")); 157 files.addOption( 158 new Option("r", "region", true, "Region to scan. Pass region name; e.g. 'hbase:meta,,1'")); 159 options.addOptionGroup(files); 160 } 161 162 public void setPrintStreams(PrintStream out, PrintStream err) { 163 this.out = out; 164 this.err = err; 165 } 166 167 public boolean parseOptions(String args[]) throws ParseException, IOException { 168 if (args.length == 0) { 169 HelpFormatter formatter = new HelpFormatter(); 170 formatter.printHelp("hfile", options, true); 171 return false; 172 } 173 CommandLineParser parser = new PosixParser(); 174 CommandLine cmd = parser.parse(options, args); 175 176 verbose = cmd.hasOption("v"); 177 printValue = cmd.hasOption("p"); 178 printKey = cmd.hasOption("e") || printValue; 179 shouldPrintMeta = cmd.hasOption("m"); 180 printBlockIndex = cmd.hasOption("b"); 181 printBlockHeaders = cmd.hasOption("h"); 182 printStatRanges = cmd.hasOption("d"); 183 printStats = cmd.hasOption("s") || printStatRanges; 184 checkRow = cmd.hasOption("k"); 185 checkFamily = cmd.hasOption("a"); 186 checkMobIntegrity = cmd.hasOption("i"); 187 188 if (cmd.hasOption("f")) { 189 files.add(new Path(cmd.getOptionValue("f"))); 190 } 191 192 if (cmd.hasOption("w")) { 193 String key = cmd.getOptionValue("w"); 194 if (key != null && key.length() != 0) { 195 row = Bytes.toBytesBinary(key); 196 isSeekToRow = true; 197 } else { 198 err.println("Invalid row is specified."); 199 System.exit(-1); 200 } 201 } 202 203 if (cmd.hasOption("r")) { 204 String regionName = cmd.getOptionValue("r"); 205 byte[] rn = Bytes.toBytes(regionName); 206 byte[][] hri = RegionInfo.parseRegionName(rn); 207 Path rootDir = CommonFSUtils.getRootDir(getConf()); 208 Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.valueOf(hri[0])); 209 String enc = RegionInfo.encodeRegionName(rn); 210 Path regionDir = new Path(tableDir, enc); 211 if (verbose) out.println("region dir -> " + regionDir); 212 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()), regionDir); 213 if (verbose) out.println("Number of region files found -> " + regionFiles.size()); 214 if (verbose) { 215 int i = 1; 216 for (Path p : regionFiles) { 217 if (verbose) out.println("Found file[" + i++ + "] -> " + p); 218 } 219 } 220 files.addAll(regionFiles); 221 } 222 223 if (checkMobIntegrity) { 224 if (verbose) { 225 System.out.println("checkMobIntegrity is enabled"); 226 } 227 mobFileLocations = new HashMap<>(); 228 } 229 230 cmd.getArgList().forEach((file) -> files.add(new Path(file))); 231 232 return true; 233 } 234 235 /** 236 * Runs the command-line pretty-printer, and returns the desired command exit code (zero for 237 * success, non-zero for failure). 238 */ 239 @Override 240 public int run(String[] args) { 241 if (getConf() == null) { 242 throw new RuntimeException("A Configuration instance must be provided."); 243 } 244 try { 245 CommonFSUtils.setFsDefault(getConf(), CommonFSUtils.getRootDir(getConf())); 246 if (!parseOptions(args)) { 247 return 1; 248 } 249 } catch (IOException ex) { 250 LOG.error("Error parsing command-line options", ex); 251 return 1; 252 } catch (ParseException ex) { 253 LOG.error("Error parsing command-line options", ex); 254 return 1; 255 } 256 257 // iterate over all files found 258 for (Path fileName : files) { 259 try { 260 int exitCode = processFile(fileName, false); 261 if (exitCode != 0) { 262 return exitCode; 263 } 264 } catch (IOException ex) { 265 LOG.error("Error reading " + fileName, ex); 266 return -2; 267 } 268 } 269 270 if (verbose || printKey) { 271 out.println("Scanned kv count -> " + count); 272 } 273 274 return 0; 275 } 276 277 // HBASE-22561 introduces boolean checkRootDir for WebUI specificly 278 public int processFile(Path file, boolean checkRootDir) throws IOException { 279 if (verbose) { 280 out.println("Scanning -> " + file); 281 } 282 283 if (checkRootDir) { 284 Path rootPath = CommonFSUtils.getRootDir(getConf()); 285 String rootString = rootPath + Path.SEPARATOR; 286 if (!file.toString().startsWith(rootString)) { 287 // First we see if fully-qualified URI matches the root dir. It might 288 // also be an absolute path in the same filesystem, so we prepend the FS 289 // of the root dir and see if that fully-qualified URI matches. 290 FileSystem rootFS = rootPath.getFileSystem(getConf()); 291 String qualifiedFile = rootFS.getUri().toString() + file.toString(); 292 if (!qualifiedFile.startsWith(rootString)) { 293 err.println( 294 "ERROR, file (" + file + ") is not in HBase's root directory (" + rootString + ")"); 295 return -2; 296 } 297 } 298 } 299 300 FileSystem fs = file.getFileSystem(getConf()); 301 if (!fs.exists(file)) { 302 err.println("ERROR, file doesnt exist: " + file); 303 return -2; 304 } 305 306 HFile.Reader reader = HFile.createReader(fs, file, CacheConfig.DISABLED, true, getConf()); 307 308 Map<byte[], byte[]> fileInfo = reader.getHFileInfo(); 309 310 KeyValueStatsCollector fileStats = null; 311 312 if (verbose || printKey || checkRow || checkFamily || printStats || checkMobIntegrity) { 313 // scan over file and read key/value's and check if requested 314 HFileScanner scanner = reader.getScanner(getConf(), false, false, false); 315 fileStats = new KeyValueStatsCollector(); 316 boolean shouldScanKeysValues; 317 if (this.isSeekToRow && !Bytes.equals(row, reader.getFirstRowKey().orElse(null))) { 318 // seek to the first kv on this row 319 shouldScanKeysValues = (scanner.seekTo(PrivateCellUtil.createFirstOnRow(this.row)) != -1); 320 } else { 321 shouldScanKeysValues = scanner.seekTo(); 322 } 323 if (shouldScanKeysValues) { 324 scanKeysValues(file, fileStats, scanner, row); 325 } 326 } 327 328 // print meta data 329 if (shouldPrintMeta) { 330 printMeta(reader, fileInfo); 331 } 332 333 if (printBlockIndex) { 334 out.println("Block Index:"); 335 out.println(reader.getDataBlockIndexReader()); 336 } 337 338 if (printBlockHeaders) { 339 out.println("Block Headers:"); 340 /* 341 * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and 342 * TestLazyDataBlockDecompression. Refactor? 343 */ 344 FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file); 345 long fileSize = fs.getFileStatus(file).getLen(); 346 FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize); 347 long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset(); 348 HFileBlock block; 349 while (offset <= max) { 350 block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false, 351 /* isCompaction */ false, /* updateCacheMetrics */ false, null, null); 352 offset += block.getOnDiskSizeWithHeader(); 353 out.println(block); 354 } 355 } 356 357 if (printStats) { 358 fileStats.finish(printStatRanges); 359 out.println("Stats:\n" + fileStats); 360 } 361 362 reader.close(); 363 return 0; 364 } 365 366 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats, HFileScanner scanner, 367 byte[] row) throws IOException { 368 Cell pCell = null; 369 FileSystem fs = FileSystem.get(getConf()); 370 Set<String> foundMobFiles = new LinkedHashSet<>(FOUND_MOB_FILES_CACHE_CAPACITY); 371 Set<String> missingMobFiles = new LinkedHashSet<>(MISSING_MOB_FILES_CACHE_CAPACITY); 372 do { 373 ExtendedCell cell = scanner.getCell(); 374 if (row != null && row.length != 0) { 375 int result = CellComparator.getInstance().compareRows(cell, row, 0, row.length); 376 if (result > 0) { 377 break; 378 } else if (result < 0) { 379 continue; 380 } 381 } 382 // collect stats 383 if (printStats) { 384 fileStats.collect(cell, printStatRanges); 385 } 386 // dump key value 387 if (printKey) { 388 out.print("K: " + cell); 389 if (printValue) { 390 out.print(" V: " + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(), 391 cell.getValueLength())); 392 int i = 0; 393 List<Tag> tags = PrivateCellUtil.getTags(cell); 394 for (Tag tag : tags) { 395 out.print(String.format(" T[%d]: %s", i++, tag.toString())); 396 } 397 } 398 out.println(); 399 } 400 // check if rows are in order 401 if (checkRow && pCell != null) { 402 if (CellComparator.getInstance().compareRows(pCell, cell) > 0) { 403 err.println("WARNING, previous row is greater then" + " current row\n\tfilename -> " 404 + file + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent -> " 405 + CellUtil.getCellKeyAsString(cell)); 406 } 407 } 408 // check if families are consistent 409 if (checkFamily) { 410 String fam = 411 Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); 412 if (!file.toString().contains(fam)) { 413 err.println("WARNING, filename does not match kv family," + "\n\tfilename -> " + file 414 + "\n\tkeyvalue -> " + CellUtil.getCellKeyAsString(cell)); 415 } 416 if (pCell != null && CellComparator.getInstance().compareFamilies(pCell, cell) != 0) { 417 err.println( 418 "WARNING, previous kv has different family" + " compared to current key\n\tfilename -> " 419 + file + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent -> " 420 + CellUtil.getCellKeyAsString(cell)); 421 } 422 } 423 // check if mob files are missing. 424 if (checkMobIntegrity && MobUtils.isMobReferenceCell(cell)) { 425 Optional<TableName> tn = MobUtils.getTableName(cell); 426 if (!tn.isPresent()) { 427 System.err.println( 428 "ERROR, wrong tag format in mob reference cell " + CellUtil.getCellKeyAsString(cell)); 429 } else if (!MobUtils.hasValidMobRefCellValue(cell)) { 430 System.err.println( 431 "ERROR, wrong value format in mob reference cell " + CellUtil.getCellKeyAsString(cell)); 432 } else { 433 String mobFileName = MobUtils.getMobFileName(cell); 434 boolean exist = mobFileExists(fs, tn.get(), mobFileName, 435 Bytes.toString(CellUtil.cloneFamily(cell)), foundMobFiles, missingMobFiles); 436 if (!exist) { 437 // report error 438 System.err.println("ERROR, the mob file [" + mobFileName 439 + "] is missing referenced by cell " + CellUtil.getCellKeyAsString(cell)); 440 } 441 } 442 } 443 pCell = cell; 444 ++count; 445 } while (scanner.next()); 446 } 447 448 /** 449 * Checks whether the referenced mob file exists. 450 */ 451 private boolean mobFileExists(FileSystem fs, TableName tn, String mobFileName, String family, 452 Set<String> foundMobFiles, Set<String> missingMobFiles) throws IOException { 453 if (foundMobFiles.contains(mobFileName)) { 454 return true; 455 } 456 if (missingMobFiles.contains(mobFileName)) { 457 return false; 458 } 459 String tableName = tn.getNameAsString(); 460 List<Path> locations = mobFileLocations.get(tableName); 461 if (locations == null) { 462 locations = new ArrayList<>(2); 463 locations.add(MobUtils.getMobFamilyPath(getConf(), tn, family)); 464 locations.add(HFileArchiveUtil.getStoreArchivePath(getConf(), tn, 465 MobUtils.getMobRegionInfo(tn).getEncodedName(), family)); 466 mobFileLocations.put(tn.getNameAsString(), locations); 467 } 468 boolean exist = false; 469 for (Path location : locations) { 470 Path mobFilePath = new Path(location, mobFileName); 471 if (fs.exists(mobFilePath)) { 472 exist = true; 473 break; 474 } 475 } 476 if (exist) { 477 evictMobFilesIfNecessary(foundMobFiles, FOUND_MOB_FILES_CACHE_CAPACITY); 478 foundMobFiles.add(mobFileName); 479 } else { 480 evictMobFilesIfNecessary(missingMobFiles, MISSING_MOB_FILES_CACHE_CAPACITY); 481 missingMobFiles.add(mobFileName); 482 } 483 return exist; 484 } 485 486 /** 487 * Evicts the cached mob files if the set is larger than the limit. 488 */ 489 private void evictMobFilesIfNecessary(Set<String> mobFileNames, int limit) { 490 if (mobFileNames.size() < limit) { 491 return; 492 } 493 int index = 0; 494 int evict = limit / 2; 495 Iterator<String> fileNamesItr = mobFileNames.iterator(); 496 while (index < evict && fileNamesItr.hasNext()) { 497 fileNamesItr.next(); 498 fileNamesItr.remove(); 499 index++; 500 } 501 } 502 503 /** 504 * Format a string of the form "k1=v1, k2=v2, ..." into separate lines with a four-space 505 * indentation. 506 */ 507 private static String asSeparateLines(String keyValueStr) { 508 return keyValueStr.replaceAll(", ([a-zA-Z]+=)", ",\n" + FOUR_SPACES + "$1"); 509 } 510 511 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo) throws IOException { 512 out.println("Block index size as per heapsize: " + reader.indexSize()); 513 out.println(asSeparateLines(reader.toString())); 514 out.println("Trailer:\n " + asSeparateLines(reader.getTrailer().toString())); 515 out.println("Fileinfo:"); 516 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) { 517 out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = "); 518 if ( 519 Bytes.equals(e.getKey(), HStoreFile.MAX_SEQ_ID_KEY) 520 || Bytes.equals(e.getKey(), HStoreFile.DELETE_FAMILY_COUNT) 521 || Bytes.equals(e.getKey(), HStoreFile.EARLIEST_PUT_TS) 522 || Bytes.equals(e.getKey(), HFileWriterImpl.MAX_MEMSTORE_TS_KEY) 523 || Bytes.equals(e.getKey(), HFileInfo.CREATE_TIME_TS) 524 || Bytes.equals(e.getKey(), HStoreFile.BULKLOAD_TIME_KEY) 525 ) { 526 out.println(Bytes.toLong(e.getValue())); 527 } else if (Bytes.equals(e.getKey(), HStoreFile.TIMERANGE_KEY)) { 528 TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(e.getValue()); 529 out.println(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax()); 530 } else if ( 531 Bytes.equals(e.getKey(), HFileInfo.AVG_KEY_LEN) 532 || Bytes.equals(e.getKey(), HFileInfo.AVG_VALUE_LEN) 533 || Bytes.equals(e.getKey(), HFileWriterImpl.KEY_VALUE_VERSION) 534 || Bytes.equals(e.getKey(), HFileInfo.MAX_TAGS_LEN) 535 ) { 536 out.println(Bytes.toInt(e.getValue())); 537 } else if ( 538 Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY) 539 || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED) 540 || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY) 541 || Bytes.equals(e.getKey(), HStoreFile.HISTORICAL_KEY) 542 ) { 543 out.println(Bytes.toBoolean(e.getValue())); 544 } else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) { 545 out.println(new KeyValue.KeyOnlyKeyValue(e.getValue()).toString()); 546 } else { 547 out.println(Bytes.toStringBinary(e.getValue())); 548 } 549 } 550 551 try { 552 out.println("Mid-key: " + reader.midKey().map(CellUtil::getCellKeyAsString)); 553 } catch (Exception e) { 554 out.println("Unable to retrieve the midkey"); 555 } 556 557 // Printing general bloom information 558 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata(); 559 BloomFilter bloomFilter = null; 560 if (bloomMeta != null) bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader); 561 562 out.println("Bloom filter:"); 563 if (bloomFilter != null) { 564 out.println(FOUR_SPACES 565 + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES)); 566 } else { 567 out.println(FOUR_SPACES + "Not present"); 568 } 569 570 // Printing delete bloom information 571 bloomMeta = reader.getDeleteBloomFilterMetadata(); 572 bloomFilter = null; 573 if (bloomMeta != null) bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader); 574 575 out.println("Delete Family Bloom filter:"); 576 if (bloomFilter != null) { 577 out.println(FOUR_SPACES 578 + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES)); 579 } else { 580 out.println(FOUR_SPACES + "Not present"); 581 } 582 } 583 584 // Default reservoir is exponentially decaying, but we're doing a point-in-time analysis 585 // of a store file. It doesn't make sense to prefer keys later in the store file. 586 private static final MetricRegistry.MetricSupplier<Histogram> UNIFORM_RESERVOIR = 587 () -> new Histogram(new UniformReservoir()); 588 589 // Useful ranges for viewing distribution of small to large keys, values, and rows. 590 // we only print ranges which actually have values, so more here doesn't add much overhead 591 private static final long[] RANGES = new long[] { 1, 3, 10, 50, 100, 500, 1_000, 5_000, 10_000, 592 50_000, 100_000, 500_000, 750_000, 1_000_000, 5_000_000, 10_000_000, 50_000_000, 100_000_000 }; 593 594 /** 595 * Holds a Histogram and supporting min/max and range buckets for analyzing distribution of key 596 * bytes, value bytes, row bytes, and row columns. Supports adding values, getting the histogram, 597 * and getting counts per range. 598 */ 599 static class KeyValueStats { 600 private final Histogram histogram; 601 private final String name; 602 private long max = Long.MIN_VALUE; 603 private long min = Long.MAX_VALUE; 604 private boolean collectRanges = false; 605 private final LongAdder[] rangeCounts; 606 607 KeyValueStats(MetricRegistry metricRegistry, String statName) { 608 this.histogram = 609 metricRegistry.histogram(name(HFilePrettyPrinter.class, statName), UNIFORM_RESERVOIR); 610 this.name = statName; 611 this.rangeCounts = new LongAdder[RANGES.length]; 612 for (int i = 0; i < rangeCounts.length; i++) { 613 rangeCounts[i] = new LongAdder(); 614 } 615 } 616 617 void update(long value, boolean collectRanges) { 618 histogram.update(value); 619 min = Math.min(value, min); 620 max = Math.max(value, max); 621 622 if (collectRanges) { 623 this.collectRanges = true; 624 int result = Arrays.binarySearch(RANGES, value); 625 int idx = result >= 0 ? result : Math.abs(result) - 1; 626 rangeCounts[idx].increment(); 627 } 628 } 629 630 Histogram getHistogram() { 631 return histogram; 632 } 633 634 String getName() { 635 return name; 636 } 637 638 long getMax() { 639 return max; 640 } 641 642 long getMin() { 643 return min; 644 } 645 646 long[] getRanges() { 647 return RANGES; 648 } 649 650 long getCountAtOrBelow(long range) { 651 long count = 0; 652 for (int i = 0; i < RANGES.length; i++) { 653 if (RANGES[i] <= range) { 654 count += rangeCounts[i].sum(); 655 } else { 656 break; 657 } 658 } 659 return count; 660 } 661 662 boolean hasRangeCounts() { 663 return collectRanges; 664 } 665 } 666 667 private static class KeyValueStatsCollector { 668 private final MetricRegistry metricsRegistry = new MetricRegistry(); 669 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream(); 670 671 KeyValueStats keyLen = new KeyValueStats(metricsRegistry, "Key length"); 672 KeyValueStats valLen = new KeyValueStats(metricsRegistry, "Val length"); 673 KeyValueStats rowSizeBytes = new KeyValueStats(metricsRegistry, "Row size (bytes)"); 674 KeyValueStats rowSizeCols = new KeyValueStats(metricsRegistry, "Row size (columns)"); 675 676 private final SimpleReporter simpleReporter = 677 SimpleReporter.newBuilder().outputTo(new PrintStream(metricsOutput)).addStats(keyLen) 678 .addStats(valLen).addStats(rowSizeBytes).addStats(rowSizeCols).build(); 679 680 long curRowBytes = 0; 681 long curRowCols = 0; 682 683 byte[] biggestRow = null; 684 685 private Cell prevCell = null; 686 private long maxRowBytes = 0; 687 private long curRowKeyLength; 688 689 public void collect(Cell cell, boolean printStatRanges) { 690 valLen.update(cell.getValueLength(), printStatRanges); 691 if (prevCell != null && CellComparator.getInstance().compareRows(prevCell, cell) != 0) { 692 // new row 693 collectRow(printStatRanges); 694 } 695 curRowBytes += cell.getSerializedSize(); 696 curRowKeyLength = KeyValueUtil.keyLength(cell); 697 curRowCols++; 698 prevCell = cell; 699 } 700 701 private void collectRow(boolean printStatRanges) { 702 rowSizeBytes.update(curRowBytes, printStatRanges); 703 rowSizeCols.update(curRowCols, printStatRanges); 704 keyLen.update(curRowKeyLength, printStatRanges); 705 706 if (curRowBytes > maxRowBytes && prevCell != null) { 707 biggestRow = CellUtil.cloneRow(prevCell); 708 maxRowBytes = curRowBytes; 709 } 710 711 curRowBytes = 0; 712 curRowCols = 0; 713 } 714 715 public void finish(boolean printStatRanges) { 716 if (curRowCols > 0) { 717 collectRow(printStatRanges); 718 } 719 } 720 721 @Override 722 public String toString() { 723 if (prevCell == null) return "no data available for statistics"; 724 725 // Dump the metrics to the output stream 726 simpleReporter.report(); 727 728 return metricsOutput.toString() + "Key of biggest row: " + Bytes.toStringBinary(biggestRow); 729 } 730 } 731 732 /** 733 * Simple reporter which collects registered histograms for printing to an output stream in 734 * {@link #report()}. 735 */ 736 private static final class SimpleReporter { 737 /** 738 * Returns a new {@link Builder} for {@link SimpleReporter}. 739 * @return a {@link Builder} instance for a {@link SimpleReporter} 740 */ 741 public static Builder newBuilder() { 742 return new Builder(); 743 } 744 745 /** 746 * A builder for {@link SimpleReporter} instances. Defaults to using the default locale and time 747 * zone, writing to {@code System.out}. 748 */ 749 public static class Builder { 750 private final List<KeyValueStats> stats = new ArrayList<>(); 751 private PrintStream output; 752 private Locale locale; 753 private TimeZone timeZone; 754 755 private Builder() { 756 this.output = System.out; 757 this.locale = Locale.getDefault(); 758 this.timeZone = TimeZone.getDefault(); 759 } 760 761 /** 762 * Write to the given {@link PrintStream}. 763 * @param output a {@link PrintStream} instance. 764 * @return {@code this} 765 */ 766 public Builder outputTo(PrintStream output) { 767 this.output = output; 768 return this; 769 } 770 771 /** 772 * Add the given {@link KeyValueStats} to be reported 773 * @param stat the stat to be reported 774 * @return {@code this} 775 */ 776 public Builder addStats(KeyValueStats stat) { 777 this.stats.add(stat); 778 return this; 779 } 780 781 /** 782 * Builds a {@link ConsoleReporter} with the given properties. 783 * @return a {@link ConsoleReporter} 784 */ 785 public SimpleReporter build() { 786 return new SimpleReporter(output, stats, locale, timeZone); 787 } 788 } 789 790 private final PrintStream output; 791 private final List<KeyValueStats> stats; 792 private final Locale locale; 793 private final DateFormat dateFormat; 794 795 private SimpleReporter(PrintStream output, List<KeyValueStats> stats, Locale locale, 796 TimeZone timeZone) { 797 this.output = output; 798 this.stats = stats; 799 this.locale = locale; 800 this.dateFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.MEDIUM, locale); 801 dateFormat.setTimeZone(timeZone); 802 } 803 804 public void report() { 805 // we know we only have histograms 806 if (!stats.isEmpty()) { 807 for (KeyValueStats stat : stats) { 808 output.print(" " + stat.getName()); 809 output.println(':'); 810 printHistogram(stat); 811 } 812 output.println(); 813 } 814 815 output.println(); 816 output.flush(); 817 } 818 819 private void printHistogram(KeyValueStats stats) { 820 Histogram histogram = stats.getHistogram(); 821 Snapshot snapshot = histogram.getSnapshot(); 822 823 output.printf(locale, " min = %d%n", stats.getMin()); 824 output.printf(locale, " max = %d%n", stats.getMax()); 825 output.printf(locale, " mean = %2.2f%n", snapshot.getMean()); 826 output.printf(locale, " stddev = %2.2f%n", snapshot.getStdDev()); 827 output.printf(locale, " median = %2.2f%n", snapshot.getMedian()); 828 output.printf(locale, " 75%% <= %2.2f%n", snapshot.get75thPercentile()); 829 output.printf(locale, " 95%% <= %2.2f%n", snapshot.get95thPercentile()); 830 output.printf(locale, " 98%% <= %2.2f%n", snapshot.get98thPercentile()); 831 output.printf(locale, " 99%% <= %2.2f%n", snapshot.get99thPercentile()); 832 output.printf(locale, " 99.9%% <= %2.2f%n", snapshot.get999thPercentile()); 833 output.printf(locale, " count = %d%n", histogram.getCount()); 834 835 // if printStatRanges was enabled with -d arg, below we'll create an approximate histogram 836 // of counts based on the configured ranges in RANGES. Each range of sizes (i.e. <= 50, <= 837 // 100, etc) will have a count printed if any values were seen in that range. If no values 838 // were seen for a range, that range will be excluded to keep the output small. 839 if (stats.hasRangeCounts()) { 840 output.printf(locale, " (range <= count):%n"); 841 long lastVal = 0; 842 long lastRange = 0; 843 for (long range : stats.getRanges()) { 844 long val = stats.getCountAtOrBelow(range); 845 if (val - lastVal > 0) { 846 // print the last zero value before this one, to give context 847 if (lastVal == 0 && lastRange != 0) { 848 printRangeCount(lastRange, lastVal); 849 } 850 printRangeCount(range, val - lastVal); 851 } 852 lastVal = val; 853 lastRange = range; 854 } 855 if (histogram.getCount() - lastVal > 0) { 856 // print any remaining that might have been outside our buckets 857 printRangeCount(Long.MAX_VALUE, histogram.getCount() - lastVal); 858 } 859 } 860 } 861 862 private void printRangeCount(long range, long countAtOrBelow) { 863 String rangeString = range == Long.MAX_VALUE ? "inf" : Long.toString(range); 864 output.printf(locale, "%17s <= %d%n", rangeString, countAtOrBelow); 865 } 866 } 867 868 public static void main(String[] args) throws Exception { 869 Configuration conf = HBaseConfiguration.create(); 870 // no need for a block cache 871 conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0); 872 int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args); 873 System.exit(ret); 874 } 875}