001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import static com.codahale.metrics.MetricRegistry.name; 021 022import com.codahale.metrics.ConsoleReporter; 023import com.codahale.metrics.Counter; 024import com.codahale.metrics.Gauge; 025import com.codahale.metrics.Histogram; 026import com.codahale.metrics.Meter; 027import com.codahale.metrics.MetricFilter; 028import com.codahale.metrics.MetricRegistry; 029import com.codahale.metrics.ScheduledReporter; 030import com.codahale.metrics.Snapshot; 031import com.codahale.metrics.Timer; 032import java.io.ByteArrayOutputStream; 033import java.io.DataInput; 034import java.io.IOException; 035import java.io.PrintStream; 036import java.text.DateFormat; 037import java.util.ArrayList; 038import java.util.HashMap; 039import java.util.Iterator; 040import java.util.LinkedHashSet; 041import java.util.List; 042import java.util.Locale; 043import java.util.Map; 044import java.util.Optional; 045import java.util.Set; 046import java.util.SortedMap; 047import java.util.TimeZone; 048import java.util.concurrent.TimeUnit; 049import org.apache.commons.lang3.StringUtils; 050import org.apache.hadoop.conf.Configuration; 051import org.apache.hadoop.conf.Configured; 052import org.apache.hadoop.fs.FileSystem; 053import org.apache.hadoop.fs.Path; 054import org.apache.hadoop.hbase.Cell; 055import org.apache.hadoop.hbase.CellComparator; 056import org.apache.hadoop.hbase.CellUtil; 057import org.apache.hadoop.hbase.HBaseConfiguration; 058import org.apache.hadoop.hbase.HBaseInterfaceAudience; 059import org.apache.hadoop.hbase.HConstants; 060import org.apache.hadoop.hbase.HRegionInfo; 061import org.apache.hadoop.hbase.KeyValue; 062import org.apache.hadoop.hbase.KeyValueUtil; 063import org.apache.hadoop.hbase.PrivateCellUtil; 064import org.apache.hadoop.hbase.TableName; 065import org.apache.hadoop.hbase.Tag; 066import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 067import org.apache.hadoop.hbase.mob.MobUtils; 068import org.apache.hadoop.hbase.regionserver.HStoreFile; 069import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; 070import org.apache.hadoop.hbase.util.BloomFilter; 071import org.apache.hadoop.hbase.util.BloomFilterFactory; 072import org.apache.hadoop.hbase.util.BloomFilterUtil; 073import org.apache.hadoop.hbase.util.Bytes; 074import org.apache.hadoop.hbase.util.CommonFSUtils; 075import org.apache.hadoop.hbase.util.HFileArchiveUtil; 076import org.apache.hadoop.util.Tool; 077import org.apache.hadoop.util.ToolRunner; 078import org.apache.yetus.audience.InterfaceAudience; 079import org.apache.yetus.audience.InterfaceStability; 080import org.slf4j.Logger; 081import org.slf4j.LoggerFactory; 082 083import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 084import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser; 085import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter; 086import org.apache.hbase.thirdparty.org.apache.commons.cli.Option; 087import org.apache.hbase.thirdparty.org.apache.commons.cli.OptionGroup; 088import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; 089import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; 090import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser; 091 092/** 093 * Implements pretty-printing functionality for {@link HFile}s. 094 */ 095@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) 096@InterfaceStability.Evolving 097public class HFilePrettyPrinter extends Configured implements Tool { 098 099 private static final Logger LOG = LoggerFactory.getLogger(HFilePrettyPrinter.class); 100 101 private Options options = new Options(); 102 103 private boolean verbose; 104 private boolean printValue; 105 private boolean printKey; 106 private boolean shouldPrintMeta; 107 private boolean printBlockIndex; 108 private boolean printBlockHeaders; 109 private boolean printStats; 110 private boolean checkRow; 111 private boolean checkFamily; 112 private boolean isSeekToRow = false; 113 private boolean checkMobIntegrity = false; 114 private Map<String, List<Path>> mobFileLocations; 115 private static final int FOUND_MOB_FILES_CACHE_CAPACITY = 50; 116 private static final int MISSING_MOB_FILES_CACHE_CAPACITY = 20; 117 private PrintStream out = System.out; 118 private PrintStream err = System.err; 119 120 /** 121 * The row which the user wants to specify and print all the KeyValues for. 122 */ 123 private byte[] row = null; 124 125 private List<Path> files = new ArrayList<>(); 126 private int count; 127 128 private static final String FOUR_SPACES = " "; 129 130 public HFilePrettyPrinter() { 131 super(); 132 init(); 133 } 134 135 public HFilePrettyPrinter(Configuration conf) { 136 super(conf); 137 init(); 138 } 139 140 private void init() { 141 options.addOption("v", "verbose", false, "Verbose output; emits file and meta data delimiters"); 142 options.addOption("p", "printkv", false, "Print key/value pairs"); 143 options.addOption("e", "printkey", false, "Print keys"); 144 options.addOption("m", "printmeta", false, "Print meta data of file"); 145 options.addOption("b", "printblocks", false, "Print block index meta data"); 146 options.addOption("h", "printblockheaders", false, "Print block headers for each block."); 147 options.addOption("k", "checkrow", false, 148 "Enable row order check; looks for out-of-order keys"); 149 options.addOption("a", "checkfamily", false, "Enable family check"); 150 options.addOption("w", "seekToRow", true, 151 "Seek to this row and print all the kvs for this row only"); 152 options.addOption("s", "stats", false, "Print statistics"); 153 options.addOption("i", "checkMobIntegrity", false, 154 "Print all cells whose mob files are missing"); 155 156 OptionGroup files = new OptionGroup(); 157 files.addOption(new Option("f", "file", true, 158 "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34")); 159 files.addOption( 160 new Option("r", "region", true, "Region to scan. Pass region name; e.g. 'hbase:meta,,1'")); 161 options.addOptionGroup(files); 162 } 163 164 public void setPrintStreams(PrintStream out, PrintStream err) { 165 this.out = out; 166 this.err = err; 167 } 168 169 public boolean parseOptions(String args[]) throws ParseException, IOException { 170 if (args.length == 0) { 171 HelpFormatter formatter = new HelpFormatter(); 172 formatter.printHelp("hfile", options, true); 173 return false; 174 } 175 CommandLineParser parser = new PosixParser(); 176 CommandLine cmd = parser.parse(options, args); 177 178 verbose = cmd.hasOption("v"); 179 printValue = cmd.hasOption("p"); 180 printKey = cmd.hasOption("e") || printValue; 181 shouldPrintMeta = cmd.hasOption("m"); 182 printBlockIndex = cmd.hasOption("b"); 183 printBlockHeaders = cmd.hasOption("h"); 184 printStats = cmd.hasOption("s"); 185 checkRow = cmd.hasOption("k"); 186 checkFamily = cmd.hasOption("a"); 187 checkMobIntegrity = cmd.hasOption("i"); 188 189 if (cmd.hasOption("f")) { 190 files.add(new Path(cmd.getOptionValue("f"))); 191 } 192 193 if (cmd.hasOption("w")) { 194 String key = cmd.getOptionValue("w"); 195 if (key != null && key.length() != 0) { 196 row = Bytes.toBytesBinary(key); 197 isSeekToRow = true; 198 } else { 199 err.println("Invalid row is specified."); 200 System.exit(-1); 201 } 202 } 203 204 if (cmd.hasOption("r")) { 205 String regionName = cmd.getOptionValue("r"); 206 byte[] rn = Bytes.toBytes(regionName); 207 byte[][] hri = HRegionInfo.parseRegionName(rn); 208 Path rootDir = CommonFSUtils.getRootDir(getConf()); 209 Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.valueOf(hri[0])); 210 String enc = HRegionInfo.encodeRegionName(rn); 211 Path regionDir = new Path(tableDir, enc); 212 if (verbose) out.println("region dir -> " + regionDir); 213 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()), regionDir); 214 if (verbose) out.println("Number of region files found -> " + regionFiles.size()); 215 if (verbose) { 216 int i = 1; 217 for (Path p : regionFiles) { 218 if (verbose) out.println("Found file[" + i++ + "] -> " + p); 219 } 220 } 221 files.addAll(regionFiles); 222 } 223 224 if (checkMobIntegrity) { 225 if (verbose) { 226 System.out.println("checkMobIntegrity is enabled"); 227 } 228 mobFileLocations = new HashMap<>(); 229 } 230 231 cmd.getArgList().forEach((file) -> files.add(new Path(file))); 232 233 return true; 234 } 235 236 /** 237 * Runs the command-line pretty-printer, and returns the desired command exit code (zero for 238 * success, non-zero for failure). 239 */ 240 @Override 241 public int run(String[] args) { 242 if (getConf() == null) { 243 throw new RuntimeException("A Configuration instance must be provided."); 244 } 245 try { 246 CommonFSUtils.setFsDefault(getConf(), CommonFSUtils.getRootDir(getConf())); 247 if (!parseOptions(args)) { 248 return 1; 249 } 250 } catch (IOException ex) { 251 LOG.error("Error parsing command-line options", ex); 252 return 1; 253 } catch (ParseException ex) { 254 LOG.error("Error parsing command-line options", ex); 255 return 1; 256 } 257 258 // iterate over all files found 259 for (Path fileName : files) { 260 try { 261 int exitCode = processFile(fileName, false); 262 if (exitCode != 0) { 263 return exitCode; 264 } 265 } catch (IOException ex) { 266 LOG.error("Error reading " + fileName, ex); 267 return -2; 268 } 269 } 270 271 if (verbose || printKey) { 272 out.println("Scanned kv count -> " + count); 273 } 274 275 return 0; 276 } 277 278 // HBASE-22561 introduces boolean checkRootDir for WebUI specificly 279 public int processFile(Path file, boolean checkRootDir) throws IOException { 280 if (verbose) { 281 out.println("Scanning -> " + file); 282 } 283 284 if (checkRootDir) { 285 Path rootPath = CommonFSUtils.getRootDir(getConf()); 286 String rootString = rootPath + Path.SEPARATOR; 287 if (!file.toString().startsWith(rootString)) { 288 // First we see if fully-qualified URI matches the root dir. It might 289 // also be an absolute path in the same filesystem, so we prepend the FS 290 // of the root dir and see if that fully-qualified URI matches. 291 FileSystem rootFS = rootPath.getFileSystem(getConf()); 292 String qualifiedFile = rootFS.getUri().toString() + file.toString(); 293 if (!qualifiedFile.startsWith(rootString)) { 294 err.println( 295 "ERROR, file (" + file + ") is not in HBase's root directory (" + rootString + ")"); 296 return -2; 297 } 298 } 299 } 300 301 FileSystem fs = file.getFileSystem(getConf()); 302 if (!fs.exists(file)) { 303 err.println("ERROR, file doesnt exist: " + file); 304 return -2; 305 } 306 307 HFile.Reader reader = HFile.createReader(fs, file, CacheConfig.DISABLED, true, getConf()); 308 309 Map<byte[], byte[]> fileInfo = reader.getHFileInfo(); 310 311 KeyValueStatsCollector fileStats = null; 312 313 if (verbose || printKey || checkRow || checkFamily || printStats || checkMobIntegrity) { 314 // scan over file and read key/value's and check if requested 315 HFileScanner scanner = reader.getScanner(getConf(), false, false, false); 316 fileStats = new KeyValueStatsCollector(); 317 boolean shouldScanKeysValues; 318 if (this.isSeekToRow && !Bytes.equals(row, reader.getFirstRowKey().orElse(null))) { 319 // seek to the first kv on this row 320 shouldScanKeysValues = (scanner.seekTo(PrivateCellUtil.createFirstOnRow(this.row)) != -1); 321 } else { 322 shouldScanKeysValues = scanner.seekTo(); 323 } 324 if (shouldScanKeysValues) { 325 scanKeysValues(file, fileStats, scanner, row); 326 } 327 } 328 329 // print meta data 330 if (shouldPrintMeta) { 331 printMeta(reader, fileInfo); 332 } 333 334 if (printBlockIndex) { 335 out.println("Block Index:"); 336 out.println(reader.getDataBlockIndexReader()); 337 } 338 339 if (printBlockHeaders) { 340 out.println("Block Headers:"); 341 /* 342 * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and 343 * TestLazyDataBlockDecompression. Refactor? 344 */ 345 FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file); 346 long fileSize = fs.getFileStatus(file).getLen(); 347 FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize); 348 long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset(); 349 HFileBlock block; 350 while (offset <= max) { 351 block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false, 352 /* isCompaction */ false, /* updateCacheMetrics */ false, null, null); 353 offset += block.getOnDiskSizeWithHeader(); 354 out.println(block); 355 } 356 } 357 358 if (printStats) { 359 fileStats.finish(); 360 out.println("Stats:\n" + fileStats); 361 } 362 363 reader.close(); 364 return 0; 365 } 366 367 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats, HFileScanner scanner, 368 byte[] row) throws IOException { 369 Cell pCell = null; 370 FileSystem fs = FileSystem.get(getConf()); 371 Set<String> foundMobFiles = new LinkedHashSet<>(FOUND_MOB_FILES_CACHE_CAPACITY); 372 Set<String> missingMobFiles = new LinkedHashSet<>(MISSING_MOB_FILES_CACHE_CAPACITY); 373 do { 374 Cell cell = scanner.getCell(); 375 if (row != null && row.length != 0) { 376 int result = CellComparator.getInstance().compareRows(cell, row, 0, row.length); 377 if (result > 0) { 378 break; 379 } else if (result < 0) { 380 continue; 381 } 382 } 383 // collect stats 384 if (printStats) { 385 fileStats.collect(cell); 386 } 387 // dump key value 388 if (printKey) { 389 out.print("K: " + cell); 390 if (printValue) { 391 out.print(" V: " + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(), 392 cell.getValueLength())); 393 int i = 0; 394 List<Tag> tags = PrivateCellUtil.getTags(cell); 395 for (Tag tag : tags) { 396 out.print(String.format(" T[%d]: %s", i++, tag.toString())); 397 } 398 } 399 out.println(); 400 } 401 // check if rows are in order 402 if (checkRow && pCell != null) { 403 if (CellComparator.getInstance().compareRows(pCell, cell) > 0) { 404 err.println("WARNING, previous row is greater then" + " current row\n\tfilename -> " 405 + file + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent -> " 406 + CellUtil.getCellKeyAsString(cell)); 407 } 408 } 409 // check if families are consistent 410 if (checkFamily) { 411 String fam = 412 Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); 413 if (!file.toString().contains(fam)) { 414 err.println("WARNING, filename does not match kv family," + "\n\tfilename -> " + file 415 + "\n\tkeyvalue -> " + CellUtil.getCellKeyAsString(cell)); 416 } 417 if (pCell != null && CellComparator.getInstance().compareFamilies(pCell, cell) != 0) { 418 err.println( 419 "WARNING, previous kv has different family" + " compared to current key\n\tfilename -> " 420 + file + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent -> " 421 + CellUtil.getCellKeyAsString(cell)); 422 } 423 } 424 // check if mob files are missing. 425 if (checkMobIntegrity && MobUtils.isMobReferenceCell(cell)) { 426 Optional<TableName> tn = MobUtils.getTableName(cell); 427 if (!tn.isPresent()) { 428 System.err.println( 429 "ERROR, wrong tag format in mob reference cell " + CellUtil.getCellKeyAsString(cell)); 430 } else if (!MobUtils.hasValidMobRefCellValue(cell)) { 431 System.err.println( 432 "ERROR, wrong value format in mob reference cell " + CellUtil.getCellKeyAsString(cell)); 433 } else { 434 String mobFileName = MobUtils.getMobFileName(cell); 435 boolean exist = mobFileExists(fs, tn.get(), mobFileName, 436 Bytes.toString(CellUtil.cloneFamily(cell)), foundMobFiles, missingMobFiles); 437 if (!exist) { 438 // report error 439 System.err.println("ERROR, the mob file [" + mobFileName 440 + "] is missing referenced by cell " + CellUtil.getCellKeyAsString(cell)); 441 } 442 } 443 } 444 pCell = cell; 445 ++count; 446 } while (scanner.next()); 447 } 448 449 /** 450 * Checks whether the referenced mob file exists. 451 */ 452 private boolean mobFileExists(FileSystem fs, TableName tn, String mobFileName, String family, 453 Set<String> foundMobFiles, Set<String> missingMobFiles) throws IOException { 454 if (foundMobFiles.contains(mobFileName)) { 455 return true; 456 } 457 if (missingMobFiles.contains(mobFileName)) { 458 return false; 459 } 460 String tableName = tn.getNameAsString(); 461 List<Path> locations = mobFileLocations.get(tableName); 462 if (locations == null) { 463 locations = new ArrayList<>(2); 464 locations.add(MobUtils.getMobFamilyPath(getConf(), tn, family)); 465 locations.add(HFileArchiveUtil.getStoreArchivePath(getConf(), tn, 466 MobUtils.getMobRegionInfo(tn).getEncodedName(), family)); 467 mobFileLocations.put(tn.getNameAsString(), locations); 468 } 469 boolean exist = false; 470 for (Path location : locations) { 471 Path mobFilePath = new Path(location, mobFileName); 472 if (fs.exists(mobFilePath)) { 473 exist = true; 474 break; 475 } 476 } 477 if (exist) { 478 evictMobFilesIfNecessary(foundMobFiles, FOUND_MOB_FILES_CACHE_CAPACITY); 479 foundMobFiles.add(mobFileName); 480 } else { 481 evictMobFilesIfNecessary(missingMobFiles, MISSING_MOB_FILES_CACHE_CAPACITY); 482 missingMobFiles.add(mobFileName); 483 } 484 return exist; 485 } 486 487 /** 488 * Evicts the cached mob files if the set is larger than the limit. 489 */ 490 private void evictMobFilesIfNecessary(Set<String> mobFileNames, int limit) { 491 if (mobFileNames.size() < limit) { 492 return; 493 } 494 int index = 0; 495 int evict = limit / 2; 496 Iterator<String> fileNamesItr = mobFileNames.iterator(); 497 while (index < evict && fileNamesItr.hasNext()) { 498 fileNamesItr.next(); 499 fileNamesItr.remove(); 500 index++; 501 } 502 } 503 504 /** 505 * Format a string of the form "k1=v1, k2=v2, ..." into separate lines with a four-space 506 * indentation. 507 */ 508 private static String asSeparateLines(String keyValueStr) { 509 return keyValueStr.replaceAll(", ([a-zA-Z]+=)", ",\n" + FOUR_SPACES + "$1"); 510 } 511 512 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo) throws IOException { 513 out.println("Block index size as per heapsize: " + reader.indexSize()); 514 out.println(asSeparateLines(reader.toString())); 515 out.println("Trailer:\n " + asSeparateLines(reader.getTrailer().toString())); 516 out.println("Fileinfo:"); 517 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) { 518 out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = "); 519 if ( 520 Bytes.equals(e.getKey(), HStoreFile.MAX_SEQ_ID_KEY) 521 || Bytes.equals(e.getKey(), HStoreFile.DELETE_FAMILY_COUNT) 522 || Bytes.equals(e.getKey(), HStoreFile.EARLIEST_PUT_TS) 523 || Bytes.equals(e.getKey(), HFileWriterImpl.MAX_MEMSTORE_TS_KEY) 524 || Bytes.equals(e.getKey(), HFileInfo.CREATE_TIME_TS) 525 || Bytes.equals(e.getKey(), HStoreFile.BULKLOAD_TIME_KEY) 526 ) { 527 out.println(Bytes.toLong(e.getValue())); 528 } else if (Bytes.equals(e.getKey(), HStoreFile.TIMERANGE_KEY)) { 529 TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(e.getValue()); 530 out.println(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax()); 531 } else if ( 532 Bytes.equals(e.getKey(), HFileInfo.AVG_KEY_LEN) 533 || Bytes.equals(e.getKey(), HFileInfo.AVG_VALUE_LEN) 534 || Bytes.equals(e.getKey(), HFileWriterImpl.KEY_VALUE_VERSION) 535 || Bytes.equals(e.getKey(), HFileInfo.MAX_TAGS_LEN) 536 ) { 537 out.println(Bytes.toInt(e.getValue())); 538 } else if ( 539 Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY) 540 || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED) 541 || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY) 542 ) { 543 out.println(Bytes.toBoolean(e.getValue())); 544 } else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) { 545 out.println(new KeyValue.KeyOnlyKeyValue(e.getValue()).toString()); 546 } else { 547 out.println(Bytes.toStringBinary(e.getValue())); 548 } 549 } 550 551 try { 552 out.println("Mid-key: " + reader.midKey().map(CellUtil::getCellKeyAsString)); 553 } catch (Exception e) { 554 out.println("Unable to retrieve the midkey"); 555 } 556 557 // Printing general bloom information 558 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata(); 559 BloomFilter bloomFilter = null; 560 if (bloomMeta != null) bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader); 561 562 out.println("Bloom filter:"); 563 if (bloomFilter != null) { 564 out.println(FOUR_SPACES 565 + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES)); 566 } else { 567 out.println(FOUR_SPACES + "Not present"); 568 } 569 570 // Printing delete bloom information 571 bloomMeta = reader.getDeleteBloomFilterMetadata(); 572 bloomFilter = null; 573 if (bloomMeta != null) bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader); 574 575 out.println("Delete Family Bloom filter:"); 576 if (bloomFilter != null) { 577 out.println(FOUR_SPACES 578 + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES)); 579 } else { 580 out.println(FOUR_SPACES + "Not present"); 581 } 582 } 583 584 private static class KeyValueStatsCollector { 585 private final MetricRegistry metricsRegistry = new MetricRegistry(); 586 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream(); 587 private final SimpleReporter simpleReporter = SimpleReporter.forRegistry(metricsRegistry) 588 .outputTo(new PrintStream(metricsOutput)).filter(MetricFilter.ALL).build(); 589 590 Histogram keyLen = metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Key length")); 591 Histogram valLen = metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Val length")); 592 Histogram rowSizeBytes = 593 metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Row size (bytes)")); 594 Histogram rowSizeCols = 595 metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Row size (columns)")); 596 597 long curRowBytes = 0; 598 long curRowCols = 0; 599 600 byte[] biggestRow = null; 601 602 private Cell prevCell = null; 603 private long maxRowBytes = 0; 604 private long curRowKeyLength; 605 606 public void collect(Cell cell) { 607 valLen.update(cell.getValueLength()); 608 if (prevCell != null && CellComparator.getInstance().compareRows(prevCell, cell) != 0) { 609 // new row 610 collectRow(); 611 } 612 curRowBytes += cell.getSerializedSize(); 613 curRowKeyLength = KeyValueUtil.keyLength(cell); 614 curRowCols++; 615 prevCell = cell; 616 } 617 618 private void collectRow() { 619 rowSizeBytes.update(curRowBytes); 620 rowSizeCols.update(curRowCols); 621 keyLen.update(curRowKeyLength); 622 623 if (curRowBytes > maxRowBytes && prevCell != null) { 624 biggestRow = CellUtil.cloneRow(prevCell); 625 maxRowBytes = curRowBytes; 626 } 627 628 curRowBytes = 0; 629 curRowCols = 0; 630 } 631 632 public void finish() { 633 if (curRowCols > 0) { 634 collectRow(); 635 } 636 } 637 638 @Override 639 public String toString() { 640 if (prevCell == null) return "no data available for statistics"; 641 642 // Dump the metrics to the output stream 643 simpleReporter.stop(); 644 simpleReporter.report(); 645 646 return metricsOutput.toString() + "Key of biggest row: " + Bytes.toStringBinary(biggestRow); 647 } 648 } 649 650 /** 651 * Almost identical to ConsoleReporter, but extending ScheduledReporter, as extending 652 * ConsoleReporter in this version of dropwizard is now too much trouble. 653 */ 654 private static class SimpleReporter extends ScheduledReporter { 655 /** 656 * Returns a new {@link Builder} for {@link ConsoleReporter}. 657 * @param registry the registry to report 658 * @return a {@link Builder} instance for a {@link ConsoleReporter} 659 */ 660 public static Builder forRegistry(MetricRegistry registry) { 661 return new Builder(registry); 662 } 663 664 /** 665 * A builder for {@link SimpleReporter} instances. Defaults to using the default locale and time 666 * zone, writing to {@code System.out}, converting rates to events/second, converting durations 667 * to milliseconds, and not filtering metrics. 668 */ 669 public static class Builder { 670 private final MetricRegistry registry; 671 private PrintStream output; 672 private Locale locale; 673 private TimeZone timeZone; 674 private TimeUnit rateUnit; 675 private TimeUnit durationUnit; 676 private MetricFilter filter; 677 678 private Builder(MetricRegistry registry) { 679 this.registry = registry; 680 this.output = System.out; 681 this.locale = Locale.getDefault(); 682 this.timeZone = TimeZone.getDefault(); 683 this.rateUnit = TimeUnit.SECONDS; 684 this.durationUnit = TimeUnit.MILLISECONDS; 685 this.filter = MetricFilter.ALL; 686 } 687 688 /** 689 * Write to the given {@link PrintStream}. 690 * @param output a {@link PrintStream} instance. 691 * @return {@code this} 692 */ 693 public Builder outputTo(PrintStream output) { 694 this.output = output; 695 return this; 696 } 697 698 /** 699 * Only report metrics which match the given filter. 700 * @param filter a {@link MetricFilter} 701 * @return {@code this} 702 */ 703 public Builder filter(MetricFilter filter) { 704 this.filter = filter; 705 return this; 706 } 707 708 /** 709 * Builds a {@link ConsoleReporter} with the given properties. 710 * @return a {@link ConsoleReporter} 711 */ 712 public SimpleReporter build() { 713 return new SimpleReporter(registry, output, locale, timeZone, rateUnit, durationUnit, 714 filter); 715 } 716 } 717 718 private final PrintStream output; 719 private final Locale locale; 720 private final DateFormat dateFormat; 721 722 private SimpleReporter(MetricRegistry registry, PrintStream output, Locale locale, 723 TimeZone timeZone, TimeUnit rateUnit, TimeUnit durationUnit, MetricFilter filter) { 724 super(registry, "simple-reporter", filter, rateUnit, durationUnit); 725 this.output = output; 726 this.locale = locale; 727 728 this.dateFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.MEDIUM, locale); 729 dateFormat.setTimeZone(timeZone); 730 } 731 732 @Override 733 public void report(SortedMap<String, Gauge> gauges, SortedMap<String, Counter> counters, 734 SortedMap<String, Histogram> histograms, SortedMap<String, Meter> meters, 735 SortedMap<String, Timer> timers) { 736 // we know we only have histograms 737 if (!histograms.isEmpty()) { 738 for (Map.Entry<String, Histogram> entry : histograms.entrySet()) { 739 output.print(" " + StringUtils.substringAfterLast(entry.getKey(), ".")); 740 output.println(':'); 741 printHistogram(entry.getValue()); 742 } 743 output.println(); 744 } 745 746 output.println(); 747 output.flush(); 748 } 749 750 private void printHistogram(Histogram histogram) { 751 Snapshot snapshot = histogram.getSnapshot(); 752 output.printf(locale, " min = %d%n", snapshot.getMin()); 753 output.printf(locale, " max = %d%n", snapshot.getMax()); 754 output.printf(locale, " mean = %2.2f%n", snapshot.getMean()); 755 output.printf(locale, " stddev = %2.2f%n", snapshot.getStdDev()); 756 output.printf(locale, " median = %2.2f%n", snapshot.getMedian()); 757 output.printf(locale, " 75%% <= %2.2f%n", snapshot.get75thPercentile()); 758 output.printf(locale, " 95%% <= %2.2f%n", snapshot.get95thPercentile()); 759 output.printf(locale, " 98%% <= %2.2f%n", snapshot.get98thPercentile()); 760 output.printf(locale, " 99%% <= %2.2f%n", snapshot.get99thPercentile()); 761 output.printf(locale, " 99.9%% <= %2.2f%n", snapshot.get999thPercentile()); 762 output.printf(locale, " count = %d%n", histogram.getCount()); 763 } 764 } 765 766 public static void main(String[] args) throws Exception { 767 Configuration conf = HBaseConfiguration.create(); 768 // no need for a block cache 769 conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0); 770 int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args); 771 System.exit(ret); 772 } 773}