/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileReaderImpl;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser;

/**
 * Tests various algorithms for key compression on an existing HFile. Useful
 * for testing, debugging and benchmarking.
 *
 * <p>Typical flow (see {@link #testCodecs}): {@link #checkStatistics} reads the
 * key/values and builds one {@link EncodedDataBlock} per encoding,
 * {@link #verifyCodecs} optionally checks that every encoding round-trips,
 * {@link #benchmarkCodecs} optionally measures throughput, and
 * {@link #displayStatistics} prints the size comparison.
 */
public class DataBlockEncodingTool {
  private static final Logger LOG = LoggerFactory.getLogger(
      DataBlockEncodingTool.class);

  private static final boolean includesMemstoreTS = true;

  /**
   * How many times to run the benchmark. More times means better data in terms
   * of statistics but slower execution. Has to be strictly larger than
   * {@link #DEFAULT_BENCHMARK_N_OMIT}.
   */
  private static final int DEFAULT_BENCHMARK_N_TIMES = 12;

  /**
   * How many first runs should not be included in the benchmark. Done in order
   * to exclude setup cost.
   */
  private static final int DEFAULT_BENCHMARK_N_OMIT = 2;

  /** HFile name to be used in benchmark */
  private static final String OPT_HFILE_NAME = "f";

  /** Maximum number of key/value pairs to process in a single benchmark run */
  private static final String OPT_KV_LIMIT = "n";

  /** Whether to run a benchmark to measure read throughput */
  private static final String OPT_MEASURE_THROUGHPUT = "b";

  /** If this is specified, no correctness testing will be done */
  private static final String OPT_OMIT_CORRECTNESS_TEST = "c";

  /** What encoding algorithm to test */
  private static final String OPT_ENCODING_ALGORITHM = "a";

  /** Number of times to run each benchmark */
  private static final String OPT_BENCHMARK_N_TIMES = "t";

  /** Number of first runs of every benchmark to omit from statistics */
  private static final String OPT_BENCHMARK_N_OMIT = "omit";

  /** Compression algorithm to use if not specified on the command line */
  private static final Algorithm DEFAULT_COMPRESSION =
      Compression.Algorithm.GZ;

  /** Formatter used to print integer values with digit grouping. */
  private static final DecimalFormat DELIMITED_DECIMAL_FORMAT =
      new DecimalFormat();

  static {
    DELIMITED_DECIMAL_FORMAT.setGroupingSize(3);
  }

  private static final String PCT_FORMAT = "%.2f %%";
  private static final String INT_FORMAT = "%d";

  private static final double BYTES_IN_MB = 1024 * 1024.0;
  private static final double NS_IN_SEC = 1000.0 * 1000.0 * 1000.0;
  /** Multiplying bytes-per-nanosecond by this factor yields MB per second. */
  private static final double MB_SEC_COEF = NS_IN_SEC / BYTES_IN_MB;

  // Benchmark repetition settings; overwritten by main() from the command line.
  private static int benchmarkNTimes = DEFAULT_BENCHMARK_N_TIMES;
  private static int benchmarkNOmit = DEFAULT_BENCHMARK_N_OMIT;

  /** One encoded view of {@link #rawKVs} per tested encoding; filled by checkStatistics. */
  private List<EncodedDataBlock> codecs = new ArrayList<>();
  private long totalPrefixLength = 0;
  private long totalKeyLength = 0;
  private long totalValueLength = 0;
  private long totalKeyRedundancyLength = 0;
  private long totalCFLength = 0;

  /** Concatenated serialized key/values read by checkStatistics. */
  private byte[] rawKVs;
  private boolean useHBaseChecksum = false;

  private final String compressionAlgorithmName;
  private final Algorithm compressionAlgorithm;
  private final Compressor compressor;
  private final Decompressor decompressor;

  /** Kind of operation being measured; {@link #toString()} yields e.g. "Encoding". */
  private static enum Manipulation {
    ENCODING,
    DECODING,
    COMPRESSION,
    DECOMPRESSION;

    /** Returns the constant name with only its first letter capitalized. */
    @Override
    public String toString() {
      String s = super.toString();
      StringBuilder sb = new StringBuilder();
      sb.append(s.charAt(0));
      sb.append(s.substring(1).toLowerCase(Locale.ROOT));
      return sb.toString();
    }
  }

  /**
   * @param compressionAlgorithmName What kind of algorithm should be used
   *                                 as baseline for comparison (e.g. lzo, gz).
   */
  public DataBlockEncodingTool(String compressionAlgorithmName) {
    this.compressionAlgorithmName = compressionAlgorithmName;
    this.compressionAlgorithm = Compression.getCompressionAlgorithmByName(
        compressionAlgorithmName);
    this.compressor = this.compressionAlgorithm.getCompressor();
    this.decompressor = this.compressionAlgorithm.getDecompressor();
  }

  /**
   * Check statistics for given HFile for different data block encoders.
   * Accumulates the size counters, stores the raw serialized key/values and
   * creates one {@link EncodedDataBlock} for every encoding except
   * {@link DataBlockEncoding#NONE}.
   * @param scanner Of file which will be compressed.
   * @param kvLimit Maximal count of KeyValue which will be processed.
   * @throws IOException thrown if scanner is invalid
   */
  public void checkStatistics(final KeyValueScanner scanner, final int kvLimit)
      throws IOException {
    scanner.seek(KeyValue.LOWESTKEY);

    KeyValue currentKV;
    byte[] previousKey = null;

    ByteArrayOutputStream uncompressedOutputStream =
        new ByteArrayOutputStream();

    // Tag presence is tracked while iterating. After the loop currentKV is
    // null whenever the scanner ran out of cells, so it must not be
    // dereferenced there.
    boolean useTag = false;
    int j = 0;
    while ((currentKV = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) {
      // Iterates through key/value pairs
      j++;
      byte[] currentKey = currentKV.getKey();
      if (previousKey != null) {
        // Count the bytes this key shares as a prefix with the previous key.
        for (int i = 0; i < previousKey.length && i < currentKey.length &&
            previousKey[i] == currentKey[i]; ++i) {
          totalKeyRedundancyLength++;
        }
      }

      uncompressedOutputStream.write(currentKV.getBuffer(),
          currentKV.getOffset(), currentKV.getLength());

      previousKey = currentKey;

      int kLen = currentKV.getKeyLength();
      int vLen = currentKV.getValueLength();
      int cfLen = currentKV.getFamilyLength(currentKV.getFamilyOffset());
      int restLen = currentKV.getLength() - kLen - vLen;

      totalKeyLength += kLen;
      totalValueLength += vLen;
      totalPrefixLength += restLen;
      totalCFLength += cfLen;

      useTag |= currentKV.getTagsLength() > 0;
    }

    rawKVs = uncompressedOutputStream.toByteArray();
    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
      if (encoding == DataBlockEncoding.NONE) {
        continue;
      }
      DataBlockEncoder d = encoding.getEncoder();
      HFileContext meta = new HFileContextBuilder()
          .withCompression(Compression.Algorithm.NONE)
          .withIncludesMvcc(includesMemstoreTS)
          .withIncludesTags(useTag).build();
      codecs.add(new EncodedDataBlock(d, encoding, rawKVs, meta));
    }
  }

  /**
   * Verify if all data block encoders are working properly.
   *
   * @param scanner Of file which was compressed.
   * @param kvLimit Maximal count of KeyValue which will be processed.
   * @return true if all data block encoders compressed/decompressed correctly.
   * @throws IOException thrown if scanner is invalid
   */
  public boolean verifyCodecs(final KeyValueScanner scanner, final int kvLimit)
      throws IOException {
    KeyValue currentKv;

    scanner.seek(KeyValue.LOWESTKEY);
    List<Iterator<Cell>> codecIterators = new ArrayList<>();
    for (EncodedDataBlock codec : codecs) {
      codecIterators.add(codec.getIterator(HFileBlock.headerSize(useHBaseChecksum)));
    }

    int j = 0;
    while ((currentKv = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) {
      // Iterates through key/value pairs
      ++j;
      for (Iterator<Cell> it : codecIterators) {
        Cell c = it.next();
        KeyValue codecKv = KeyValueUtil.ensureKeyValue(c);
        if (codecKv == null || 0 != Bytes.compareTo(
            codecKv.getBuffer(), codecKv.getOffset(), codecKv.getLength(),
            currentKv.getBuffer(), currentKv.getOffset(),
            currentKv.getLength())) {
          if (codecKv == null) {
            LOG.error("There is a bug in codec " + it +
                " it returned null KeyValue,");
          } else {
            // Find the length of the common prefix of both serialized cells.
            // The scan is bounded by the shorter cell so it cannot read past
            // the end of either one.
            int prefix = 0;
            int limitLength = Math.min(codecKv.getLength(), currentKv.getLength());
            while (prefix < limitLength &&
                codecKv.getBuffer()[prefix + codecKv.getOffset()] ==
                currentKv.getBuffer()[prefix + currentKv.getOffset()]) {
              prefix++;
            }

            LOG.error("There is bug in codec " + it.toString() +
                "\n on element " + j +
                "\n codecKv.getKeyLength() " + codecKv.getKeyLength() +
                "\n codecKv.getValueLength() " + codecKv.getValueLength() +
                "\n codecKv.getLength() " + codecKv.getLength() +
                "\n currentKv.getKeyLength() " + currentKv.getKeyLength() +
                "\n currentKv.getValueLength() " + currentKv.getValueLength() +
                "\n currentKv.getLength() " + currentKv.getLength() +
                "\n currentKV rowLength " + currentKv.getRowLength() +
                " familyName " + currentKv.getFamilyLength() +
                " qualifier " + currentKv.getQualifierLength() +
                "\n prefix " + prefix +
                "\n codecKv '" + Bytes.toStringBinary(codecKv.getBuffer(),
                    codecKv.getOffset(), prefix) + "' diff '" +
                    Bytes.toStringBinary(codecKv.getBuffer(),
                        codecKv.getOffset() + prefix, codecKv.getLength() -
                        prefix) + "'" +
                "\n currentKv '" + Bytes.toStringBinary(
                    currentKv.getBuffer(),
                    currentKv.getOffset(), prefix) + "' diff '" +
                    Bytes.toStringBinary(currentKv.getBuffer(),
                        currentKv.getOffset() + prefix, currentKv.getLength() -
                        prefix) + "'"
                );
          }
          return false;
        }
      }
    }

    LOG.info("Verification was successful!");

    return true;
  }

  /**
   * Benchmark codec's speed. Runs the encode/decode benchmark for every codec
   * created by {@link #checkStatistics} and then the baseline compression
   * benchmark on the raw key/values.
   * @throws IOException if the baseline compression benchmark fails
   */
  public void benchmarkCodecs() throws IOException {
    LOG.info("Starting a throughput benchmark for data block encoding codecs");
    int prevTotalSize = -1;
    for (EncodedDataBlock codec : codecs) {
      prevTotalSize = benchmarkEncoder(prevTotalSize, codec);
    }

    benchmarkDefaultCompression(prevTotalSize, rawKVs);
  }

  /**
   * Benchmark compression/decompression throughput.
   * @param previousTotalSize Total size used for verification. Use -1 if
   *          unknown.
   * @param codec Tested encoder.
   * @return Size of uncompressed data.
   * @throws IllegalStateException if a decoding pass yields a total size
   *           different from the previously observed one
   */
  private int benchmarkEncoder(int previousTotalSize, EncodedDataBlock codec) {
    int prevTotalSize = previousTotalSize;
    int totalSize = 0;

    // decompression time
    List<Long> durations = new ArrayList<>();
    for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
      totalSize = 0;

      Iterator<Cell> it = codec.getIterator(HFileBlock.headerSize(useHBaseChecksum));

      // count only the algorithm time, without memory allocations
      // (expect first time)
      final long startTime = System.nanoTime();
      while (it.hasNext()) {
        totalSize += KeyValueUtil.ensureKeyValue(it.next()).getLength();
      }
      final long finishTime = System.nanoTime();
      if (itTime >= benchmarkNOmit) {
        durations.add(finishTime - startTime);
      }

      if (prevTotalSize != -1 && prevTotalSize != totalSize) {
        throw new IllegalStateException(String.format(
            "Algorithm '%s' decoded data to different size", codec.toString()));
      }
      prevTotalSize = totalSize;
    }

    List<Long> encodingDurations = new ArrayList<>();
    for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
      final long startTime = System.nanoTime();
      codec.encodeData();
      final long finishTime = System.nanoTime();
      if (itTime >= benchmarkNOmit) {
        encodingDurations.add(finishTime - startTime);
      }
    }

    System.out.println(codec.toString() + ":");
    printBenchmarkResult(totalSize, encodingDurations, Manipulation.ENCODING);
    printBenchmarkResult(totalSize, durations, Manipulation.DECODING);
    System.out.println();

    return prevTotalSize;
  }

  /**
   * Runs {@link #benchmarkAlgorithm} for the baseline compression algorithm
   * on the first {@code totalSize} bytes of {@code rawBuffer}.
   */
  private void benchmarkDefaultCompression(int totalSize, byte[] rawBuffer)
      throws IOException {
    benchmarkAlgorithm(compressionAlgorithm,
        compressionAlgorithmName.toUpperCase(Locale.ROOT), rawBuffer, 0, totalSize);
  }

  /**
   * Check decompress performance of a given algorithm and print it.
   * @param algorithm Compression algorithm.
   * @param name Name of algorithm.
   * @param buffer Buffer to be compressed.
   * @param offset Position of the beginning of the data.
   * @param length Length of data in buffer.
   * @throws IOException if the compression stream cannot be created or closed
   * @throws RuntimeException if compressing or decompressing fails, or if the
   *           decompressed bytes differ from the input
   */
  public void benchmarkAlgorithm(Compression.Algorithm algorithm, String name,
      byte[] buffer, int offset, int length) throws IOException {
    System.out.println(name + ":");

    // compress it
    List<Long> compressDurations = new ArrayList<>();
    ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
    CompressionOutputStream compressingStream =
        algorithm.createPlainCompressionStream(compressedStream, compressor);
    try {
      for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
        final long startTime = System.nanoTime();
        compressingStream.resetState();
        compressedStream.reset();
        compressingStream.write(buffer, offset, length);
        compressingStream.flush();
        compressedStream.toByteArray();

        final long finishTime = System.nanoTime();

        // add time record
        if (itTime >= benchmarkNOmit) {
          compressDurations.add(finishTime - startTime);
        }
      }
    } catch (IOException e) {
      throw new RuntimeException(String.format(
          "Benchmark, or encoding algorithm '%s' cause some stream problems",
          name), e);
    }
    compressingStream.close();
    printBenchmarkResult(length, compressDurations, Manipulation.COMPRESSION);

    byte[] compBuffer = compressedStream.toByteArray();

    // uncompress it several times and measure performance
    List<Long> durations = new ArrayList<>();
    for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
      final long startTime = System.nanoTime();
      byte[] newBuf = new byte[length + 1];

      try {
        ByteArrayInputStream downStream = new ByteArrayInputStream(compBuffer,
            0, compBuffer.length);
        InputStream decompressedStream = algorithm.createDecompressionStream(
            downStream, decompressor, 0);

        int destOffset = 0;
        int nextChunk;
        while ((nextChunk = decompressedStream.available()) > 0) {
          destOffset += decompressedStream.read(newBuf, destOffset, nextChunk);
        }
        decompressedStream.close();

        // iterate over KeyValues so that cell parsing is part of the measured time
        KeyValue kv;
        for (int pos = 0; pos < length; pos += kv.getLength()) {
          kv = new KeyValue(newBuf, pos);
        }

      } catch (IOException e) {
        throw new RuntimeException(String.format(
            "Decoding path in '%s' algorithm cause exception ", name), e);
      }

      final long finishTime = System.nanoTime();

      // check correctness
      if (0 != Bytes.compareTo(buffer, 0, length, newBuf, 0, length)) {
        throw new RuntimeException(String.format(
            "Algorithm '%s' is corrupting the data", name));
      }

      // add time record
      if (itTime >= benchmarkNOmit) {
        durations.add(finishTime - startTime);
      }
    }
    printBenchmarkResult(length, durations, Manipulation.DECOMPRESSION);
    System.out.println();
  }

  /**
   * Prints mean throughput and its standard deviation for one benchmark.
   * @param totalSize number of bytes processed in every run
   * @param durationsInNanoSec measured run times; may be empty, in which case
   *          a placeholder line is printed instead of dividing by zero
   * @param manipulation which operation was measured (used in the caption)
   */
  private static void printBenchmarkResult(int totalSize,
      List<Long> durationsInNanoSec, Manipulation manipulation) {
    final int n = durationsInNanoSec.size();
    if (n == 0) {
      // No run was kept for statistics (all runs were omitted); there is
      // nothing to average.
      outputTuple(manipulation + " performance", "%s", "n/a");
      return;
    }

    long meanTime = 0;
    for (long time : durationsInNanoSec) {
      meanTime += time;
    }
    meanTime /= n;

    double meanMBPerSec = totalSize * MB_SEC_COEF / meanTime;
    double mbPerSecSTD = 0;
    for (long time : durationsInNanoSec) {
      double mbPerSec = totalSize * MB_SEC_COEF / time;
      double dev = mbPerSec - meanMBPerSec;
      mbPerSecSTD += dev * dev;
    }
    mbPerSecSTD = Math.sqrt(mbPerSecSTD / n);

    outputTuple(manipulation + " performance", "%6.2f MB/s (+/- %.2f MB/s)",
        meanMBPerSec, mbPerSecSTD);
  }

  /**
   * Prints "caption: value" with the value right-aligned at column 60.
   * If {@code format} starts with {@link #INT_FORMAT}, the first value is
   * rendered with digit grouping (and {@code values[0]} is replaced by that
   * rendered string).
   */
  private static void outputTuple(String caption, String format,
      Object... values) {
    if (format.startsWith(INT_FORMAT)) {
      format = "%s" + format.substring(INT_FORMAT.length());
      values[0] = DELIMITED_DECIMAL_FORMAT.format(values[0]);
    }

    StringBuilder sb = new StringBuilder();
    sb.append(" ");
    sb.append(caption);
    sb.append(":");

    String v = String.format(format, values);
    int padding = 60 - sb.length() - v.length();
    for (int i = 0; i < padding; ++i) {
      sb.append(' ');
    }
    sb.append(v);
    System.out.println(sb);
  }

  /**
   * Display statistics of different compression algorithms.
   * @throws IOException if computing a compressed size fails
   */
  public void displayStatistics() throws IOException {
    final String comprAlgo = compressionAlgorithmName.toUpperCase(Locale.ROOT);
    long rawBytes = totalKeyLength + totalPrefixLength + totalValueLength;

    System.out.println("Raw data size:");
    outputTuple("Raw bytes", INT_FORMAT, rawBytes);
    outputTuplePct("Key bytes", totalKeyLength);
    outputTuplePct("Value bytes", totalValueLength);
    outputTuplePct("KV infrastructure", totalPrefixLength);
    outputTuplePct("CF overhead", totalCFLength);
    outputTuplePct("Total key redundancy", totalKeyRedundancyLength);

    int compressedSize = EncodedDataBlock.getCompressedSize(
        compressionAlgorithm, compressor, rawKVs, 0, rawKVs.length);
    outputTuple(comprAlgo + " only size", INT_FORMAT,
        compressedSize);
    outputSavings(comprAlgo + " only", compressedSize, rawBytes);
    System.out.println();

    for (EncodedDataBlock codec : codecs) {
      System.out.println(codec.toString());
      long encodedBytes = codec.getSize();
      outputTuple("Encoded bytes", INT_FORMAT, encodedBytes);
      outputSavings("Key encoding", encodedBytes - totalValueLength,
          rawBytes - totalValueLength);
      outputSavings("Total encoding", encodedBytes, rawBytes);

      int encodedCompressedSize = codec.getEncodedCompressedSize(
          compressionAlgorithm, compressor);
      outputTuple("Encoding + " + comprAlgo + " size", INT_FORMAT,
          encodedCompressedSize);
      outputSavings("Encoding + " + comprAlgo, encodedCompressedSize, rawBytes);
      outputSavings("Encoding with " + comprAlgo, encodedCompressedSize,
          compressedSize);

      System.out.println();
    }
  }

  /** Prints {@code size} together with its percentage of the raw key/value bytes. */
  private void outputTuplePct(String caption, long size) {
    outputTuple(caption, INT_FORMAT + " (" + PCT_FORMAT + ")",
        size, size * 100.0 / rawKVs.length);
  }

  /** Prints how much smaller {@code part} is than {@code whole}, as a percentage and a ratio. */
  private void outputSavings(String caption, long part, long whole) {
    double pct = 100.0 * (1 - 1.0 * part / whole);
    double times = whole * 1.0 / part;
    outputTuple(caption + " savings", PCT_FORMAT + " (%.2f x)",
        pct, times);
  }

  /**
   * Test a data block encoder on the given HFile. Output results to console.
   * The scanner and reader are closed even if one of the steps fails.
   * @param conf Configuration used to open the file system and the HFile.
   * @param kvLimit The limit of KeyValue which will be analyzed.
   * @param hfilePath an HFile path on the file system.
   * @param compressionName Compression algorithm used for comparison.
   * @param doBenchmark Run performance benchmarks.
   * @param doVerify Verify correctness.
   * @throws IOException When pathName is incorrect.
   */
  public static void testCodecs(Configuration conf, int kvLimit,
      String hfilePath, String compressionName, boolean doBenchmark,
      boolean doVerify) throws IOException {
    // create environment
    Path path = new Path(hfilePath);
    CacheConfig cacheConf = new CacheConfig(conf);
    FileSystem fs = FileSystem.get(conf);
    HStoreFile hsf = new HStoreFile(fs, path, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    try {
      reader.loadFileInfo();
      KeyValueScanner scanner = reader.getStoreFileScanner(true, true, false, 0, 0, false);
      try {
        // run the utilities
        DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName);
        int majorVersion = reader.getHFileVersion();
        comp.useHBaseChecksum = majorVersion > 2 ||
            (majorVersion == 2 &&
             reader.getHFileMinorVersion() >= HFileReaderImpl.MINOR_VERSION_WITH_CHECKSUM);
        comp.checkStatistics(scanner, kvLimit);
        if (doVerify) {
          comp.verifyCodecs(scanner, kvLimit);
        }
        if (doBenchmark) {
          comp.benchmarkCodecs();
        }
        comp.displayStatistics();
      } finally {
        // cleanup
        scanner.close();
      }
    } finally {
      reader.close(cacheConf.shouldEvictOnClose());
    }
  }

  /** Prints the command-line usage and the description of every option to stderr. */
  private static void printUsage(Options options) {
    System.err.println("Usage:");
    System.err.println(String.format("./hbase %s <options>",
        DataBlockEncodingTool.class.getName()));
    System.err.println("Options:");
    for (Object it : options.getOptions()) {
      Option opt = (Option) it;
      if (opt.hasArg()) {
        System.err.println(String.format("-%s %s: %s", opt.getOpt(),
            opt.getArgName(), opt.getDescription()));
      } else {
        System.err.println(String.format("-%s: %s", opt.getOpt(),
            opt.getDescription()));
      }
    }
  }

  /**
   * A command line interface to benchmarks. Parses command-line arguments and
   * runs the appropriate benchmarks.
   * @param args Should have length at least 1 and holds the file path to HFile.
   * @throws IOException If you specified the wrong file.
   */
  public static void main(final String[] args) throws IOException {
    // set up user arguments
    Options options = new Options();
    options.addOption(OPT_HFILE_NAME, true, "HFile to analyse (REQUIRED)");
    options.getOption(OPT_HFILE_NAME).setArgName("FILENAME");
    options.addOption(OPT_KV_LIMIT, true,
        "Maximum number of KeyValues to process. A benchmark stops running " +
        "after iterating over this many KV pairs.");
    options.getOption(OPT_KV_LIMIT).setArgName("NUMBER");
    options.addOption(OPT_MEASURE_THROUGHPUT, false,
        "Measure read throughput");
    options.addOption(OPT_OMIT_CORRECTNESS_TEST, false,
        "Omit corectness tests.");
    options.addOption(OPT_ENCODING_ALGORITHM, true,
        "What kind of compression algorithm use for comparison.");
    options.addOption(OPT_BENCHMARK_N_TIMES,
        true, "Number of times to run each benchmark. Default value: " +
            DEFAULT_BENCHMARK_N_TIMES);
    options.addOption(OPT_BENCHMARK_N_OMIT, true,
        "Number of first runs of every benchmark to exclude from "
            + "statistics (" + DEFAULT_BENCHMARK_N_OMIT
            + " by default, so that " + "only the last "
            + (DEFAULT_BENCHMARK_N_TIMES - DEFAULT_BENCHMARK_N_OMIT)
            + " times are included in statistics.)");

    // parse arguments
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
      cmd = parser.parse(options, args);
    } catch (ParseException e) {
      System.err.println("Could not parse arguments!");
      System.exit(-1);
      return; // avoid warning
    }

    int kvLimit = Integer.MAX_VALUE;
    if (cmd.hasOption(OPT_KV_LIMIT)) {
      kvLimit = Integer.parseInt(cmd.getOptionValue(OPT_KV_LIMIT));
    }

    // basic argument sanity checks
    if (!cmd.hasOption(OPT_HFILE_NAME)) {
      LOG.error("Please specify HFile name using the " + OPT_HFILE_NAME
          + " option");
      printUsage(options);
      System.exit(-1);
    }

    String pathName = cmd.getOptionValue(OPT_HFILE_NAME);
    String compressionName = DEFAULT_COMPRESSION.getName();
    if (cmd.hasOption(OPT_ENCODING_ALGORITHM)) {
      compressionName =
          cmd.getOptionValue(OPT_ENCODING_ALGORITHM).toLowerCase(Locale.ROOT);
    }
    boolean doBenchmark = cmd.hasOption(OPT_MEASURE_THROUGHPUT);
    boolean doVerify = !cmd.hasOption(OPT_OMIT_CORRECTNESS_TEST);

    if (cmd.hasOption(OPT_BENCHMARK_N_TIMES)) {
      benchmarkNTimes = Integer.parseInt(cmd.getOptionValue(
          OPT_BENCHMARK_N_TIMES));
    }
    if (cmd.hasOption(OPT_BENCHMARK_N_OMIT)) {
      benchmarkNOmit =
          Integer.parseInt(cmd.getOptionValue(OPT_BENCHMARK_N_OMIT));
    }
    // The run count must be strictly greater than the omitted count;
    // otherwise no run is left to compute statistics from.
    if (benchmarkNTimes <= benchmarkNOmit) {
      LOG.error("The number of times to run each benchmark ("
          + benchmarkNTimes
          + ") must be greater than the number of benchmark runs to exclude "
          + "from statistics (" + benchmarkNOmit + ")");
      System.exit(1);
    }
    LOG.info("Running benchmark {} times. Excluding the first {} times from statistics.",
        benchmarkNTimes, benchmarkNOmit);

    final Configuration conf = HBaseConfiguration.create();
    try {
      testCodecs(conf, kvLimit, pathName, compressionName, doBenchmark,
          doVerify);
    } finally {
      (new CacheConfig(conf)).getBlockCache().shutdown();
    }
  }

}