001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with this
004 * work for additional information regarding copyright ownership. The ASF
005 * licenses this file to you under the Apache License, Version 2.0 (the
006 * "License"); you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
014 * License for the specific language governing permissions and limitations
015 * under the License.
016 */
017package org.apache.hadoop.hbase.regionserver;
018
019import java.io.ByteArrayInputStream;
020import java.io.ByteArrayOutputStream;
021import java.io.IOException;
022import java.io.InputStream;
023import java.text.DecimalFormat;
024import java.util.ArrayList;
025import java.util.Iterator;
026import java.util.List;
027import java.util.Locale;
028
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.Cell;
033import org.apache.hadoop.hbase.HBaseConfiguration;
034import org.apache.hadoop.hbase.KeyValue;
035import org.apache.hadoop.hbase.KeyValueUtil;
036import org.apache.hadoop.hbase.io.compress.Compression;
037import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
038import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
039import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
040import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;
041import org.apache.hadoop.hbase.io.hfile.CacheConfig;
042import org.apache.hadoop.hbase.io.hfile.HFileBlock;
043import org.apache.hadoop.hbase.io.hfile.HFileContext;
044import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
045import org.apache.hadoop.hbase.io.hfile.HFileReaderImpl;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.io.compress.CompressionOutputStream;
048import org.apache.hadoop.io.compress.Compressor;
049import org.apache.hadoop.io.compress.Decompressor;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
053import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
054import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
055import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
056import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
057import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser;
058
059/**
060 * Tests various algorithms for key compression on an existing HFile. Useful
061 * for testing, debugging and benchmarking.
062 */
063public class DataBlockEncodingTool {
064  private static final Logger LOG = LoggerFactory.getLogger(
065      DataBlockEncodingTool.class);
066
067  private static final boolean includesMemstoreTS = true;
068
069  /**
070   * How many times to run the benchmark. More times means better data in terms
071   * of statistics but slower execution. Has to be strictly larger than
072   * {@link #DEFAULT_BENCHMARK_N_OMIT}.
073   */
074  private static final int DEFAULT_BENCHMARK_N_TIMES = 12;
075
076  /**
077   * How many first runs should not be included in the benchmark. Done in order
078   * to exclude setup cost.
079   */
080  private static final int DEFAULT_BENCHMARK_N_OMIT = 2;
081
082  /** HFile name to be used in benchmark */
083  private static final String OPT_HFILE_NAME = "f";
084
085  /** Maximum number of key/value pairs to process in a single benchmark run */
086  private static final String OPT_KV_LIMIT = "n";
087
088  /** Whether to run a benchmark to measure read throughput */
089  private static final String OPT_MEASURE_THROUGHPUT = "b";
090
091  /** If this is specified, no correctness testing will be done */
092  private static final String OPT_OMIT_CORRECTNESS_TEST = "c";
093
094  /** What encoding algorithm to test */
095  private static final String OPT_ENCODING_ALGORITHM = "a";
096
097  /** Number of times to run each benchmark */
098  private static final String OPT_BENCHMARK_N_TIMES = "t";
099
100  /** Number of first runs of every benchmark to omit from statistics */
101  private static final String OPT_BENCHMARK_N_OMIT = "omit";
102
103  /** Compression algorithm to use if not specified on the command line */
104  private static final Algorithm DEFAULT_COMPRESSION =
105      Compression.Algorithm.GZ;
106
107  private static final DecimalFormat DELIMITED_DECIMAL_FORMAT =
108      new DecimalFormat();
109
110  static {
111    DELIMITED_DECIMAL_FORMAT.setGroupingSize(3);
112  }
113
114  private static final String PCT_FORMAT = "%.2f %%";
115  private static final String INT_FORMAT = "%d";
116
117  private static int benchmarkNTimes = DEFAULT_BENCHMARK_N_TIMES;
118  private static int benchmarkNOmit = DEFAULT_BENCHMARK_N_OMIT;
119
120  private List<EncodedDataBlock> codecs = new ArrayList<>();
121  private long totalPrefixLength = 0;
122  private long totalKeyLength = 0;
123  private long totalValueLength = 0;
124  private long totalKeyRedundancyLength = 0;
125  private long totalCFLength = 0;
126
127  private byte[] rawKVs;
128  private boolean useHBaseChecksum = false;
129
130  private final String compressionAlgorithmName;
131  private final Algorithm compressionAlgorithm;
132  private final Compressor compressor;
133  private final Decompressor decompressor;
134
135  private static enum Manipulation {
136    ENCODING,
137    DECODING,
138    COMPRESSION,
139    DECOMPRESSION;
140
141    @Override
142    public String toString() {
143      String s = super.toString();
144      StringBuilder sb = new StringBuilder();
145      sb.append(s.charAt(0));
146      sb.append(s.substring(1).toLowerCase(Locale.ROOT));
147      return sb.toString();
148    }
149  }
150
151  /**
152   * @param compressionAlgorithmName What kind of algorithm should be used
153   *                                 as baseline for comparison (e.g. lzo, gz).
154   */
155  public DataBlockEncodingTool(String compressionAlgorithmName) {
156    this.compressionAlgorithmName = compressionAlgorithmName;
157    this.compressionAlgorithm = Compression.getCompressionAlgorithmByName(
158        compressionAlgorithmName);
159    this.compressor = this.compressionAlgorithm.getCompressor();
160    this.decompressor = this.compressionAlgorithm.getDecompressor();
161  }
162
163  /**
164   * Check statistics for given HFile for different data block encoders.
165   * @param scanner Of file which will be compressed.
166   * @param kvLimit Maximal count of KeyValue which will be processed.
167   * @throws IOException thrown if scanner is invalid
168   */
169  public void checkStatistics(final KeyValueScanner scanner, final int kvLimit)
170      throws IOException {
171    scanner.seek(KeyValue.LOWESTKEY);
172
173    KeyValue currentKV;
174
175    byte[] previousKey = null;
176    byte[] currentKey;
177
178    DataBlockEncoding[] encodings = DataBlockEncoding.values();
179
180    ByteArrayOutputStream uncompressedOutputStream =
181        new ByteArrayOutputStream();
182
183    int j = 0;
184    while ((currentKV = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) {
185      // Iterates through key/value pairs
186      j++;
187      currentKey = currentKV.getKey();
188      if (previousKey != null) {
189        for (int i = 0; i < previousKey.length && i < currentKey.length &&
190            previousKey[i] == currentKey[i]; ++i) {
191          totalKeyRedundancyLength++;
192        }
193      }
194
195      uncompressedOutputStream.write(currentKV.getBuffer(),
196          currentKV.getOffset(), currentKV.getLength());
197
198      previousKey = currentKey;
199
200      int kLen = currentKV.getKeyLength();
201      int vLen = currentKV.getValueLength();
202      int cfLen = currentKV.getFamilyLength(currentKV.getFamilyOffset());
203      int restLen = currentKV.getLength() - kLen - vLen;
204
205      totalKeyLength += kLen;
206      totalValueLength += vLen;
207      totalPrefixLength += restLen;
208      totalCFLength += cfLen;
209    }
210
211    rawKVs = uncompressedOutputStream.toByteArray();
212    boolean useTag = (currentKV.getTagsLength() > 0);
213    for (DataBlockEncoding encoding : encodings) {
214      if (encoding == DataBlockEncoding.NONE) {
215        continue;
216      }
217      DataBlockEncoder d = encoding.getEncoder();
218      HFileContext meta = new HFileContextBuilder()
219                          .withCompression(Compression.Algorithm.NONE)
220                          .withIncludesMvcc(includesMemstoreTS)
221                          .withIncludesTags(useTag).build();
222      codecs.add(new EncodedDataBlock(d, encoding, rawKVs, meta ));
223    }
224  }
225
226  /**
227   * Verify if all data block encoders are working properly.
228   *
229   * @param scanner Of file which was compressed.
230   * @param kvLimit Maximal count of KeyValue which will be processed.
231   * @return true if all data block encoders compressed/decompressed correctly.
232   * @throws IOException thrown if scanner is invalid
233   */
234  public boolean verifyCodecs(final KeyValueScanner scanner, final int kvLimit)
235      throws IOException {
236    KeyValue currentKv;
237
238    scanner.seek(KeyValue.LOWESTKEY);
239    List<Iterator<Cell>> codecIterators = new ArrayList<>();
240    for(EncodedDataBlock codec : codecs) {
241      codecIterators.add(codec.getIterator(HFileBlock.headerSize(useHBaseChecksum)));
242    }
243
244    int j = 0;
245    while ((currentKv = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) {
246      // Iterates through key/value pairs
247      ++j;
248      for (Iterator<Cell> it : codecIterators) {
249        Cell c = it.next();
250        KeyValue codecKv = KeyValueUtil.ensureKeyValue(c);
251        if (codecKv == null || 0 != Bytes.compareTo(
252            codecKv.getBuffer(), codecKv.getOffset(), codecKv.getLength(),
253            currentKv.getBuffer(), currentKv.getOffset(),
254            currentKv.getLength())) {
255          if (codecKv == null) {
256            LOG.error("There is a bug in codec " + it +
257                " it returned null KeyValue,");
258          } else {
259            int prefix = 0;
260            int limitLength = 2 * Bytes.SIZEOF_INT +
261                Math.min(codecKv.getLength(), currentKv.getLength());
262            while (prefix < limitLength &&
263                codecKv.getBuffer()[prefix + codecKv.getOffset()] ==
264                currentKv.getBuffer()[prefix + currentKv.getOffset()]) {
265              prefix++;
266            }
267
268            LOG.error("There is bug in codec " + it.toString() +
269                "\n on element " + j +
270                "\n codecKv.getKeyLength() " + codecKv.getKeyLength() +
271                "\n codecKv.getValueLength() " + codecKv.getValueLength() +
272                "\n codecKv.getLength() " + codecKv.getLength() +
273                "\n currentKv.getKeyLength() " + currentKv.getKeyLength() +
274                "\n currentKv.getValueLength() " + currentKv.getValueLength() +
275                "\n codecKv.getLength() " + currentKv.getLength() +
276                "\n currentKV rowLength " + currentKv.getRowLength() +
277                " familyName " + currentKv.getFamilyLength() +
278                " qualifier " + currentKv.getQualifierLength() +
279                "\n prefix " + prefix +
280                "\n codecKv   '" + Bytes.toStringBinary(codecKv.getBuffer(),
281                    codecKv.getOffset(), prefix) + "' diff '" +
282                    Bytes.toStringBinary(codecKv.getBuffer(),
283                        codecKv.getOffset() + prefix, codecKv.getLength() -
284                        prefix) + "'" +
285                "\n currentKv '" + Bytes.toStringBinary(
286                   currentKv.getBuffer(),
287                   currentKv.getOffset(), prefix) + "' diff '" +
288                   Bytes.toStringBinary(currentKv.getBuffer(),
289                       currentKv.getOffset() + prefix, currentKv.getLength() -
290                       prefix) + "'"
291                );
292          }
293          return false;
294        }
295      }
296    }
297
298    LOG.info("Verification was successful!");
299
300    return true;
301  }
302
303  /**
304   * Benchmark codec's speed.
305   */
306  public void benchmarkCodecs() throws IOException {
307    LOG.info("Starting a throughput benchmark for data block encoding codecs");
308    int prevTotalSize = -1;
309    for (EncodedDataBlock codec : codecs) {
310      prevTotalSize = benchmarkEncoder(prevTotalSize, codec);
311    }
312
313    benchmarkDefaultCompression(prevTotalSize, rawKVs);
314  }
315
316  /**
317   * Benchmark compression/decompression throughput.
318   * @param previousTotalSize Total size used for verification. Use -1 if
319   *          unknown.
320   * @param codec Tested encoder.
321   * @return Size of uncompressed data.
322   */
323  private int benchmarkEncoder(int previousTotalSize, EncodedDataBlock codec) {
324    int prevTotalSize = previousTotalSize;
325    int totalSize = 0;
326
327    // decompression time
328    List<Long> durations = new ArrayList<>();
329    for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
330      totalSize = 0;
331
332      Iterator<Cell> it;
333
334      it = codec.getIterator(HFileBlock.headerSize(useHBaseChecksum));
335
336      // count only the algorithm time, without memory allocations
337      // (expect first time)
338      final long startTime = System.nanoTime();
339      while (it.hasNext()) {
340        totalSize += KeyValueUtil.ensureKeyValue(it.next()).getLength();
341      }
342      final long finishTime = System.nanoTime();
343      if (itTime >= benchmarkNOmit) {
344        durations.add(finishTime - startTime);
345      }
346
347      if (prevTotalSize != -1 && prevTotalSize != totalSize) {
348        throw new IllegalStateException(String.format(
349            "Algorithm '%s' decoded data to different size", codec.toString()));
350      }
351      prevTotalSize = totalSize;
352    }
353
354    List<Long> encodingDurations = new ArrayList<>();
355    for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
356      final long startTime = System.nanoTime();
357      codec.encodeData();
358      final long finishTime = System.nanoTime();
359      if (itTime >= benchmarkNOmit) {
360        encodingDurations.add(finishTime - startTime);
361      }
362    }
363
364    System.out.println(codec.toString() + ":");
365    printBenchmarkResult(totalSize, encodingDurations, Manipulation.ENCODING);
366    printBenchmarkResult(totalSize, durations, Manipulation.DECODING);
367    System.out.println();
368
369    return prevTotalSize;
370  }
371
372  private void benchmarkDefaultCompression(int totalSize, byte[] rawBuffer)
373      throws IOException {
374    benchmarkAlgorithm(compressionAlgorithm,
375        compressionAlgorithmName.toUpperCase(Locale.ROOT), rawBuffer, 0, totalSize);
376  }
377
378  /**
379   * Check decompress performance of a given algorithm and print it.
380   * @param algorithm Compression algorithm.
381   * @param name Name of algorithm.
382   * @param buffer Buffer to be compressed.
383   * @param offset Position of the beginning of the data.
384   * @param length Length of data in buffer.
385   * @throws IOException
386   */
387  public void benchmarkAlgorithm(Compression.Algorithm algorithm, String name,
388      byte[] buffer, int offset, int length) throws IOException {
389    System.out.println(name + ":");
390
391    // compress it
392    List<Long> compressDurations = new ArrayList<>();
393    ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
394    CompressionOutputStream compressingStream =
395        algorithm.createPlainCompressionStream(compressedStream, compressor);
396    try {
397      for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
398        final long startTime = System.nanoTime();
399        compressingStream.resetState();
400        compressedStream.reset();
401        compressingStream.write(buffer, offset, length);
402        compressingStream.flush();
403        compressedStream.toByteArray();
404
405        final long finishTime = System.nanoTime();
406
407        // add time record
408        if (itTime >= benchmarkNOmit) {
409          compressDurations.add(finishTime - startTime);
410        }
411      }
412    } catch (IOException e) {
413      throw new RuntimeException(String.format(
414          "Benchmark, or encoding algorithm '%s' cause some stream problems",
415          name), e);
416    }
417    compressingStream.close();
418    printBenchmarkResult(length, compressDurations, Manipulation.COMPRESSION);
419
420    byte[] compBuffer = compressedStream.toByteArray();
421
422    // uncompress it several times and measure performance
423    List<Long> durations = new ArrayList<>();
424    for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
425      final long startTime = System.nanoTime();
426      byte[] newBuf = new byte[length + 1];
427
428      try {
429        ByteArrayInputStream downStream = new ByteArrayInputStream(compBuffer,
430            0, compBuffer.length);
431        InputStream decompressedStream = algorithm.createDecompressionStream(
432            downStream, decompressor, 0);
433
434        int destOffset = 0;
435        int nextChunk;
436        while ((nextChunk = decompressedStream.available()) > 0) {
437          destOffset += decompressedStream.read(newBuf, destOffset, nextChunk);
438        }
439        decompressedStream.close();
440
441        // iterate over KeyValues
442        KeyValue kv;
443        for (int pos = 0; pos < length; pos += kv.getLength()) {
444          kv = new KeyValue(newBuf, pos);
445        }
446
447      } catch (IOException e) {
448        throw new RuntimeException(String.format(
449            "Decoding path in '%s' algorithm cause exception ", name), e);
450      }
451
452      final long finishTime = System.nanoTime();
453
454      // check correctness
455      if (0 != Bytes.compareTo(buffer, 0, length, newBuf, 0, length)) {
456        int prefix = 0;
457        for(; prefix < buffer.length && prefix < newBuf.length; ++prefix) {
458          if (buffer[prefix] != newBuf[prefix]) {
459            break;
460          }
461        }
462        throw new RuntimeException(String.format(
463            "Algorithm '%s' is corrupting the data", name));
464      }
465
466      // add time record
467      if (itTime >= benchmarkNOmit) {
468        durations.add(finishTime - startTime);
469      }
470    }
471    printBenchmarkResult(length, durations, Manipulation.DECOMPRESSION);
472    System.out.println();
473  }
474
475  private static final double BYTES_IN_MB = 1024 * 1024.0;
476  private static final double NS_IN_SEC = 1000.0 * 1000.0 * 1000.0;
477  private static final double MB_SEC_COEF = NS_IN_SEC / BYTES_IN_MB;
478
479  private static void printBenchmarkResult(int totalSize,
480      List<Long> durationsInNanoSec, Manipulation manipulation) {
481    final int n = durationsInNanoSec.size();
482    long meanTime = 0;
483    for (long time : durationsInNanoSec) {
484      meanTime += time;
485    }
486    meanTime /= n;
487
488    double meanMBPerSec = totalSize * MB_SEC_COEF / meanTime;
489    double mbPerSecSTD = 0;
490    if (n > 0) {
491      for (long time : durationsInNanoSec) {
492        double mbPerSec = totalSize * MB_SEC_COEF / time;
493        double dev = mbPerSec - meanMBPerSec;
494        mbPerSecSTD += dev * dev;
495      }
496      mbPerSecSTD = Math.sqrt(mbPerSecSTD / n);
497    }
498
499    outputTuple(manipulation + " performance", "%6.2f MB/s (+/- %.2f MB/s)",
500         meanMBPerSec, mbPerSecSTD);
501  }
502
503  private static void outputTuple(String caption, String format,
504      Object... values) {
505    if (format.startsWith(INT_FORMAT)) {
506      format = "%s" + format.substring(INT_FORMAT.length());
507      values[0] = DELIMITED_DECIMAL_FORMAT.format(values[0]);
508    }
509
510    StringBuilder sb = new StringBuilder();
511    sb.append("  ");
512    sb.append(caption);
513    sb.append(":");
514
515    String v = String.format(format, values);
516    int padding = 60 - sb.length() - v.length();
517    for (int i = 0; i < padding; ++i) {
518      sb.append(' ');
519    }
520    sb.append(v);
521    System.out.println(sb);
522  }
523
524  /**
525   * Display statistics of different compression algorithms.
526   * @throws IOException
527   */
528  public void displayStatistics() throws IOException {
529    final String comprAlgo = compressionAlgorithmName.toUpperCase(Locale.ROOT);
530    long rawBytes = totalKeyLength + totalPrefixLength + totalValueLength;
531
532    System.out.println("Raw data size:");
533    outputTuple("Raw bytes", INT_FORMAT, rawBytes);
534    outputTuplePct("Key bytes", totalKeyLength);
535    outputTuplePct("Value bytes", totalValueLength);
536    outputTuplePct("KV infrastructure", totalPrefixLength);
537    outputTuplePct("CF overhead", totalCFLength);
538    outputTuplePct("Total key redundancy", totalKeyRedundancyLength);
539
540    int compressedSize = EncodedDataBlock.getCompressedSize(
541        compressionAlgorithm, compressor, rawKVs, 0, rawKVs.length);
542    outputTuple(comprAlgo + " only size", INT_FORMAT,
543        compressedSize);
544    outputSavings(comprAlgo + " only", compressedSize, rawBytes);
545    System.out.println();
546
547    for (EncodedDataBlock codec : codecs) {
548      System.out.println(codec.toString());
549      long encodedBytes = codec.getSize();
550      outputTuple("Encoded bytes", INT_FORMAT, encodedBytes);
551      outputSavings("Key encoding", encodedBytes - totalValueLength,
552          rawBytes - totalValueLength);
553      outputSavings("Total encoding", encodedBytes, rawBytes);
554
555      int encodedCompressedSize = codec.getEncodedCompressedSize(
556          compressionAlgorithm, compressor);
557      outputTuple("Encoding + " + comprAlgo + " size", INT_FORMAT,
558          encodedCompressedSize);
559      outputSavings("Encoding + " + comprAlgo, encodedCompressedSize, rawBytes);
560      outputSavings("Encoding with " + comprAlgo, encodedCompressedSize,
561          compressedSize);
562
563      System.out.println();
564    }
565  }
566
567  private void outputTuplePct(String caption, long size) {
568    outputTuple(caption, INT_FORMAT + " (" + PCT_FORMAT + ")",
569        size, size * 100.0 / rawKVs.length);
570  }
571
572  private void outputSavings(String caption, long part, long whole) {
573    double pct = 100.0 * (1 - 1.0 * part / whole);
574    double times = whole * 1.0 / part;
575    outputTuple(caption + " savings", PCT_FORMAT + " (%.2f x)",
576        pct, times);
577  }
578
579  /**
580   * Test a data block encoder on the given HFile. Output results to console.
581   * @param kvLimit The limit of KeyValue which will be analyzed.
582   * @param hfilePath an HFile path on the file system.
583   * @param compressionName Compression algorithm used for comparison.
584   * @param doBenchmark Run performance benchmarks.
585   * @param doVerify Verify correctness.
586   * @throws IOException When pathName is incorrect.
587   */
588  public static void testCodecs(Configuration conf, int kvLimit,
589      String hfilePath, String compressionName, boolean doBenchmark,
590      boolean doVerify) throws IOException {
591    // create environment
592    Path path = new Path(hfilePath);
593    CacheConfig cacheConf = new CacheConfig(conf);
594    FileSystem fs = FileSystem.get(conf);
595    HStoreFile hsf = new HStoreFile(fs, path, conf, cacheConf, BloomType.NONE, true);
596    hsf.initReader();
597    StoreFileReader reader = hsf.getReader();
598    reader.loadFileInfo();
599    KeyValueScanner scanner = reader.getStoreFileScanner(true, true, false, 0, 0, false);
600
601    // run the utilities
602    DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName);
603    int majorVersion = reader.getHFileVersion();
604    comp.useHBaseChecksum = majorVersion > 2 ||
605      (majorVersion == 2 &&
606       reader.getHFileMinorVersion() >= HFileReaderImpl.MINOR_VERSION_WITH_CHECKSUM);
607    comp.checkStatistics(scanner, kvLimit);
608    if (doVerify) {
609      comp.verifyCodecs(scanner, kvLimit);
610    }
611    if (doBenchmark) {
612      comp.benchmarkCodecs();
613    }
614    comp.displayStatistics();
615
616    // cleanup
617    scanner.close();
618    reader.close(cacheConf.shouldEvictOnClose());
619  }
620
621  private static void printUsage(Options options) {
622    System.err.println("Usage:");
623    System.err.println(String.format("./hbase %s <options>",
624        DataBlockEncodingTool.class.getName()));
625    System.err.println("Options:");
626    for (Object it : options.getOptions()) {
627      Option opt = (Option) it;
628      if (opt.hasArg()) {
629        System.err.println(String.format("-%s %s: %s", opt.getOpt(),
630            opt.getArgName(), opt.getDescription()));
631      } else {
632        System.err.println(String.format("-%s: %s", opt.getOpt(),
633            opt.getDescription()));
634      }
635    }
636  }
637
638  /**
639   * A command line interface to benchmarks. Parses command-line arguments and
640   * runs the appropriate benchmarks.
641   * @param args Should have length at least 1 and holds the file path to HFile.
642   * @throws IOException If you specified the wrong file.
643   */
644  public static void main(final String[] args) throws IOException {
645    // set up user arguments
646    Options options = new Options();
647    options.addOption(OPT_HFILE_NAME, true, "HFile to analyse (REQUIRED)");
648    options.getOption(OPT_HFILE_NAME).setArgName("FILENAME");
649    options.addOption(OPT_KV_LIMIT, true,
650        "Maximum number of KeyValues to process. A benchmark stops running " +
651        "after iterating over this many KV pairs.");
652    options.getOption(OPT_KV_LIMIT).setArgName("NUMBER");
653    options.addOption(OPT_MEASURE_THROUGHPUT, false,
654        "Measure read throughput");
655    options.addOption(OPT_OMIT_CORRECTNESS_TEST, false,
656        "Omit corectness tests.");
657    options.addOption(OPT_ENCODING_ALGORITHM, true,
658        "What kind of compression algorithm use for comparison.");
659    options.addOption(OPT_BENCHMARK_N_TIMES,
660        true, "Number of times to run each benchmark. Default value: " +
661            DEFAULT_BENCHMARK_N_TIMES);
662    options.addOption(OPT_BENCHMARK_N_OMIT, true,
663        "Number of first runs of every benchmark to exclude from "
664            + "statistics (" + DEFAULT_BENCHMARK_N_OMIT
665            + " by default, so that " + "only the last "
666            + (DEFAULT_BENCHMARK_N_TIMES - DEFAULT_BENCHMARK_N_OMIT)
667            + " times are included in statistics.)");
668
669    // parse arguments
670    CommandLineParser parser = new PosixParser();
671    CommandLine cmd = null;
672    try {
673      cmd = parser.parse(options, args);
674    } catch (ParseException e) {
675      System.err.println("Could not parse arguments!");
676      System.exit(-1);
677      return; // avoid warning
678    }
679
680    int kvLimit = Integer.MAX_VALUE;
681    if (cmd.hasOption(OPT_KV_LIMIT)) {
682      kvLimit = Integer.parseInt(cmd.getOptionValue(OPT_KV_LIMIT));
683    }
684
685    // basic argument sanity checks
686    if (!cmd.hasOption(OPT_HFILE_NAME)) {
687      LOG.error("Please specify HFile name using the " + OPT_HFILE_NAME
688          + " option");
689      printUsage(options);
690      System.exit(-1);
691    }
692
693    String pathName = cmd.getOptionValue(OPT_HFILE_NAME);
694    String compressionName = DEFAULT_COMPRESSION.getName();
695    if (cmd.hasOption(OPT_ENCODING_ALGORITHM)) {
696      compressionName =
697          cmd.getOptionValue(OPT_ENCODING_ALGORITHM).toLowerCase(Locale.ROOT);
698    }
699    boolean doBenchmark = cmd.hasOption(OPT_MEASURE_THROUGHPUT);
700    boolean doVerify = !cmd.hasOption(OPT_OMIT_CORRECTNESS_TEST);
701
702    if (cmd.hasOption(OPT_BENCHMARK_N_TIMES)) {
703      benchmarkNTimes = Integer.valueOf(cmd.getOptionValue(
704          OPT_BENCHMARK_N_TIMES));
705    }
706    if (cmd.hasOption(OPT_BENCHMARK_N_OMIT)) {
707      benchmarkNOmit =
708          Integer.valueOf(cmd.getOptionValue(OPT_BENCHMARK_N_OMIT));
709    }
710    if (benchmarkNTimes < benchmarkNOmit) {
711      LOG.error("The number of times to run each benchmark ("
712          + benchmarkNTimes
713          + ") must be greater than the number of benchmark runs to exclude "
714          + "from statistics (" + benchmarkNOmit + ")");
715      System.exit(1);
716    }
717    LOG.info("Running benchmark " + benchmarkNTimes + " times. " +
718        "Excluding the first " + benchmarkNOmit + " times from statistics.");
719
720    final Configuration conf = HBaseConfiguration.create();
721    try {
722      testCodecs(conf, kvLimit, pathName, compressionName, doBenchmark,
723          doVerify);
724    } finally {
725      (new CacheConfig(conf)).getBlockCache().shutdown();
726    }
727  }
728
729}