001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Random;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.HBaseTestingUtil;
028import org.apache.hadoop.hbase.HConstants;
029import org.apache.hadoop.hbase.KeyValue;
030import org.apache.hadoop.hbase.KeyValueUtil;
031import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
032import org.apache.hadoop.hbase.io.hfile.CacheConfig;
033import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
034
035/**
036 * Test seek performance for encoded data blocks. Read an HFile and do several random seeks.
037 */
038public class EncodedSeekPerformanceTest {
039  private static final double NANOSEC_IN_SEC = 1000.0 * 1000.0 * 1000.0;
040  private static final double BYTES_IN_MEGABYTES = 1024.0 * 1024.0;
041  /** Default number of seeks which will be used in benchmark. */
042  public static int DEFAULT_NUMBER_OF_SEEKS = 10000;
043
044  private final HBaseTestingUtil testingUtility = new HBaseTestingUtil();
045  private Configuration configuration = testingUtility.getConfiguration();
046  private CacheConfig cacheConf = new CacheConfig(configuration);
047  private Random randomizer;
048  private int numberOfSeeks;
049
050  /** Use this benchmark with default options */
051  public EncodedSeekPerformanceTest() {
052    configuration.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.5f);
053    randomizer = new Random(42L);
054    numberOfSeeks = DEFAULT_NUMBER_OF_SEEKS;
055  }
056
057  private List<Cell> prepareListOfTestSeeks(Path path) throws IOException {
058    List<Cell> allKeyValues = new ArrayList<>();
059
060    // read all of the key values
061    HStoreFile storeFile = new HStoreFile(testingUtility.getTestFileSystem(), path, configuration,
062      cacheConf, BloomType.NONE, true);
063    storeFile.initReader();
064    StoreFileReader reader = storeFile.getReader();
065    StoreFileScanner scanner = reader.getStoreFileScanner(true, false, false, 0, 0, false);
066    Cell current;
067
068    scanner.seek(KeyValue.LOWESTKEY);
069    while (null != (current = scanner.next())) {
070      allKeyValues.add(current);
071    }
072
073    storeFile.closeStoreFile(cacheConf.shouldEvictOnClose());
074
075    // pick seeks by random
076    List<Cell> seeks = new ArrayList<>();
077    for (int i = 0; i < numberOfSeeks; ++i) {
078      Cell keyValue = allKeyValues.get(randomizer.nextInt(allKeyValues.size()));
079      seeks.add(keyValue);
080    }
081
082    clearBlockCache();
083
084    return seeks;
085  }
086
087  private void runTest(Path path, DataBlockEncoding blockEncoding, List<Cell> seeks)
088    throws IOException {
089    // read all of the key values
090    HStoreFile storeFile = new HStoreFile(testingUtility.getTestFileSystem(), path, configuration,
091      cacheConf, BloomType.NONE, true);
092    storeFile.initReader();
093    long totalSize = 0;
094
095    StoreFileReader reader = storeFile.getReader();
096    StoreFileScanner scanner = reader.getStoreFileScanner(true, false, false, 0, 0, false);
097
098    long startReadingTime = System.nanoTime();
099    Cell current;
100    scanner.seek(KeyValue.LOWESTKEY);
101    while (null != (current = scanner.next())) { // just iterate it!
102      if (KeyValueUtil.ensureKeyValue(current).getLength() < 0) {
103        throw new IOException("Negative KV size: " + current);
104      }
105      totalSize += KeyValueUtil.ensureKeyValue(current).getLength();
106    }
107    long finishReadingTime = System.nanoTime();
108
109    // do seeks
110    long startSeeksTime = System.nanoTime();
111    for (Cell keyValue : seeks) {
112      scanner.seek(keyValue);
113      Cell toVerify = scanner.next();
114      if (!keyValue.equals(toVerify)) {
115        System.out
116          .println(String.format("KeyValue doesn't match:\n" + "Orig key: %s\n" + "Ret key:  %s",
117            KeyValueUtil.ensureKeyValue(keyValue).getKeyString(),
118            KeyValueUtil.ensureKeyValue(toVerify).getKeyString()));
119        break;
120      }
121    }
122    long finishSeeksTime = System.nanoTime();
123    if (finishSeeksTime < startSeeksTime) {
124      throw new AssertionError(
125        "Finish time " + finishSeeksTime + " is earlier than start time " + startSeeksTime);
126    }
127
128    // write some stats
129    double readInMbPerSec =
130      (totalSize * NANOSEC_IN_SEC) / (BYTES_IN_MEGABYTES * (finishReadingTime - startReadingTime));
131    double seeksPerSec = (seeks.size() * NANOSEC_IN_SEC) / (finishSeeksTime - startSeeksTime);
132
133    storeFile.closeStoreFile(cacheConf.shouldEvictOnClose());
134    clearBlockCache();
135
136    System.out.println(blockEncoding);
137    System.out.printf("  Read speed:       %8.2f (MB/s)\n", readInMbPerSec);
138    System.out.printf("  Seeks per second: %8.2f (#/s)\n", seeksPerSec);
139    System.out.printf("  Total KV size:    %d\n", totalSize);
140  }
141
142  /**
143   * @param path      Path to the HFile which will be used.
144   * @param encodings the data block encoding algorithms to use
145   * @throws IOException if there is a bug while reading from disk
146   */
147  public void runTests(Path path, DataBlockEncoding[] encodings) throws IOException {
148    List<Cell> seeks = prepareListOfTestSeeks(path);
149
150    for (DataBlockEncoding blockEncoding : encodings) {
151      runTest(path, blockEncoding, seeks);
152    }
153  }
154
155  /**
156   * Command line interface:
157   * @param args Takes one argument - file size.
158   * @throws IOException if there is a bug while reading from disk
159   */
160  public static void main(final String[] args) throws IOException {
161    if (args.length < 1) {
162      printUsage();
163      System.exit(-1);
164    }
165
166    Path path = new Path(args[0]);
167
168    // TODO, this test doesn't work as expected any more. Need to fix.
169    EncodedSeekPerformanceTest utility = new EncodedSeekPerformanceTest();
170    utility.runTests(path, DataBlockEncoding.values());
171
172    System.exit(0);
173  }
174
175  private static void printUsage() {
176    System.out.println("Usage: one argument, name of the HFile");
177  }
178
179  private void clearBlockCache() {
180    ((LruBlockCache) cacheConf.getBlockCache().get()).clearCache();
181  }
182}