001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with this
004 * work for additional information regarding copyright ownership. The ASF
005 * licenses this file to you under the Apache License, Version 2.0 (the
006 * "License"); you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
014 * License for the specific language governing permissions and limitations
015 * under the License.
016 */
017package org.apache.hadoop.hbase.regionserver;
018
019import java.io.IOException;
020import java.util.ArrayList;
021import java.util.List;
022import java.util.Random;
023
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.HBaseTestingUtility;
028import org.apache.hadoop.hbase.HConstants;
029import org.apache.hadoop.hbase.KeyValue;
030import org.apache.hadoop.hbase.KeyValueUtil;
031import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
032import org.apache.hadoop.hbase.io.hfile.CacheConfig;
033import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
034
035/**
036 * Test seek performance for encoded data blocks. Read an HFile and do several
037 * random seeks.
038 */
039public class EncodedSeekPerformanceTest {
040  private static final double NANOSEC_IN_SEC = 1000.0 * 1000.0 * 1000.0;
041  private static final double BYTES_IN_MEGABYTES = 1024.0 * 1024.0;
042  /** Default number of seeks which will be used in benchmark. */
043  public static int DEFAULT_NUMBER_OF_SEEKS = 10000;
044
045  private final HBaseTestingUtility testingUtility = new HBaseTestingUtility();
046  private Configuration configuration = testingUtility.getConfiguration();
047  private CacheConfig cacheConf = new CacheConfig(configuration);
048  private Random randomizer;
049  private int numberOfSeeks;
050
051  /** Use this benchmark with default options */
052  public EncodedSeekPerformanceTest() {
053    configuration.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.5f);
054    randomizer = new Random(42L);
055    numberOfSeeks = DEFAULT_NUMBER_OF_SEEKS;
056  }
057
058  private List<Cell> prepareListOfTestSeeks(Path path) throws IOException {
059    List<Cell> allKeyValues = new ArrayList<>();
060
061    // read all of the key values
062    HStoreFile storeFile = new HStoreFile(testingUtility.getTestFileSystem(),
063        path, configuration, cacheConf, BloomType.NONE, true);
064    storeFile.initReader();
065    StoreFileReader reader = storeFile.getReader();
066    StoreFileScanner scanner = reader.getStoreFileScanner(true, false, false, 0, 0, false);
067    Cell current;
068
069    scanner.seek(KeyValue.LOWESTKEY);
070    while (null != (current = scanner.next())) {
071      allKeyValues.add(current);
072    }
073
074    storeFile.closeStoreFile(cacheConf.shouldEvictOnClose());
075
076    // pick seeks by random
077    List<Cell> seeks = new ArrayList<>();
078    for (int i = 0; i < numberOfSeeks; ++i) {
079      Cell keyValue = allKeyValues.get(
080          randomizer.nextInt(allKeyValues.size()));
081      seeks.add(keyValue);
082    }
083
084    clearBlockCache();
085
086    return seeks;
087  }
088
089  private void runTest(Path path, DataBlockEncoding blockEncoding,
090      List<Cell> seeks) throws IOException {
091    // read all of the key values
092    HStoreFile storeFile = new HStoreFile(testingUtility.getTestFileSystem(),
093      path, configuration, cacheConf, BloomType.NONE, true);
094    storeFile.initReader();
095    long totalSize = 0;
096
097    StoreFileReader reader = storeFile.getReader();
098    StoreFileScanner scanner = reader.getStoreFileScanner(true, false, false, 0, 0, false);
099
100    long startReadingTime = System.nanoTime();
101    Cell current;
102    scanner.seek(KeyValue.LOWESTKEY);
103    while (null != (current = scanner.next())) { // just iterate it!
104      if (KeyValueUtil.ensureKeyValue(current).getLength() < 0) {
105        throw new IOException("Negative KV size: " + current);
106      }
107      totalSize += KeyValueUtil.ensureKeyValue(current).getLength();
108    }
109    long finishReadingTime = System.nanoTime();
110
111    // do seeks
112    long startSeeksTime = System.nanoTime();
113    for (Cell keyValue : seeks) {
114      scanner.seek(keyValue);
115      Cell toVerify = scanner.next();
116      if (!keyValue.equals(toVerify)) {
117        System.out.println(String.format("KeyValue doesn't match:\n" + "Orig key: %s\n"
118            + "Ret key:  %s", KeyValueUtil.ensureKeyValue(keyValue).getKeyString(), KeyValueUtil
119            .ensureKeyValue(toVerify).getKeyString()));
120        break;
121      }
122    }
123    long finishSeeksTime = System.nanoTime();
124    if (finishSeeksTime < startSeeksTime) {
125      throw new AssertionError("Finish time " + finishSeeksTime +
126          " is earlier than start time " + startSeeksTime);
127    }
128
129    // write some stats
130    double readInMbPerSec = (totalSize * NANOSEC_IN_SEC) /
131        (BYTES_IN_MEGABYTES * (finishReadingTime - startReadingTime));
132    double seeksPerSec = (seeks.size() * NANOSEC_IN_SEC) /
133        (finishSeeksTime - startSeeksTime);
134
135    storeFile.closeStoreFile(cacheConf.shouldEvictOnClose());
136    clearBlockCache();
137
138    System.out.println(blockEncoding);
139    System.out.printf("  Read speed:       %8.2f (MB/s)\n", readInMbPerSec);
140    System.out.printf("  Seeks per second: %8.2f (#/s)\n", seeksPerSec);
141    System.out.printf("  Total KV size:    %d\n", totalSize);
142  }
143
144  /**
145   * @param path Path to the HFile which will be used.
146   * @param encodings the data block encoding algorithms to use
147   * @throws IOException if there is a bug while reading from disk
148   */
149  public void runTests(Path path, DataBlockEncoding[] encodings)
150      throws IOException {
151    List<Cell> seeks = prepareListOfTestSeeks(path);
152
153    for (DataBlockEncoding blockEncoding : encodings) {
154      runTest(path, blockEncoding, seeks);
155    }
156  }
157
158  /**
159   * Command line interface:
160   * @param args Takes one argument - file size.
161   * @throws IOException if there is a bug while reading from disk
162   */
163  public static void main(final String[] args) throws IOException {
164    if (args.length < 1) {
165      printUsage();
166      System.exit(-1);
167    }
168
169    Path path = new Path(args[0]);
170
171    // TODO, this test doesn't work as expected any more. Need to fix.
172    EncodedSeekPerformanceTest utility = new EncodedSeekPerformanceTest();
173    utility.runTests(path, DataBlockEncoding.values());
174
175    System.exit(0);
176  }
177
178  private static void printUsage() {
179    System.out.println("Usage: one argument, name of the HFile");
180  }
181
182  private void clearBlockCache() {
183    ((LruBlockCache) cacheConf.getBlockCache()).clearCache();
184  }
185}