001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with this 004 * work for additional information regarding copyright ownership. The ASF 005 * licenses this file to you under the Apache License, Version 2.0 (the 006 * "License"); you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 014 * License for the specific language governing permissions and limitations 015 * under the License. 016 */ 017package org.apache.hadoop.hbase.regionserver; 018 019import java.io.IOException; 020import java.util.ArrayList; 021import java.util.List; 022import java.util.Random; 023 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.fs.Path; 026import org.apache.hadoop.hbase.Cell; 027import org.apache.hadoop.hbase.HBaseTestingUtility; 028import org.apache.hadoop.hbase.HConstants; 029import org.apache.hadoop.hbase.KeyValue; 030import org.apache.hadoop.hbase.KeyValueUtil; 031import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 032import org.apache.hadoop.hbase.io.hfile.CacheConfig; 033import org.apache.hadoop.hbase.io.hfile.LruBlockCache; 034 035/** 036 * Test seek performance for encoded data blocks. Read an HFile and do several 037 * random seeks. 038 */ 039public class EncodedSeekPerformanceTest { 040 private static final double NANOSEC_IN_SEC = 1000.0 * 1000.0 * 1000.0; 041 private static final double BYTES_IN_MEGABYTES = 1024.0 * 1024.0; 042 /** Default number of seeks which will be used in benchmark. */ 043 public static int DEFAULT_NUMBER_OF_SEEKS = 10000; 044 045 private final HBaseTestingUtility testingUtility = new HBaseTestingUtility(); 046 private Configuration configuration = testingUtility.getConfiguration(); 047 private CacheConfig cacheConf = new CacheConfig(configuration); 048 private Random randomizer; 049 private int numberOfSeeks; 050 051 /** Use this benchmark with default options */ 052 public EncodedSeekPerformanceTest() { 053 configuration.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.5f); 054 randomizer = new Random(42L); 055 numberOfSeeks = DEFAULT_NUMBER_OF_SEEKS; 056 } 057 058 private List<Cell> prepareListOfTestSeeks(Path path) throws IOException { 059 List<Cell> allKeyValues = new ArrayList<>(); 060 061 // read all of the key values 062 HStoreFile storeFile = new HStoreFile(testingUtility.getTestFileSystem(), 063 path, configuration, cacheConf, BloomType.NONE, true); 064 storeFile.initReader(); 065 StoreFileReader reader = storeFile.getReader(); 066 StoreFileScanner scanner = reader.getStoreFileScanner(true, false, false, 0, 0, false); 067 Cell current; 068 069 scanner.seek(KeyValue.LOWESTKEY); 070 while (null != (current = scanner.next())) { 071 allKeyValues.add(current); 072 } 073 074 storeFile.closeStoreFile(cacheConf.shouldEvictOnClose()); 075 076 // pick seeks by random 077 List<Cell> seeks = new ArrayList<>(); 078 for (int i = 0; i < numberOfSeeks; ++i) { 079 Cell keyValue = allKeyValues.get( 080 randomizer.nextInt(allKeyValues.size())); 081 seeks.add(keyValue); 082 } 083 084 clearBlockCache(); 085 086 return seeks; 087 } 088 089 private void runTest(Path path, DataBlockEncoding blockEncoding, 090 List<Cell> seeks) throws IOException { 091 // read all of the key values 092 HStoreFile storeFile = new HStoreFile(testingUtility.getTestFileSystem(), 093 path, configuration, cacheConf, BloomType.NONE, true); 094 storeFile.initReader(); 095 long totalSize = 0; 096 097 StoreFileReader reader = storeFile.getReader(); 098 StoreFileScanner scanner = reader.getStoreFileScanner(true, false, false, 0, 0, false); 099 100 long startReadingTime = System.nanoTime(); 101 Cell current; 102 scanner.seek(KeyValue.LOWESTKEY); 103 while (null != (current = scanner.next())) { // just iterate it! 104 if (KeyValueUtil.ensureKeyValue(current).getLength() < 0) { 105 throw new IOException("Negative KV size: " + current); 106 } 107 totalSize += KeyValueUtil.ensureKeyValue(current).getLength(); 108 } 109 long finishReadingTime = System.nanoTime(); 110 111 // do seeks 112 long startSeeksTime = System.nanoTime(); 113 for (Cell keyValue : seeks) { 114 scanner.seek(keyValue); 115 Cell toVerify = scanner.next(); 116 if (!keyValue.equals(toVerify)) { 117 System.out.println(String.format("KeyValue doesn't match:\n" + "Orig key: %s\n" 118 + "Ret key: %s", KeyValueUtil.ensureKeyValue(keyValue).getKeyString(), KeyValueUtil 119 .ensureKeyValue(toVerify).getKeyString())); 120 break; 121 } 122 } 123 long finishSeeksTime = System.nanoTime(); 124 if (finishSeeksTime < startSeeksTime) { 125 throw new AssertionError("Finish time " + finishSeeksTime + 126 " is earlier than start time " + startSeeksTime); 127 } 128 129 // write some stats 130 double readInMbPerSec = (totalSize * NANOSEC_IN_SEC) / 131 (BYTES_IN_MEGABYTES * (finishReadingTime - startReadingTime)); 132 double seeksPerSec = (seeks.size() * NANOSEC_IN_SEC) / 133 (finishSeeksTime - startSeeksTime); 134 135 storeFile.closeStoreFile(cacheConf.shouldEvictOnClose()); 136 clearBlockCache(); 137 138 System.out.println(blockEncoding); 139 System.out.printf(" Read speed: %8.2f (MB/s)\n", readInMbPerSec); 140 System.out.printf(" Seeks per second: %8.2f (#/s)\n", seeksPerSec); 141 System.out.printf(" Total KV size: %d\n", totalSize); 142 } 143 144 /** 145 * @param path Path to the HFile which will be used. 146 * @param encodings the data block encoding algorithms to use 147 * @throws IOException if there is a bug while reading from disk 148 */ 149 public void runTests(Path path, DataBlockEncoding[] encodings) 150 throws IOException { 151 List<Cell> seeks = prepareListOfTestSeeks(path); 152 153 for (DataBlockEncoding blockEncoding : encodings) { 154 runTest(path, blockEncoding, seeks); 155 } 156 } 157 158 /** 159 * Command line interface: 160 * @param args Takes one argument - file size. 161 * @throws IOException if there is a bug while reading from disk 162 */ 163 public static void main(final String[] args) throws IOException { 164 if (args.length < 1) { 165 printUsage(); 166 System.exit(-1); 167 } 168 169 Path path = new Path(args[0]); 170 171 // TODO, this test doesn't work as expected any more. Need to fix. 172 EncodedSeekPerformanceTest utility = new EncodedSeekPerformanceTest(); 173 utility.runTests(path, DataBlockEncoding.values()); 174 175 System.exit(0); 176 } 177 178 private static void printUsage() { 179 System.out.println("Usage: one argument, name of the HFile"); 180 } 181 182 private void clearBlockCache() { 183 ((LruBlockCache) cacheConf.getBlockCache()).clearCache(); 184 } 185}