001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.Arrays; 022import java.util.Random; 023import java.util.concurrent.ThreadLocalRandom; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.fs.FileSystem; 026import org.apache.hadoop.fs.Path; 027import org.apache.hadoop.hbase.HBaseConfiguration; 028import org.apache.hadoop.hbase.HConstants; 029import org.apache.hadoop.hbase.KeyValue; 030import org.apache.hadoop.hbase.io.compress.Compression; 031import org.apache.hadoop.hbase.io.hfile.CacheConfig; 032import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex; 033import org.apache.hadoop.hbase.io.hfile.HFileContext; 034import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; 035import org.apache.hadoop.hbase.util.BloomFilterFactory; 036import org.apache.hadoop.hbase.util.BloomFilterUtil; 037import org.apache.hadoop.io.BytesWritable; 038import org.slf4j.Logger; 039import org.slf4j.LoggerFactory; 040 041import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 042import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser; 043import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter; 044import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; 045import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; 046import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser; 047 048/** 049 * Creates an HFile with random key/value pairs. 050 */ 051public class CreateRandomStoreFile { 052 053 /** 054 * As much as this number of bytes can be added or subtracted from key/value lengths. 055 */ 056 private static final int LEN_VARIATION = 5; 057 058 private static final Logger LOG = LoggerFactory.getLogger(CreateRandomStoreFile.class); 059 private static final String OUTPUT_DIR_OPTION = "o"; 060 private static final String NUM_KV_OPTION = "n"; 061 private static final String HFILE_VERSION_OPTION = "h"; 062 private static final String KEY_SIZE_OPTION = "k"; 063 private static final String VALUE_SIZE_OPTION = "v"; 064 private static final String COMPRESSION_OPTION = "c"; 065 private static final String BLOOM_FILTER_OPTION = "bf"; 066 private static final String BLOOM_FILTER_PARAM_OPTION = "bfp"; 067 private static final String BLOCK_SIZE_OPTION = "bs"; 068 private static final String BLOOM_BLOCK_SIZE_OPTION = "bfbs"; 069 private static final String INDEX_BLOCK_SIZE_OPTION = "ibs"; 070 071 /** The exit code this command-line tool returns on failure */ 072 private static final int EXIT_FAILURE = 1; 073 074 /** The number of valid key types in a store file */ 075 private static final int NUM_VALID_KEY_TYPES = KeyValue.Type.values().length - 2; 076 077 private Options options = new Options(); 078 079 private int keyPrefixLen, keyLen, rowLen, cfLen, valueLen; 080 081 /** 082 * Runs the tools. 083 * @param args command-line arguments 084 * @return true in case of success n 085 */ 086 public boolean run(String[] args) throws IOException { 087 options.addOption(OUTPUT_DIR_OPTION, "output_dir", true, "Output directory"); 088 options.addOption(NUM_KV_OPTION, "num_kv", true, "Number of key/value pairs"); 089 options.addOption(KEY_SIZE_OPTION, "key_size", true, "Average key size"); 090 options.addOption(VALUE_SIZE_OPTION, "value_size", true, "Average value size"); 091 options.addOption(HFILE_VERSION_OPTION, "hfile_version", true, "HFile version to create"); 092 options.addOption(COMPRESSION_OPTION, "compression", true, 093 " Compression type, one of " + Arrays.toString(Compression.Algorithm.values())); 094 options.addOption(BLOOM_FILTER_OPTION, "bloom_filter", true, 095 "Bloom filter type, one of " + Arrays.toString(BloomType.values())); 096 options.addOption(BLOOM_FILTER_PARAM_OPTION, "bloom_param", true, 097 "the parameter of the bloom filter"); 098 options.addOption(BLOCK_SIZE_OPTION, "block_size", true, "HFile block size"); 099 options.addOption(BLOOM_BLOCK_SIZE_OPTION, "bloom_block_size", true, 100 "Compound Bloom filters block size"); 101 options.addOption(INDEX_BLOCK_SIZE_OPTION, "index_block_size", true, "Index block size"); 102 103 if (args.length == 0) { 104 HelpFormatter formatter = new HelpFormatter(); 105 formatter.printHelp(CreateRandomStoreFile.class.getSimpleName(), options, true); 106 return false; 107 } 108 109 CommandLineParser parser = new PosixParser(); 110 CommandLine cmdLine; 111 try { 112 cmdLine = parser.parse(options, args); 113 } catch (ParseException ex) { 114 LOG.error(ex.toString(), ex); 115 return false; 116 } 117 118 if (!cmdLine.hasOption(OUTPUT_DIR_OPTION)) { 119 LOG.error("Output directory is not specified"); 120 return false; 121 } 122 123 if (!cmdLine.hasOption(NUM_KV_OPTION)) { 124 LOG.error("The number of keys/values not specified"); 125 return false; 126 } 127 128 if (!cmdLine.hasOption(KEY_SIZE_OPTION)) { 129 LOG.error("Key size is not specified"); 130 return false; 131 } 132 133 if (!cmdLine.hasOption(VALUE_SIZE_OPTION)) { 134 LOG.error("Value size not specified"); 135 return false; 136 } 137 138 Configuration conf = HBaseConfiguration.create(); 139 140 Path outputDir = new Path(cmdLine.getOptionValue(OUTPUT_DIR_OPTION)); 141 142 long numKV = Long.parseLong(cmdLine.getOptionValue(NUM_KV_OPTION)); 143 configureKeyValue(numKV, Integer.parseInt(cmdLine.getOptionValue(KEY_SIZE_OPTION)), 144 Integer.parseInt(cmdLine.getOptionValue(VALUE_SIZE_OPTION))); 145 146 FileSystem fs = FileSystem.get(conf); 147 148 Compression.Algorithm compr = Compression.Algorithm.NONE; 149 if (cmdLine.hasOption(COMPRESSION_OPTION)) { 150 compr = Compression.Algorithm.valueOf(cmdLine.getOptionValue(COMPRESSION_OPTION)); 151 } 152 153 BloomType bloomType = BloomType.NONE; 154 if (cmdLine.hasOption(BLOOM_FILTER_OPTION)) { 155 bloomType = BloomType.valueOf(cmdLine.getOptionValue(BLOOM_FILTER_OPTION)); 156 } 157 158 if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) { 159 if (!cmdLine.hasOption(BLOOM_FILTER_PARAM_OPTION)) { 160 LOG.error("the parameter of bloom filter is not specified"); 161 return false; 162 } else { 163 conf.set(BloomFilterUtil.PREFIX_LENGTH_KEY, 164 cmdLine.getOptionValue(BLOOM_FILTER_PARAM_OPTION)); 165 } 166 } 167 168 int blockSize = HConstants.DEFAULT_BLOCKSIZE; 169 if (cmdLine.hasOption(BLOCK_SIZE_OPTION)) 170 blockSize = Integer.valueOf(cmdLine.getOptionValue(BLOCK_SIZE_OPTION)); 171 172 if (cmdLine.hasOption(BLOOM_BLOCK_SIZE_OPTION)) { 173 conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, 174 Integer.valueOf(cmdLine.getOptionValue(BLOOM_BLOCK_SIZE_OPTION))); 175 } 176 177 if (cmdLine.hasOption(INDEX_BLOCK_SIZE_OPTION)) { 178 conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, 179 Integer.valueOf(cmdLine.getOptionValue(INDEX_BLOCK_SIZE_OPTION))); 180 } 181 182 HFileContext meta = 183 new HFileContextBuilder().withCompression(compr).withBlockSize(blockSize).build(); 184 StoreFileWriter sfw = 185 new StoreFileWriter.Builder(conf, new CacheConfig(conf), fs).withOutputDir(outputDir) 186 .withBloomType(bloomType).withMaxKeyCount(numKV).withFileContext(meta).build(); 187 188 LOG.info("Writing " + numKV + " key/value pairs"); 189 for (long i = 0; i < numKV; ++i) { 190 sfw.append(generateKeyValue(i)); 191 } 192 193 int numMetaBlocks = ThreadLocalRandom.current().nextInt(10) + 1; 194 LOG.info("Writing " + numMetaBlocks + " meta blocks"); 195 for (int metaI = 0; metaI < numMetaBlocks; ++metaI) { 196 sfw.getHFileWriter().appendMetaBlock(generateString(), new BytesWritable(generateValue())); 197 } 198 sfw.close(); 199 200 Path storeFilePath = sfw.getPath(); 201 long fileSize = fs.getFileStatus(storeFilePath).getLen(); 202 LOG.info("Created {}, {} bytes, compression={}", storeFilePath, fileSize, compr.toString()); 203 204 return true; 205 } 206 207 private void configureKeyValue(long numKV, int keyLen, int valueLen) { 208 numKV = Math.abs(numKV); 209 keyLen = Math.abs(keyLen); 210 keyPrefixLen = 0; 211 while (numKV != 0) { 212 numKV >>>= 8; 213 ++keyPrefixLen; 214 } 215 216 this.keyLen = Math.max(keyPrefixLen, keyLen); 217 this.valueLen = valueLen; 218 219 // Arbitrarily split the key into row, column family, and qualifier. 220 rowLen = keyPrefixLen / 3; 221 cfLen = keyPrefixLen / 4; 222 } 223 224 private int nextInRange(int range) { 225 return ThreadLocalRandom.current().nextInt(2 * range + 1) - range; 226 } 227 228 public KeyValue generateKeyValue(long i) { 229 byte[] k = generateKey(i); 230 byte[] v = generateValue(); 231 Random rand = ThreadLocalRandom.current(); 232 return new KeyValue(k, 0, rowLen, k, rowLen, cfLen, k, rowLen + cfLen, 233 k.length - rowLen - cfLen, rand.nextLong(), generateKeyType(rand), v, 0, v.length); 234 } 235 236 public static KeyValue.Type generateKeyType(Random rand) { 237 if (rand.nextBoolean()) { 238 // Let's make half of KVs puts. 239 return KeyValue.Type.Put; 240 } else { 241 KeyValue.Type keyType = KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)]; 242 if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum) { 243 throw new RuntimeException("Generated an invalid key type: " + keyType + ". " 244 + "Probably the layout of KeyValue.Type has changed."); 245 } 246 return keyType; 247 } 248 } 249 250 private String generateString() { 251 Random rand = ThreadLocalRandom.current(); 252 StringBuilder sb = new StringBuilder(); 253 for (int i = 0; i < rand.nextInt(10); ++i) { 254 sb.append((char) ('A' + rand.nextInt(26))); 255 } 256 return sb.toString(); 257 } 258 259 private byte[] generateKey(long i) { 260 Random rand = ThreadLocalRandom.current(); 261 byte[] k = new byte[Math.max(keyPrefixLen, keyLen + nextInRange(LEN_VARIATION))]; 262 for (int pos = keyPrefixLen - 1; pos >= 0; --pos) { 263 k[pos] = (byte) (i & 0xFF); 264 i >>>= 8; 265 } 266 for (int pos = keyPrefixLen; pos < k.length; ++pos) { 267 k[pos] = (byte) rand.nextInt(256); 268 } 269 return k; 270 } 271 272 private byte[] generateValue() { 273 Random rand = ThreadLocalRandom.current(); 274 byte[] v = new byte[Math.max(1, valueLen + nextInRange(LEN_VARIATION))]; 275 for (int i = 0; i < v.length; ++i) { 276 v[i] = (byte) rand.nextInt(256); 277 } 278 return v; 279 } 280 281 public static void main(String[] args) { 282 CreateRandomStoreFile app = new CreateRandomStoreFile(); 283 try { 284 if (!app.run(args)) System.exit(EXIT_FAILURE); 285 } catch (IOException ex) { 286 LOG.error(ex.toString(), ex); 287 System.exit(EXIT_FAILURE); 288 } 289 290 } 291 292}