/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.util.Random;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A generator of random keys and values for load testing. Keys are generated by converting numeric
 * indexes to strings and prefixing them with an MD5 hash. Values are generated by selecting value
 * size in the configured range and generating a pseudo-random sequence of bytes seeded by key,
 * column qualifier, and value size.
 */
@InterfaceAudience.Private
public class LoadTestKVGenerator {

  private static final Logger LOG = LoggerFactory.getLogger(LoadTestKVGenerator.class);

  /**
   * Remaining number of verification failures that will be logged at debug level. Shared by all
   * threads without synchronization; see {@link #verify(byte[], byte[]...)}.
   */
  private static int logLimit = 10;

  /**
   * A random number generator for determining value size. The reference never changes; the seed
   * may be set with {@link Random#setSeed(long)}.
   */
  private final Random randomForValueSize = new Random();

  private final int minValueSize;
  private final int maxValueSize;

  /**
   * Creates a generator producing values whose size is derived from the given bounds.
   * @param minValueSize the smallest value size to generate, must be positive
   * @param maxValueSize the other bound of the value size range, must be positive
   * @throws IllegalArgumentException if either bound is zero or negative
   */
  public LoadTestKVGenerator(int minValueSize, int maxValueSize) {
    if (minValueSize <= 0 || maxValueSize <= 0) {
      throw new IllegalArgumentException(
        "Invalid min/max value sizes: " + minValueSize + ", " + maxValueSize);
    }
    this.minValueSize = minValueSize;
    this.maxValueSize = maxValueSize;
  }

  /**
   * Verifies that the given byte array is the same as what would be generated for the given seed
   * strings (row/cf/column/...). We are assuming that the value size is correct, and only verify
   * the actual bytes. However, if the min/max value sizes are set sufficiently high, an accidental
   * match should be extremely improbable.
   * @param value       the bytes to check; its length is used as the expected value size
   * @param seedStrings the seed strings the value is expected to have been generated from
   * @return true if the value matches the regenerated bytes, false otherwise
   */
  public static boolean verify(byte[] value, byte[]... seedStrings) {
    byte[] expectedData = getValueForRowColumn(value.length, seedStrings);
    boolean equals = Bytes.equals(expectedData, value);
    if (!equals && LOG.isDebugEnabled() && logLimit > 0) {
      LOG.debug("verify failed, expected value: {} actual value: {}",
        Bytes.toStringBinary(expectedData), Bytes.toStringBinary(value));
      logLimit--; // this is not thread safe, but at worst we will have more logging
    }
    return equals;
  }

  /**
   * Converts the given key to string, and prefixes it with the MD5 hash of the index's string
   * representation.
   * @param key the numeric key index
   * @return the hex MD5 hash of the decimal key string, a dash, and the decimal key string
   */
  public static String md5PrefixedKey(long key) {
    String stringKey = Long.toString(key);
    String md5hash = MD5Hash.getMD5AsHex(Bytes.toBytes(stringKey));

    // Prefix the key with its hash so that the resulting keys are randomized.
    return md5hash + "-" + stringKey;
  }

  /**
   * Generates a value for the given key index and column qualifier. Size is selected randomly in
   * the configured range. The generated value depends only on the combination of the strings passed
   * (key/cf/column/...) and the selected value size. This allows to verify the actual value bytes
   * when reading, as done in {@link #verify(byte[], byte[]...)}. This method is as thread-safe as
   * Random class. It appears that the worst bug ever found with the latter is that multiple threads
   * will get some duplicate values, which we don't care about.
   * <p>
   * When the two configured sizes differ, the size is the minimum plus a random offset in
   * {@code [0, |max - min|)}, so the upper end of that range is exclusive. When they are equal the
   * size is always the minimum.
   * @param seedStrings the strings (key/cf/column/...) that determine the generated bytes
   * @return the generated value
   */
  public byte[] generateRandomSizeValue(byte[]... seedStrings) {
    int dataSize = minValueSize;
    if (minValueSize != maxValueSize) {
      // Random#nextInt requires a positive bound, hence the absolute difference.
      dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize));
    }
    return getValueForRowColumn(dataSize, seedStrings);
  }

  /**
   * Generates random bytes of the given size for the given row and column qualifier. The random
   * seed is fully determined by these parameters: it is the data size plus the sum of the string
   * hash codes of the seed strings, so the order of the seed strings does not matter. Seed strings
   * that convert to a null string are ignored.
   * @param dataSize    the number of bytes to generate
   * @param seedStrings the strings contributing to the random seed
   * @return a new array of {@code dataSize} pseudo-random bytes
   */
  private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) {
    long seed = dataSize;
    for (byte[] str : seedStrings) {
      final String bytesString = Bytes.toString(str);
      if (bytesString != null) {
        seed += bytesString.hashCode();
      }
    }
    Random seededRandom = new Random(seed);
    byte[] randomBytes = new byte[dataSize];
    seededRandom.nextBytes(randomBytes);
    return randomBytes;
  }

}