001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with this 004 * work for additional information regarding copyright ownership. The ASF 005 * licenses this file to you under the Apache License, Version 2.0 (the 006 * "License"); you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 014 * License for the specific language governing permissions and limitations 015 * under the License. 016 */ 017package org.apache.hadoop.hbase.util; 018 019import java.util.Random; 020 021import org.apache.yetus.audience.InterfaceAudience; 022import org.slf4j.Logger; 023import org.slf4j.LoggerFactory; 024 025/** 026 * A generator of random keys and values for load testing. Keys are generated 027 * by converting numeric indexes to strings and prefixing them with an MD5 028 * hash. Values are generated by selecting value size in the configured range 029 * and generating a pseudo-random sequence of bytes seeded by key, column 030 * qualifier, and value size. 031 */ 032@InterfaceAudience.Private 033public class LoadTestKVGenerator { 034 035 private static final Logger LOG = LoggerFactory.getLogger(LoadTestKVGenerator.class); 036 private static int logLimit = 10; 037 038 /** A random number generator for determining value size */ 039 private Random randomForValueSize = new Random(); 040 041 private final int minValueSize; 042 private final int maxValueSize; 043 044 public LoadTestKVGenerator(int minValueSize, int maxValueSize) { 045 if (minValueSize <= 0 || maxValueSize <= 0) { 046 throw new IllegalArgumentException("Invalid min/max value sizes: " + 047 minValueSize + ", " + maxValueSize); 048 } 049 this.minValueSize = minValueSize; 050 this.maxValueSize = maxValueSize; 051 } 052 053 /** 054 * Verifies that the given byte array is the same as what would be generated 055 * for the given seed strings (row/cf/column/...). We are assuming that the 056 * value size is correct, and only verify the actual bytes. However, if the 057 * min/max value sizes are set sufficiently high, an accidental match should be 058 * extremely improbable. 059 */ 060 public static boolean verify(byte[] value, byte[]... seedStrings) { 061 byte[] expectedData = getValueForRowColumn(value.length, seedStrings); 062 boolean equals = Bytes.equals(expectedData, value); 063 if (!equals && LOG.isDebugEnabled() && logLimit > 0) { 064 LOG.debug("verify failed, expected value: " + Bytes.toStringBinary(expectedData) 065 + " actual value: "+ Bytes.toStringBinary(value)); 066 logLimit--; // this is not thread safe, but at worst we will have more logging 067 } 068 return equals; 069 } 070 071 /** 072 * Converts the given key to string, and prefixes it with the MD5 hash of 073 * the index's string representation. 074 */ 075 public static String md5PrefixedKey(long key) { 076 String stringKey = Long.toString(key); 077 String md5hash = MD5Hash.getMD5AsHex(Bytes.toBytes(stringKey)); 078 079 // flip the key to randomize 080 return md5hash + "-" + stringKey; 081 } 082 083 /** 084 * Generates a value for the given key index and column qualifier. Size is 085 * selected randomly in the configured range. The generated value depends 086 * only on the combination of the strings passed (key/cf/column/...) and the selected 087 * value size. This allows to verify the actual value bytes when reading, as done 088 * in {#verify(byte[], byte[]...)} 089 * This method is as thread-safe as Random class. It appears that the worst bug ever 090 * found with the latter is that multiple threads will get some duplicate values, which 091 * we don't care about. 092 */ 093 public byte[] generateRandomSizeValue(byte[]... seedStrings) { 094 int dataSize = minValueSize; 095 if(minValueSize != maxValueSize) { 096 dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize)); 097 } 098 return getValueForRowColumn(dataSize, seedStrings); 099 } 100 101 /** 102 * Generates random bytes of the given size for the given row and column 103 * qualifier. The random seed is fully determined by these parameters. 104 */ 105 private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) { 106 long seed = dataSize; 107 for (byte[] str : seedStrings) { 108 final String bytesString = Bytes.toString(str); 109 if (bytesString != null) { 110 seed += bytesString.hashCode(); 111 } 112 } 113 Random seededRandom = new Random(seed); 114 byte[] randomBytes = new byte[dataSize]; 115 seededRandom.nextBytes(randomBytes); 116 return randomBytes; 117 } 118 119}