001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with this
004 * work for additional information regarding copyright ownership. The ASF
005 * licenses this file to you under the Apache License, Version 2.0 (the
006 * "License"); you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
014 * License for the specific language governing permissions and limitations
015 * under the License.
016 */
017package org.apache.hadoop.hbase.util;
018
019import java.util.Random;
020
021import org.apache.yetus.audience.InterfaceAudience;
022import org.slf4j.Logger;
023import org.slf4j.LoggerFactory;
024
025/**
026 * A generator of random keys and values for load testing. Keys are generated
027 * by converting numeric indexes to strings and prefixing them with an MD5
028 * hash. Values are generated by selecting value size in the configured range
029 * and generating a pseudo-random sequence of bytes seeded by key, column
030 * qualifier, and value size.
031 */
032@InterfaceAudience.Private
033public class LoadTestKVGenerator {
034
035  private static final Logger LOG = LoggerFactory.getLogger(LoadTestKVGenerator.class);
036  private static int logLimit = 10;
037
038  /** A random number generator for determining value size */
039  private Random randomForValueSize = new Random();
040
041  private final int minValueSize;
042  private final int maxValueSize;
043
044  public LoadTestKVGenerator(int minValueSize, int maxValueSize) {
045    if (minValueSize <= 0 || maxValueSize <= 0) {
046      throw new IllegalArgumentException("Invalid min/max value sizes: " +
047          minValueSize + ", " + maxValueSize);
048    }
049    this.minValueSize = minValueSize;
050    this.maxValueSize = maxValueSize;
051  }
052
053  /**
054   * Verifies that the given byte array is the same as what would be generated
055   * for the given seed strings (row/cf/column/...). We are assuming that the
056   * value size is correct, and only verify the actual bytes. However, if the
057   * min/max value sizes are set sufficiently high, an accidental match should be
058   * extremely improbable.
059   */
060  public static boolean verify(byte[] value, byte[]... seedStrings) {
061    byte[] expectedData = getValueForRowColumn(value.length, seedStrings);
062    boolean equals = Bytes.equals(expectedData, value);
063    if (!equals && LOG.isDebugEnabled() && logLimit > 0) {
064      LOG.debug("verify failed, expected value: " + Bytes.toStringBinary(expectedData)
065        + " actual value: "+ Bytes.toStringBinary(value));
066      logLimit--; // this is not thread safe, but at worst we will have more logging
067    }
068    return equals;
069  }
070
071  /**
072   * Converts the given key to string, and prefixes it with the MD5 hash of
073   * the index's string representation.
074   */
075  public static String md5PrefixedKey(long key) {
076    String stringKey = Long.toString(key);
077    String md5hash = MD5Hash.getMD5AsHex(Bytes.toBytes(stringKey));
078
079    // flip the key to randomize
080    return md5hash + "-" + stringKey;
081  }
082
083  /**
084   * Generates a value for the given key index and column qualifier. Size is
085   * selected randomly in the configured range. The generated value depends
086   * only on the combination of the strings passed (key/cf/column/...) and the selected
087   * value size. This allows to verify the actual value bytes when reading, as done
088   * in {#verify(byte[], byte[]...)}
089   * This method is as thread-safe as Random class. It appears that the worst bug ever
090   * found with the latter is that multiple threads will get some duplicate values, which
091   * we don't care about.
092   */
093  public byte[] generateRandomSizeValue(byte[]... seedStrings) {
094    int dataSize = minValueSize;
095    if(minValueSize != maxValueSize) {
096      dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize));
097    }
098    return getValueForRowColumn(dataSize, seedStrings);
099  }
100
101  /**
102   * Generates random bytes of the given size for the given row and column
103   * qualifier. The random seed is fully determined by these parameters.
104   */
105  private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) {
106    long seed = dataSize;
107    for (byte[] str : seedStrings) {
108      final String bytesString = Bytes.toString(str);
109      if (bytesString != null) {
110        seed += bytesString.hashCode();
111      }
112    }
113    Random seededRandom = new Random(seed);
114    byte[] randomBytes = new byte[dataSize];
115    seededRandom.nextBytes(randomBytes);
116    return randomBytes;
117  }
118
119}