001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import java.util.Random;
021import org.apache.hadoop.hbase.util.RandomDistribution;
022import org.apache.hadoop.io.BytesWritable;
023import org.apache.hadoop.io.WritableComparator;
024
025/**
026 * Generate random <key, value> pairs.
027 * <p>
028 * Copied from <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
029 * Remove after tfile is committed and use the tfile version of this class instead.
030 * </p>
031 */
032class KVGenerator {
033  private final Random random;
034  private final byte[][] dict;
035  private final boolean sorted;
036  private final RandomDistribution.DiscreteRNG keyLenRNG, valLenRNG;
037  private BytesWritable lastKey;
038  private static final int MIN_KEY_LEN = 4;
039  private final byte prefix[] = new byte[MIN_KEY_LEN];
040
041  public KVGenerator(Random random, boolean sorted, RandomDistribution.DiscreteRNG keyLenRNG,
042    RandomDistribution.DiscreteRNG valLenRNG, RandomDistribution.DiscreteRNG wordLenRNG,
043    int dictSize) {
044    this.random = random;
045    dict = new byte[dictSize][];
046    this.sorted = sorted;
047    this.keyLenRNG = keyLenRNG;
048    this.valLenRNG = valLenRNG;
049    for (int i = 0; i < dictSize; ++i) {
050      int wordLen = wordLenRNG.nextInt();
051      dict[i] = new byte[wordLen];
052      random.nextBytes(dict[i]);
053    }
054    lastKey = new BytesWritable();
055    fillKey(lastKey);
056  }
057
058  private void fillKey(BytesWritable o) {
059    int len = keyLenRNG.nextInt();
060    if (len < MIN_KEY_LEN) len = MIN_KEY_LEN;
061    o.setSize(len);
062    int n = MIN_KEY_LEN;
063    while (n < len) {
064      byte[] word = dict[random.nextInt(dict.length)];
065      int l = Math.min(word.length, len - n);
066      System.arraycopy(word, 0, o.get(), n, l);
067      n += l;
068    }
069    if (
070      sorted && WritableComparator.compareBytes(lastKey.get(), MIN_KEY_LEN,
071        lastKey.getSize() - MIN_KEY_LEN, o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN) > 0
072    ) {
073      incrementPrefix();
074    }
075
076    System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN);
077    lastKey.set(o);
078  }
079
080  private void fillValue(BytesWritable o) {
081    int len = valLenRNG.nextInt();
082    o.setSize(len);
083    int n = 0;
084    while (n < len) {
085      byte[] word = dict[random.nextInt(dict.length)];
086      int l = Math.min(word.length, len - n);
087      System.arraycopy(word, 0, o.get(), n, l);
088      n += l;
089    }
090  }
091
092  private void incrementPrefix() {
093    for (int i = MIN_KEY_LEN - 1; i >= 0; --i) {
094      ++prefix[i];
095      if (prefix[i] != 0) return;
096    }
097
098    throw new RuntimeException("Prefix overflown");
099  }
100
101  public void next(BytesWritable key, BytesWritable value, boolean dupKey) {
102    if (dupKey) {
103      key.set(lastKey);
104    } else {
105      fillKey(key);
106    }
107    fillValue(value);
108  }
109}