001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with this
004 * work for additional information regarding copyright ownership. The ASF
005 * licenses this file to you under the Apache License, Version 2.0 (the
006 * "License"); you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
014 * License for the specific language governing permissions and limitations under
015 * the License.
016 */
017package org.apache.hadoop.hbase.io.hfile;
018
019import java.util.Random;
020
021import org.apache.hadoop.io.BytesWritable;
022import org.apache.hadoop.io.WritableComparator;
023
024/**
025 * Generate random <key, value> pairs.
026 * <p>
027 * Copied from
028 * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
029 * Remove after tfile is committed and use the tfile version of this class
030 * instead.</p>
031 */
032class KVGenerator {
033  private final Random random;
034  private final byte[][] dict;
035  private final boolean sorted;
036  private final RandomDistribution.DiscreteRNG keyLenRNG, valLenRNG;
037  private BytesWritable lastKey;
038  private static final int MIN_KEY_LEN = 4;
039  private final byte prefix[] = new byte[MIN_KEY_LEN];
040
041  public KVGenerator(Random random, boolean sorted,
042      RandomDistribution.DiscreteRNG keyLenRNG,
043      RandomDistribution.DiscreteRNG valLenRNG,
044      RandomDistribution.DiscreteRNG wordLenRNG, int dictSize) {
045    this.random = random;
046    dict = new byte[dictSize][];
047    this.sorted = sorted;
048    this.keyLenRNG = keyLenRNG;
049    this.valLenRNG = valLenRNG;
050    for (int i = 0; i < dictSize; ++i) {
051      int wordLen = wordLenRNG.nextInt();
052      dict[i] = new byte[wordLen];
053      random.nextBytes(dict[i]);
054    }
055    lastKey = new BytesWritable();
056    fillKey(lastKey);
057  }
058
059  private void fillKey(BytesWritable o) {
060    int len = keyLenRNG.nextInt();
061    if (len < MIN_KEY_LEN) len = MIN_KEY_LEN;
062    o.setSize(len);
063    int n = MIN_KEY_LEN;
064    while (n < len) {
065      byte[] word = dict[random.nextInt(dict.length)];
066      int l = Math.min(word.length, len - n);
067      System.arraycopy(word, 0, o.get(), n, l);
068      n += l;
069    }
070    if (sorted
071        && WritableComparator.compareBytes(lastKey.get(), MIN_KEY_LEN, lastKey
072            .getSize()
073            - MIN_KEY_LEN, o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN) > 0) {
074      incrementPrefix();
075    }
076
077    System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN);
078    lastKey.set(o);
079  }
080
081  private void fillValue(BytesWritable o) {
082    int len = valLenRNG.nextInt();
083    o.setSize(len);
084    int n = 0;
085    while (n < len) {
086      byte[] word = dict[random.nextInt(dict.length)];
087      int l = Math.min(word.length, len - n);
088      System.arraycopy(word, 0, o.get(), n, l);
089      n += l;
090    }
091  }
092
093  private void incrementPrefix() {
094    for (int i = MIN_KEY_LEN - 1; i >= 0; --i) {
095      ++prefix[i];
096      if (prefix[i] != 0) return;
097    }
098
099    throw new RuntimeException("Prefix overflown");
100  }
101
102  public void next(BytesWritable key, BytesWritable value, boolean dupKey) {
103    if (dupKey) {
104      key.set(lastKey);
105    }
106    else {
107      fillKey(key);
108    }
109    fillValue(value);
110  }
111}