001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with this 004 * work for additional information regarding copyright ownership. The ASF 005 * licenses this file to you under the Apache License, Version 2.0 (the 006 * "License"); you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 014 * License for the specific language governing permissions and limitations under 015 * the License. 016 */ 017package org.apache.hadoop.hbase.io.hfile; 018 019import java.util.Random; 020 021import org.apache.hadoop.io.BytesWritable; 022import org.apache.hadoop.io.WritableComparator; 023 024/** 025 * Generate random <key, value> pairs. 026 * <p> 027 * Copied from 028 * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>. 029 * Remove after tfile is committed and use the tfile version of this class 030 * instead.</p> 031 */ 032class KVGenerator { 033 private final Random random; 034 private final byte[][] dict; 035 private final boolean sorted; 036 private final RandomDistribution.DiscreteRNG keyLenRNG, valLenRNG; 037 private BytesWritable lastKey; 038 private static final int MIN_KEY_LEN = 4; 039 private final byte prefix[] = new byte[MIN_KEY_LEN]; 040 041 public KVGenerator(Random random, boolean sorted, 042 RandomDistribution.DiscreteRNG keyLenRNG, 043 RandomDistribution.DiscreteRNG valLenRNG, 044 RandomDistribution.DiscreteRNG wordLenRNG, int dictSize) { 045 this.random = random; 046 dict = new byte[dictSize][]; 047 this.sorted = sorted; 048 this.keyLenRNG = keyLenRNG; 049 this.valLenRNG = valLenRNG; 050 for (int i = 0; i < dictSize; ++i) { 051 int wordLen = wordLenRNG.nextInt(); 052 dict[i] = new byte[wordLen]; 053 random.nextBytes(dict[i]); 054 } 055 lastKey = new BytesWritable(); 056 fillKey(lastKey); 057 } 058 059 private void fillKey(BytesWritable o) { 060 int len = keyLenRNG.nextInt(); 061 if (len < MIN_KEY_LEN) len = MIN_KEY_LEN; 062 o.setSize(len); 063 int n = MIN_KEY_LEN; 064 while (n < len) { 065 byte[] word = dict[random.nextInt(dict.length)]; 066 int l = Math.min(word.length, len - n); 067 System.arraycopy(word, 0, o.get(), n, l); 068 n += l; 069 } 070 if (sorted 071 && WritableComparator.compareBytes(lastKey.get(), MIN_KEY_LEN, lastKey 072 .getSize() 073 - MIN_KEY_LEN, o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN) > 0) { 074 incrementPrefix(); 075 } 076 077 System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN); 078 lastKey.set(o); 079 } 080 081 private void fillValue(BytesWritable o) { 082 int len = valLenRNG.nextInt(); 083 o.setSize(len); 084 int n = 0; 085 while (n < len) { 086 byte[] word = dict[random.nextInt(dict.length)]; 087 int l = Math.min(word.length, len - n); 088 System.arraycopy(word, 0, o.get(), n, l); 089 n += l; 090 } 091 } 092 093 private void incrementPrefix() { 094 for (int i = MIN_KEY_LEN - 1; i >= 0; --i) { 095 ++prefix[i]; 096 if (prefix[i] != 0) return; 097 } 098 099 throw new RuntimeException("Prefix overflown"); 100 } 101 102 public void next(BytesWritable key, BytesWritable value, boolean dupKey) { 103 if (dupKey) { 104 key.set(lastKey); 105 } 106 else { 107 fillKey(key); 108 } 109 fillValue(value); 110 } 111}