001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import java.util.Random; 021import org.apache.hadoop.hbase.util.RandomDistribution; 022import org.apache.hadoop.io.BytesWritable; 023import org.apache.hadoop.io.WritableComparator; 024 025/** 026 * Generate random <key, value> pairs. 027 * <p> 028 * Copied from <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>. 029 * Remove after tfile is committed and use the tfile version of this class instead. 030 * </p> 031 */ 032class KVGenerator { 033 private final Random random; 034 private final byte[][] dict; 035 private final boolean sorted; 036 private final RandomDistribution.DiscreteRNG keyLenRNG, valLenRNG; 037 private BytesWritable lastKey; 038 private static final int MIN_KEY_LEN = 4; 039 private final byte prefix[] = new byte[MIN_KEY_LEN]; 040 041 public KVGenerator(Random random, boolean sorted, RandomDistribution.DiscreteRNG keyLenRNG, 042 RandomDistribution.DiscreteRNG valLenRNG, RandomDistribution.DiscreteRNG wordLenRNG, 043 int dictSize) { 044 this.random = random; 045 dict = new byte[dictSize][]; 046 this.sorted = sorted; 047 this.keyLenRNG = keyLenRNG; 048 this.valLenRNG = valLenRNG; 049 for (int i = 0; i < dictSize; ++i) { 050 int wordLen = wordLenRNG.nextInt(); 051 dict[i] = new byte[wordLen]; 052 random.nextBytes(dict[i]); 053 } 054 lastKey = new BytesWritable(); 055 fillKey(lastKey); 056 } 057 058 private void fillKey(BytesWritable o) { 059 int len = keyLenRNG.nextInt(); 060 if (len < MIN_KEY_LEN) len = MIN_KEY_LEN; 061 o.setSize(len); 062 int n = MIN_KEY_LEN; 063 while (n < len) { 064 byte[] word = dict[random.nextInt(dict.length)]; 065 int l = Math.min(word.length, len - n); 066 System.arraycopy(word, 0, o.get(), n, l); 067 n += l; 068 } 069 if ( 070 sorted && WritableComparator.compareBytes(lastKey.get(), MIN_KEY_LEN, 071 lastKey.getSize() - MIN_KEY_LEN, o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN) > 0 072 ) { 073 incrementPrefix(); 074 } 075 076 System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN); 077 lastKey.set(o); 078 } 079 080 private void fillValue(BytesWritable o) { 081 int len = valLenRNG.nextInt(); 082 o.setSize(len); 083 int n = 0; 084 while (n < len) { 085 byte[] word = dict[random.nextInt(dict.length)]; 086 int l = Math.min(word.length, len - n); 087 System.arraycopy(word, 0, o.get(), n, l); 088 n += l; 089 } 090 } 091 092 private void incrementPrefix() { 093 for (int i = MIN_KEY_LEN - 1; i >= 0; --i) { 094 ++prefix[i]; 095 if (prefix[i] != 0) return; 096 } 097 098 throw new RuntimeException("Prefix overflown"); 099 } 100 101 public void next(BytesWritable key, BytesWritable value, boolean dupKey) { 102 if (dupKey) { 103 key.set(lastKey); 104 } else { 105 fillKey(key); 106 } 107 fillValue(value); 108 } 109}