1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.hadoop.hbase.util.test;
18
19 import java.nio.ByteBuffer;
20 import java.util.ArrayList;
21 import java.util.Collections;
22 import java.util.HashMap;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Random;
26
27 import org.apache.hadoop.hbase.classification.InterfaceAudience;
28 import org.apache.hadoop.hbase.KeyValue;
29 import org.apache.hadoop.hbase.Tag;
30 import org.apache.hadoop.hbase.util.ByteBufferUtils;
31 import org.apache.hadoop.io.WritableUtils;
32
33 import com.google.common.primitives.Bytes;
34
35
36
37
38
39 @edu.umd.cs.findbugs.annotations.SuppressWarnings(
40 value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
41 justification="Should probably fix")
42 @InterfaceAudience.Private
43 public class RedundantKVGenerator {
44
45 static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
46 static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
47 static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
48 static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
49 static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
50 static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
51 static int DEFAULT_NUMBER_OF_ROW = 500;
52
53
54 static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
55 static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
56 static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
57 static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
58
59 static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
60 static int DEFAULT_VALUE_LENGTH = 8;
61 static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
62
63 static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
64 static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
65
66
67
68
69 public RedundantKVGenerator() {
70 this(new Random(42L),
71 DEFAULT_NUMBER_OF_ROW_PREFIXES,
72 DEFAULT_AVERAGE_PREFIX_LENGTH,
73 DEFAULT_PREFIX_LENGTH_VARIANCE,
74 DEFAULT_AVERAGE_SUFFIX_LENGTH,
75 DEFAULT_SUFFIX_LENGTH_VARIANCE,
76 DEFAULT_NUMBER_OF_ROW,
77
78 DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
79 DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
80 DEFAULT_AVERAGE_QUALIFIER_LENGTH,
81 DEFAULT_QUALIFIER_LENGTH_VARIANCE,
82
83 DEFAULT_COLUMN_FAMILY_LENGTH,
84 DEFAULT_VALUE_LENGTH,
85 DEFAULT_CHANCE_FOR_ZERO_VALUE,
86
87 DEFAULT_BASE_TIMESTAMP_DIVIDE,
88 DEFAULT_TIMESTAMP_DIFF_SIZE
89 );
90 }
91
92
93
94
95
96
97 public RedundantKVGenerator(Random randomizer,
98 int numberOfRowPrefixes,
99 int averagePrefixLength,
100 int prefixLengthVariance,
101 int averageSuffixLength,
102 int suffixLengthVariance,
103 int numberOfRows,
104
105 float chanceForSameQualifier,
106 float chanceForSimiliarQualifier,
107 int averageQualifierLength,
108 int qualifierLengthVariance,
109
110 int columnFamilyLength,
111 int valueLength,
112 float chanceForZeroValue,
113
114 int baseTimestampDivide,
115 int timestampDiffSize
116 ) {
117 this.randomizer = randomizer;
118
119 this.commonPrefix = DEFAULT_COMMON_PREFIX;
120 this.numberOfRowPrefixes = numberOfRowPrefixes;
121 this.averagePrefixLength = averagePrefixLength;
122 this.prefixLengthVariance = prefixLengthVariance;
123 this.averageSuffixLength = averageSuffixLength;
124 this.suffixLengthVariance = suffixLengthVariance;
125 this.numberOfRows = numberOfRows;
126
127 this.chanceForSameQualifier = chanceForSameQualifier;
128 this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
129 this.averageQualifierLength = averageQualifierLength;
130 this.qualifierLengthVariance = qualifierLengthVariance;
131
132 this.columnFamilyLength = columnFamilyLength;
133 this.valueLength = valueLength;
134 this.chanceForZeroValue = chanceForZeroValue;
135
136 this.baseTimestampDivide = baseTimestampDivide;
137 this.timestampDiffSize = timestampDiffSize;
138 }
139
140
141 private Random randomizer;
142
143
144 private byte[] commonPrefix;
145 private int numberOfRowPrefixes;
146 private int averagePrefixLength = 6;
147 private int prefixLengthVariance = 3;
148 private int averageSuffixLength = 3;
149 private int suffixLengthVariance = 3;
150 private int numberOfRows = 500;
151
152
153 private byte[] family;
154
155
156 private float chanceForSameQualifier = 0.5f;
157 private float chanceForSimilarQualifier = 0.4f;
158 private int averageQualifierLength = 9;
159 private int qualifierLengthVariance = 3;
160
161 private int columnFamilyLength = 9;
162 private int valueLength = 8;
163 private float chanceForZeroValue = 0.5f;
164
165 private int baseTimestampDivide = 1000000;
166 private int timestampDiffSize = 100000000;
167
168 private List<byte[]> generateRows() {
169
170 List<byte[]> prefixes = new ArrayList<byte[]>();
171 prefixes.add(new byte[0]);
172 for (int i = 1; i < numberOfRowPrefixes; ++i) {
173 int prefixLength = averagePrefixLength;
174 prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
175 prefixLengthVariance;
176 byte[] newPrefix = new byte[prefixLength];
177 randomizer.nextBytes(newPrefix);
178 byte[] newPrefixWithCommon = newPrefix;
179 prefixes.add(newPrefixWithCommon);
180 }
181
182
183 List<byte[]> rows = new ArrayList<byte[]>();
184 for (int i = 0; i < numberOfRows; ++i) {
185 int suffixLength = averageSuffixLength;
186 suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
187 suffixLengthVariance;
188 int randomPrefix = randomizer.nextInt(prefixes.size());
189 byte[] row = new byte[prefixes.get(randomPrefix).length +
190 suffixLength];
191 byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
192 rows.add(rowWithCommonPrefix);
193 }
194
195 return rows;
196 }
197
198
199
200
201
202
203 public List<KeyValue> generateTestKeyValues(int howMany) {
204 return generateTestKeyValues(howMany, false);
205 }
206
207
208
209
210
211 public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
212 List<KeyValue> result = new ArrayList<KeyValue>();
213
214 List<byte[]> rows = generateRows();
215 Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<Integer, List<byte[]>>();
216
217 if(family==null){
218 family = new byte[columnFamilyLength];
219 randomizer.nextBytes(family);
220 }
221
222 long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
223
224 byte[] value = new byte[valueLength];
225
226 for (int i = 0; i < howMany; ++i) {
227 long timestamp = baseTimestamp;
228 if(timestampDiffSize > 0){
229 timestamp += randomizer.nextInt(timestampDiffSize);
230 }
231 Integer rowId = randomizer.nextInt(rows.size());
232 byte[] row = rows.get(rowId);
233
234
235
236 byte[] qualifier;
237 float qualifierChance = randomizer.nextFloat();
238 if (!rowsToQualifier.containsKey(rowId)
239 || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
240 int qualifierLength = averageQualifierLength;
241 qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
242 - qualifierLengthVariance;
243 qualifier = new byte[qualifierLength];
244 randomizer.nextBytes(qualifier);
245
246
247 if (!rowsToQualifier.containsKey(rowId)) {
248 rowsToQualifier.put(rowId, new ArrayList<byte[]>());
249 }
250 rowsToQualifier.get(rowId).add(qualifier);
251 } else if (qualifierChance > chanceForSameQualifier) {
252
253 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
254 byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
255 .size()));
256
257 qualifier = new byte[originalQualifier.length];
258 int commonPrefix = randomizer.nextInt(qualifier.length);
259 System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
260 for (int j = commonPrefix; j < qualifier.length; ++j) {
261 qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
262 }
263
264 rowsToQualifier.get(rowId).add(qualifier);
265 } else {
266
267 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
268 qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
269 }
270
271 if (randomizer.nextFloat() < chanceForZeroValue) {
272 for (int j = 0; j < value.length; ++j) {
273 value[j] = (byte) 0;
274 }
275 } else {
276 randomizer.nextBytes(value);
277 }
278
279 if (useTags) {
280 result.add(new KeyValue(row, family, qualifier, timestamp, value, new Tag[] { new Tag(
281 (byte) 1, "value1") }));
282 } else {
283 result.add(new KeyValue(row, family, qualifier, timestamp, value));
284 }
285 }
286
287 Collections.sort(result, KeyValue.COMPARATOR);
288
289 return result;
290 }
291
292
293
294
295
296
297 public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
298 boolean includesMemstoreTS) {
299 int totalSize = 0;
300 for (KeyValue kv : keyValues) {
301 totalSize += kv.getLength();
302 if (includesMemstoreTS) {
303 totalSize += WritableUtils.getVIntSize(kv.getMvccVersion());
304 }
305 }
306
307 ByteBuffer result = ByteBuffer.allocate(totalSize);
308 for (KeyValue kv : keyValues) {
309 result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
310 if (includesMemstoreTS) {
311 ByteBufferUtils.writeVLong(result, kv.getMvccVersion());
312 }
313 }
314 return result;
315 }
316
317
318
319
320 public RedundantKVGenerator setCommonPrefix(byte[] prefix){
321 this.commonPrefix = prefix;
322 return this;
323 }
324
325 public RedundantKVGenerator setRandomizer(Random randomizer) {
326 this.randomizer = randomizer;
327 return this;
328 }
329
330 public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) {
331 this.numberOfRowPrefixes = numberOfRowPrefixes;
332 return this;
333 }
334
335 public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) {
336 this.averagePrefixLength = averagePrefixLength;
337 return this;
338 }
339
340 public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) {
341 this.prefixLengthVariance = prefixLengthVariance;
342 return this;
343 }
344
345 public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) {
346 this.averageSuffixLength = averageSuffixLength;
347 return this;
348 }
349
350 public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) {
351 this.suffixLengthVariance = suffixLengthVariance;
352 return this;
353 }
354
355 public RedundantKVGenerator setNumberOfRows(int numberOfRows) {
356 this.numberOfRows = numberOfRows;
357 return this;
358 }
359
360 public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) {
361 this.chanceForSameQualifier = chanceForSameQualifier;
362 return this;
363 }
364
365 public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimiliarQualifier) {
366 this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
367 return this;
368 }
369
370 public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) {
371 this.averageQualifierLength = averageQualifierLength;
372 return this;
373 }
374
375 public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) {
376 this.qualifierLengthVariance = qualifierLengthVariance;
377 return this;
378 }
379
380 public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) {
381 this.columnFamilyLength = columnFamilyLength;
382 return this;
383 }
384
385 public RedundantKVGenerator setFamily(byte[] family) {
386 this.family = family;
387 this.columnFamilyLength = family.length;
388 return this;
389 }
390
391 public RedundantKVGenerator setValueLength(int valueLength) {
392 this.valueLength = valueLength;
393 return this;
394 }
395
396 public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) {
397 this.chanceForZeroValue = chanceForZeroValue;
398 return this;
399 }
400
401 public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) {
402 this.baseTimestampDivide = baseTimestampDivide;
403 return this;
404 }
405
406 public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) {
407 this.timestampDiffSize = timestampDiffSize;
408 return this;
409 }
410
411 }