View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.util.test;
18  
19  import java.nio.ByteBuffer;
20  import java.util.ArrayList;
21  import java.util.Collections;
22  import java.util.HashMap;
23  import java.util.List;
24  import java.util.Map;
25  import java.util.Random;
26  
27  import org.apache.hadoop.hbase.ArrayBackedTag;
28  import org.apache.hadoop.hbase.Cell;
29  import org.apache.hadoop.hbase.CellComparator;
30  import org.apache.hadoop.hbase.KeyValue;
31  import org.apache.hadoop.hbase.OffheapKeyValue;
32  import org.apache.hadoop.hbase.Tag;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.hbase.util.ByteBufferUtils;
35  import org.apache.hadoop.io.WritableUtils;
36  
37  import com.google.common.primitives.Bytes;
38  
39  /**
40   * Generate list of key values which are very useful to test data block encoding
41   * and compression.
42   */
43  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
44      value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
45      justification="Should probably fix")
46  @InterfaceAudience.Private
47  public class RedundantKVGenerator {
48    // row settings
49    static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
50    static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
51    static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
52    static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
53    static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
54    static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
55    static int DEFAULT_NUMBER_OF_ROW = 500;
56  
57    // qualifier
58    static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
59    static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
60    static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
61    static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
62  
63    static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
64    static int DEFAULT_VALUE_LENGTH = 8;
65    static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
66  
67    static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
68    static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
69  
70    /**
71     * Default constructor, assumes all parameters from class constants.
72     */
73    public RedundantKVGenerator() {
74      this(new Random(42L),
75          DEFAULT_NUMBER_OF_ROW_PREFIXES,
76          DEFAULT_AVERAGE_PREFIX_LENGTH,
77          DEFAULT_PREFIX_LENGTH_VARIANCE,
78          DEFAULT_AVERAGE_SUFFIX_LENGTH,
79          DEFAULT_SUFFIX_LENGTH_VARIANCE,
80          DEFAULT_NUMBER_OF_ROW,
81  
82          DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
83          DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
84          DEFAULT_AVERAGE_QUALIFIER_LENGTH,
85          DEFAULT_QUALIFIER_LENGTH_VARIANCE,
86  
87          DEFAULT_COLUMN_FAMILY_LENGTH,
88          DEFAULT_VALUE_LENGTH,
89          DEFAULT_CHANCE_FOR_ZERO_VALUE,
90  
91          DEFAULT_BASE_TIMESTAMP_DIVIDE,
92          DEFAULT_TIMESTAMP_DIFF_SIZE
93      );
94    }
95  
96  
97    /**
98     * Various configuration options for generating key values
99     * @param randomizer pick things by random
100    */
101   public RedundantKVGenerator(Random randomizer,
102       int numberOfRowPrefixes,
103       int averagePrefixLength,
104       int prefixLengthVariance,
105       int averageSuffixLength,
106       int suffixLengthVariance,
107       int numberOfRows,
108 
109       float chanceForSameQualifier,
110       float chanceForSimiliarQualifier,
111       int averageQualifierLength,
112       int qualifierLengthVariance,
113 
114       int columnFamilyLength,
115       int valueLength,
116       float chanceForZeroValue,
117 
118       int baseTimestampDivide,
119       int timestampDiffSize
120       ) {
121     this.randomizer = randomizer;
122 
123     this.commonPrefix = DEFAULT_COMMON_PREFIX;
124     this.numberOfRowPrefixes = numberOfRowPrefixes;
125     this.averagePrefixLength = averagePrefixLength;
126     this.prefixLengthVariance = prefixLengthVariance;
127     this.averageSuffixLength = averageSuffixLength;
128     this.suffixLengthVariance = suffixLengthVariance;
129     this.numberOfRows = numberOfRows;
130 
131     this.chanceForSameQualifier = chanceForSameQualifier;
132     this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
133     this.averageQualifierLength = averageQualifierLength;
134     this.qualifierLengthVariance = qualifierLengthVariance;
135 
136     this.columnFamilyLength = columnFamilyLength;
137     this.valueLength = valueLength;
138     this.chanceForZeroValue = chanceForZeroValue;
139 
140     this.baseTimestampDivide = baseTimestampDivide;
141     this.timestampDiffSize = timestampDiffSize;
142   }
143 
144   /** Used to generate dataset */
145   private Random randomizer;
146 
147   // row settings
148   private byte[] commonPrefix;//global prefix before rowPrefixes
149   private int numberOfRowPrefixes;
150   private int averagePrefixLength = 6;
151   private int prefixLengthVariance = 3;
152   private int averageSuffixLength = 3;
153   private int suffixLengthVariance = 3;
154   private int numberOfRows = 500;
155 
156   //family
157   private byte[] family;
158 
159   // qualifier
160   private float chanceForSameQualifier = 0.5f;
161   private float chanceForSimilarQualifier = 0.4f;
162   private int averageQualifierLength = 9;
163   private int qualifierLengthVariance = 3;
164 
165   private int columnFamilyLength = 9;
166   private int valueLength = 8;
167   private float chanceForZeroValue = 0.5f;
168 
169   private int baseTimestampDivide = 1000000;
170   private int timestampDiffSize = 100000000;
171 
172   private List<byte[]> generateRows() {
173     // generate prefixes
174     List<byte[]> prefixes = new ArrayList<byte[]>();
175     prefixes.add(new byte[0]);
176     for (int i = 1; i < numberOfRowPrefixes; ++i) {
177       int prefixLength = averagePrefixLength;
178       prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
179           prefixLengthVariance;
180       byte[] newPrefix = new byte[prefixLength];
181       randomizer.nextBytes(newPrefix);
182       byte[] newPrefixWithCommon = newPrefix;
183       prefixes.add(newPrefixWithCommon);
184     }
185 
186     // generate rest of the row
187     List<byte[]> rows = new ArrayList<byte[]>();
188     for (int i = 0; i < numberOfRows; ++i) {
189       int suffixLength = averageSuffixLength;
190       suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
191           suffixLengthVariance;
192       int randomPrefix = randomizer.nextInt(prefixes.size());
193       byte[] row = new byte[prefixes.get(randomPrefix).length +
194                             suffixLength];
195       byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
196       rows.add(rowWithCommonPrefix);
197     }
198 
199     return rows;
200   }
201 
202   /**
203    * Generate test data useful to test encoders.
204    * @param howMany How many Key values should be generated.
205    * @return sorted list of key values
206    */
207   public List<KeyValue> generateTestKeyValues(int howMany) {
208     return generateTestKeyValues(howMany, false);
209   }
210   /**
211    * Generate test data useful to test encoders.
212    * @param howMany How many Key values should be generated.
213    * @return sorted list of key values
214    */
215   public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
216     List<KeyValue> result = new ArrayList<KeyValue>();
217 
218     List<byte[]> rows = generateRows();
219     Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<Integer, List<byte[]>>();
220 
221     if(family==null){
222       family = new byte[columnFamilyLength];
223       randomizer.nextBytes(family);
224     }
225 
226     long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
227 
228     byte[] value = new byte[valueLength];
229 
230     for (int i = 0; i < howMany; ++i) {
231       long timestamp = baseTimestamp;
232       if(timestampDiffSize > 0){
233         timestamp += randomizer.nextInt(timestampDiffSize);
234       }
235       Integer rowId = randomizer.nextInt(rows.size());
236       byte[] row = rows.get(rowId);
237 
238       // generate qualifier, sometimes it is same, sometimes similar,
239       // occasionally completely different
240       byte[] qualifier;
241       float qualifierChance = randomizer.nextFloat();
242       if (!rowsToQualifier.containsKey(rowId)
243           || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
244         int qualifierLength = averageQualifierLength;
245         qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
246             - qualifierLengthVariance;
247         qualifier = new byte[qualifierLength];
248         randomizer.nextBytes(qualifier);
249 
250         // add it to map
251         if (!rowsToQualifier.containsKey(rowId)) {
252           rowsToQualifier.put(rowId, new ArrayList<byte[]>());
253         }
254         rowsToQualifier.get(rowId).add(qualifier);
255       } else if (qualifierChance > chanceForSameQualifier) {
256         // similar qualifier
257         List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
258         byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
259             .size()));
260 
261         qualifier = new byte[originalQualifier.length];
262         int commonPrefix = randomizer.nextInt(qualifier.length);
263         System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
264         for (int j = commonPrefix; j < qualifier.length; ++j) {
265           qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
266         }
267 
268         rowsToQualifier.get(rowId).add(qualifier);
269       } else {
270         // same qualifier
271         List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
272         qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
273       }
274 
275       if (randomizer.nextFloat() < chanceForZeroValue) {
276         for (int j = 0; j < value.length; ++j) {
277           value[j] = (byte) 0;
278         }
279       } else {
280         randomizer.nextBytes(value);
281       }
282 
283       if (useTags) {
284         result.add(new KeyValue(row, family, qualifier, timestamp, value,
285             new Tag[] { new ArrayBackedTag((byte) 1, "value1") }));
286       } else {
287         result.add(new KeyValue(row, family, qualifier, timestamp, value));
288       }
289     }
290 
291     Collections.sort(result, CellComparator.COMPARATOR);
292 
293     return result;
294   }
295 
296   /**
297    * Generate test data useful to test encoders.
298    * @param howMany How many Key values should be generated.
299    * @return sorted list of key values
300    */
301   public List<Cell> generateTestExtendedOffheapKeyValues(int howMany, boolean useTags) {
302     List<Cell> result = new ArrayList<Cell>();
303     List<byte[]> rows = generateRows();
304     Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<Integer, List<byte[]>>();
305 
306     if (family == null) {
307       family = new byte[columnFamilyLength];
308       randomizer.nextBytes(family);
309     }
310 
311     long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
312 
313     byte[] value = new byte[valueLength];
314 
315     for (int i = 0; i < howMany; ++i) {
316       long timestamp = baseTimestamp;
317       if(timestampDiffSize > 0){
318         timestamp += randomizer.nextInt(timestampDiffSize);
319       }
320       Integer rowId = randomizer.nextInt(rows.size());
321       byte[] row = rows.get(rowId);
322 
323       // generate qualifier, sometimes it is same, sometimes similar,
324       // occasionally completely different
325       byte[] qualifier;
326       float qualifierChance = randomizer.nextFloat();
327       if (!rowsToQualifier.containsKey(rowId)
328           || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
329         int qualifierLength = averageQualifierLength;
330         qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
331             - qualifierLengthVariance;
332         qualifier = new byte[qualifierLength];
333         randomizer.nextBytes(qualifier);
334 
335         // add it to map
336         if (!rowsToQualifier.containsKey(rowId)) {
337           rowsToQualifier.put(rowId, new ArrayList<byte[]>());
338         }
339         rowsToQualifier.get(rowId).add(qualifier);
340       } else if (qualifierChance > chanceForSameQualifier) {
341         // similar qualifier
342         List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
343         byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
344             .size()));
345 
346         qualifier = new byte[originalQualifier.length];
347         int commonPrefix = randomizer.nextInt(qualifier.length);
348         System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
349         for (int j = commonPrefix; j < qualifier.length; ++j) {
350           qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
351         }
352 
353         rowsToQualifier.get(rowId).add(qualifier);
354       } else {
355         // same qualifier
356         List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
357         qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
358       }
359 
360       if (randomizer.nextFloat() < chanceForZeroValue) {
361         for (int j = 0; j < value.length; ++j) {
362           value[j] = (byte) 0;
363         }
364       } else {
365         randomizer.nextBytes(value);
366       }
367       if (useTags) {
368         KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, value,
369             new Tag[] { new ArrayBackedTag((byte) 1, "value1") });
370         ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength());
371         ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(),
372           keyValue.getOffset(), keyValue.getLength());
373         OffheapKeyValue offheapKV =
374             new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), true, 0);
375         result.add(offheapKV);
376       } else {
377         KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, value);
378         ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength());
379         ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(),
380           keyValue.getOffset(), keyValue.getLength());
381         OffheapKeyValue offheapKV =
382             new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), false, 0);
383         result.add(offheapKV);
384       }
385     }
386 
387     Collections.sort(result, CellComparator.COMPARATOR);
388 
389     return result;
390   }
391 
392   static class ExtendedOffheapKeyValue extends OffheapKeyValue {
393     public ExtendedOffheapKeyValue(ByteBuffer buf, int offset, int length, boolean hasTags,
394         long seqId) {
395       super(buf, offset, length, hasTags, seqId);
396     }
397 
398     @Override
399     public byte[] getRowArray() {
400       throw new IllegalArgumentException("getRowArray operation is not allowed");
401     }
402 
403     @Override
404     public int getRowOffset() {
405       throw new IllegalArgumentException("getRowOffset operation is not allowed");
406     }
407 
408     @Override
409     public byte[] getFamilyArray() {
410       throw new IllegalArgumentException("getFamilyArray operation is not allowed");
411     }
412 
413     @Override
414     public int getFamilyOffset() {
415       throw new IllegalArgumentException("getFamilyOffset operation is not allowed");
416     }
417 
418     @Override
419     public byte[] getQualifierArray() {
420       throw new IllegalArgumentException("getQualifierArray operation is not allowed");
421     }
422 
423     @Override
424     public int getQualifierOffset() {
425       throw new IllegalArgumentException("getQualifierOffset operation is not allowed");
426     }
427 
428     @Override
429     public byte[] getValueArray() {
430       throw new IllegalArgumentException("getValueArray operation is not allowed");
431     }
432 
433     @Override
434     public int getValueOffset() {
435       throw new IllegalArgumentException("getValueOffset operation is not allowed");
436     }
437 
438     @Override
439     public byte[] getTagsArray() {
440       throw new IllegalArgumentException("getTagsArray operation is not allowed");
441     }
442 
443     @Override
444     public int getTagsOffset() {
445       throw new IllegalArgumentException("getTagsOffset operation is not allowed");
446     }
447   }
448 
449   /**
450    * Convert list of KeyValues to byte buffer.
451    * @param keyValues list of KeyValues to be converted.
452    * @return buffer with content from key values
453    */
454   public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
455       boolean includesMemstoreTS) {
456     int totalSize = 0;
457     for (KeyValue kv : keyValues) {
458       totalSize += kv.getLength();
459       if (includesMemstoreTS) {
460         totalSize += WritableUtils.getVIntSize(kv.getSequenceId());
461       }
462     }
463 
464     ByteBuffer result = ByteBuffer.allocate(totalSize);
465     for (KeyValue kv : keyValues) {
466       result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
467       if (includesMemstoreTS) {
468         ByteBufferUtils.writeVLong(result, kv.getSequenceId());
469       }
470     }
471     return result;
472   }
473   
474   
475   /************************ get/set ***********************************/
476   
477   public RedundantKVGenerator setCommonPrefix(byte[] prefix){
478     this.commonPrefix = prefix;
479     return this;
480   }
481 
482   public RedundantKVGenerator setRandomizer(Random randomizer) {
483     this.randomizer = randomizer;
484     return this;
485   }
486 
487   public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) {
488     this.numberOfRowPrefixes = numberOfRowPrefixes;
489     return this;
490   }
491 
492   public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) {
493     this.averagePrefixLength = averagePrefixLength;
494     return this;
495   }
496 
497   public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) {
498     this.prefixLengthVariance = prefixLengthVariance;
499     return this;
500   }
501 
502   public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) {
503     this.averageSuffixLength = averageSuffixLength;
504     return this;
505   }
506 
507   public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) {
508     this.suffixLengthVariance = suffixLengthVariance;
509     return this;
510   }
511 
512   public RedundantKVGenerator setNumberOfRows(int numberOfRows) {
513     this.numberOfRows = numberOfRows;
514     return this;
515   }
516 
517   public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) {
518     this.chanceForSameQualifier = chanceForSameQualifier;
519     return this;
520   }
521 
522   public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimiliarQualifier) {
523     this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
524     return this;
525   }
526 
527   public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) {
528     this.averageQualifierLength = averageQualifierLength;
529     return this;
530   }
531 
532   public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) {
533     this.qualifierLengthVariance = qualifierLengthVariance;
534     return this;
535   }
536 
537   public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) {
538     this.columnFamilyLength = columnFamilyLength;
539     return this;
540   }
541 
542   public RedundantKVGenerator setFamily(byte[] family) {
543     this.family = family;
544     this.columnFamilyLength = family.length;
545     return this;
546   }
547 
548   public RedundantKVGenerator setValueLength(int valueLength) {
549     this.valueLength = valueLength;
550     return this;
551   }
552 
553   public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) {
554     this.chanceForZeroValue = chanceForZeroValue;
555     return this;
556   }
557 
558   public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) {
559     this.baseTimestampDivide = baseTimestampDivide;
560     return this;
561   }
562 
563   public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) {
564     this.timestampDiffSize = timestampDiffSize;
565     return this;
566   }
567   
568 }