View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.util.test;
18  
19  import java.nio.ByteBuffer;
20  import java.util.ArrayList;
21  import java.util.Collections;
22  import java.util.HashMap;
23  import java.util.List;
24  import java.util.Map;
25  import java.util.Random;
26  
27  import org.apache.hadoop.hbase.CellComparator;
28  import org.apache.hadoop.hbase.KeyValue;
29  import org.apache.hadoop.hbase.Tag;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.hbase.util.ByteBufferUtils;
32  import org.apache.hadoop.io.WritableUtils;
33  
34  import com.google.common.primitives.Bytes;
35  
36  /**
37   * Generate list of key values which are very useful to test data block encoding
38   * and compression.
39   */
40  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
41      value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
42      justification="Should probably fix")
43  @InterfaceAudience.Private
44  public class RedundantKVGenerator {
45    // row settings
46    static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
47    static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
48    static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
49    static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
50    static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
51    static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
52    static int DEFAULT_NUMBER_OF_ROW = 500;
53  
54    // qualifier
55    static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
56    static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
57    static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
58    static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
59  
60    static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
61    static int DEFAULT_VALUE_LENGTH = 8;
62    static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
63  
64    static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
65    static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
66  
67    /**
68     * Default constructor, assumes all parameters from class constants.
69     */
70    public RedundantKVGenerator() {
71      this(new Random(42L),
72          DEFAULT_NUMBER_OF_ROW_PREFIXES,
73          DEFAULT_AVERAGE_PREFIX_LENGTH,
74          DEFAULT_PREFIX_LENGTH_VARIANCE,
75          DEFAULT_AVERAGE_SUFFIX_LENGTH,
76          DEFAULT_SUFFIX_LENGTH_VARIANCE,
77          DEFAULT_NUMBER_OF_ROW,
78  
79          DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
80          DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
81          DEFAULT_AVERAGE_QUALIFIER_LENGTH,
82          DEFAULT_QUALIFIER_LENGTH_VARIANCE,
83  
84          DEFAULT_COLUMN_FAMILY_LENGTH,
85          DEFAULT_VALUE_LENGTH,
86          DEFAULT_CHANCE_FOR_ZERO_VALUE,
87  
88          DEFAULT_BASE_TIMESTAMP_DIVIDE,
89          DEFAULT_TIMESTAMP_DIFF_SIZE
90      );
91    }
92  
93  
94    /**
95     * Various configuration options for generating key values
96     * @param randomizer pick things by random
97     */
98    public RedundantKVGenerator(Random randomizer,
99        int numberOfRowPrefixes,
100       int averagePrefixLength,
101       int prefixLengthVariance,
102       int averageSuffixLength,
103       int suffixLengthVariance,
104       int numberOfRows,
105 
106       float chanceForSameQualifier,
107       float chanceForSimiliarQualifier,
108       int averageQualifierLength,
109       int qualifierLengthVariance,
110 
111       int columnFamilyLength,
112       int valueLength,
113       float chanceForZeroValue,
114 
115       int baseTimestampDivide,
116       int timestampDiffSize
117       ) {
118     this.randomizer = randomizer;
119 
120     this.commonPrefix = DEFAULT_COMMON_PREFIX;
121     this.numberOfRowPrefixes = numberOfRowPrefixes;
122     this.averagePrefixLength = averagePrefixLength;
123     this.prefixLengthVariance = prefixLengthVariance;
124     this.averageSuffixLength = averageSuffixLength;
125     this.suffixLengthVariance = suffixLengthVariance;
126     this.numberOfRows = numberOfRows;
127 
128     this.chanceForSameQualifier = chanceForSameQualifier;
129     this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
130     this.averageQualifierLength = averageQualifierLength;
131     this.qualifierLengthVariance = qualifierLengthVariance;
132 
133     this.columnFamilyLength = columnFamilyLength;
134     this.valueLength = valueLength;
135     this.chanceForZeroValue = chanceForZeroValue;
136 
137     this.baseTimestampDivide = baseTimestampDivide;
138     this.timestampDiffSize = timestampDiffSize;
139   }
140 
141   /** Used to generate dataset */
142   private Random randomizer;
143 
144   // row settings
145   private byte[] commonPrefix;//global prefix before rowPrefixes
146   private int numberOfRowPrefixes;
147   private int averagePrefixLength = 6;
148   private int prefixLengthVariance = 3;
149   private int averageSuffixLength = 3;
150   private int suffixLengthVariance = 3;
151   private int numberOfRows = 500;
152 
153   //family
154   private byte[] family;
155 
156   // qualifier
157   private float chanceForSameQualifier = 0.5f;
158   private float chanceForSimilarQualifier = 0.4f;
159   private int averageQualifierLength = 9;
160   private int qualifierLengthVariance = 3;
161 
162   private int columnFamilyLength = 9;
163   private int valueLength = 8;
164   private float chanceForZeroValue = 0.5f;
165 
166   private int baseTimestampDivide = 1000000;
167   private int timestampDiffSize = 100000000;
168 
169   private List<byte[]> generateRows() {
170     // generate prefixes
171     List<byte[]> prefixes = new ArrayList<byte[]>();
172     prefixes.add(new byte[0]);
173     for (int i = 1; i < numberOfRowPrefixes; ++i) {
174       int prefixLength = averagePrefixLength;
175       prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
176           prefixLengthVariance;
177       byte[] newPrefix = new byte[prefixLength];
178       randomizer.nextBytes(newPrefix);
179       byte[] newPrefixWithCommon = newPrefix;
180       prefixes.add(newPrefixWithCommon);
181     }
182 
183     // generate rest of the row
184     List<byte[]> rows = new ArrayList<byte[]>();
185     for (int i = 0; i < numberOfRows; ++i) {
186       int suffixLength = averageSuffixLength;
187       suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
188           suffixLengthVariance;
189       int randomPrefix = randomizer.nextInt(prefixes.size());
190       byte[] row = new byte[prefixes.get(randomPrefix).length +
191                             suffixLength];
192       byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
193       rows.add(rowWithCommonPrefix);
194     }
195 
196     return rows;
197   }
198 
199   /**
200    * Generate test data useful to test encoders.
201    * @param howMany How many Key values should be generated.
202    * @return sorted list of key values
203    */
204   public List<KeyValue> generateTestKeyValues(int howMany) {
205     return generateTestKeyValues(howMany, false);
206   }
207   /**
208    * Generate test data useful to test encoders.
209    * @param howMany How many Key values should be generated.
210    * @return sorted list of key values
211    */
212   public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
213     List<KeyValue> result = new ArrayList<KeyValue>();
214 
215     List<byte[]> rows = generateRows();
216     Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<Integer, List<byte[]>>();
217 
218     if(family==null){
219       family = new byte[columnFamilyLength];
220       randomizer.nextBytes(family);
221     }
222 
223     long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
224 
225     byte[] value = new byte[valueLength];
226 
227     for (int i = 0; i < howMany; ++i) {
228       long timestamp = baseTimestamp;
229       if(timestampDiffSize > 0){
230         timestamp += randomizer.nextInt(timestampDiffSize);
231       }
232       Integer rowId = randomizer.nextInt(rows.size());
233       byte[] row = rows.get(rowId);
234 
235       // generate qualifier, sometimes it is same, sometimes similar,
236       // occasionally completely different
237       byte[] qualifier;
238       float qualifierChance = randomizer.nextFloat();
239       if (!rowsToQualifier.containsKey(rowId)
240           || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
241         int qualifierLength = averageQualifierLength;
242         qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
243             - qualifierLengthVariance;
244         qualifier = new byte[qualifierLength];
245         randomizer.nextBytes(qualifier);
246 
247         // add it to map
248         if (!rowsToQualifier.containsKey(rowId)) {
249           rowsToQualifier.put(rowId, new ArrayList<byte[]>());
250         }
251         rowsToQualifier.get(rowId).add(qualifier);
252       } else if (qualifierChance > chanceForSameQualifier) {
253         // similar qualifier
254         List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
255         byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
256             .size()));
257 
258         qualifier = new byte[originalQualifier.length];
259         int commonPrefix = randomizer.nextInt(qualifier.length);
260         System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
261         for (int j = commonPrefix; j < qualifier.length; ++j) {
262           qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
263         }
264 
265         rowsToQualifier.get(rowId).add(qualifier);
266       } else {
267         // same qualifier
268         List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
269         qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
270       }
271 
272       if (randomizer.nextFloat() < chanceForZeroValue) {
273         for (int j = 0; j < value.length; ++j) {
274           value[j] = (byte) 0;
275         }
276       } else {
277         randomizer.nextBytes(value);
278       }
279 
280       if (useTags) {
281         result.add(new KeyValue(row, family, qualifier, timestamp, value, new Tag[] { new Tag(
282             (byte) 1, "value1") }));
283       } else {
284         result.add(new KeyValue(row, family, qualifier, timestamp, value));
285       }
286     }
287 
288     Collections.sort(result, CellComparator.COMPARATOR);
289 
290     return result;
291   }
292 
293   /**
294    * Convert list of KeyValues to byte buffer.
295    * @param keyValues list of KeyValues to be converted.
296    * @return buffer with content from key values
297    */
298   public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
299       boolean includesMemstoreTS) {
300     int totalSize = 0;
301     for (KeyValue kv : keyValues) {
302       totalSize += kv.getLength();
303       if (includesMemstoreTS) {
304         totalSize += WritableUtils.getVIntSize(kv.getSequenceId());
305       }
306     }
307 
308     ByteBuffer result = ByteBuffer.allocate(totalSize);
309     for (KeyValue kv : keyValues) {
310       result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
311       if (includesMemstoreTS) {
312         ByteBufferUtils.writeVLong(result, kv.getSequenceId());
313       }
314     }
315     return result;
316   }
317   
318   
319   /************************ get/set ***********************************/
320   
321   public RedundantKVGenerator setCommonPrefix(byte[] prefix){
322     this.commonPrefix = prefix;
323     return this;
324   }
325 
326   public RedundantKVGenerator setRandomizer(Random randomizer) {
327     this.randomizer = randomizer;
328     return this;
329   }
330 
331   public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) {
332     this.numberOfRowPrefixes = numberOfRowPrefixes;
333     return this;
334   }
335 
336   public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) {
337     this.averagePrefixLength = averagePrefixLength;
338     return this;
339   }
340 
341   public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) {
342     this.prefixLengthVariance = prefixLengthVariance;
343     return this;
344   }
345 
346   public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) {
347     this.averageSuffixLength = averageSuffixLength;
348     return this;
349   }
350 
351   public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) {
352     this.suffixLengthVariance = suffixLengthVariance;
353     return this;
354   }
355 
356   public RedundantKVGenerator setNumberOfRows(int numberOfRows) {
357     this.numberOfRows = numberOfRows;
358     return this;
359   }
360 
361   public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) {
362     this.chanceForSameQualifier = chanceForSameQualifier;
363     return this;
364   }
365 
366   public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimiliarQualifier) {
367     this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
368     return this;
369   }
370 
371   public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) {
372     this.averageQualifierLength = averageQualifierLength;
373     return this;
374   }
375 
376   public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) {
377     this.qualifierLengthVariance = qualifierLengthVariance;
378     return this;
379   }
380 
381   public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) {
382     this.columnFamilyLength = columnFamilyLength;
383     return this;
384   }
385 
386   public RedundantKVGenerator setFamily(byte[] family) {
387     this.family = family;
388     this.columnFamilyLength = family.length;
389     return this;
390   }
391 
392   public RedundantKVGenerator setValueLength(int valueLength) {
393     this.valueLength = valueLength;
394     return this;
395   }
396 
397   public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) {
398     this.chanceForZeroValue = chanceForZeroValue;
399     return this;
400   }
401 
402   public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) {
403     this.baseTimestampDivide = baseTimestampDivide;
404     return this;
405   }
406 
407   public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) {
408     this.timestampDiffSize = timestampDiffSize;
409     return this;
410   }
411   
412 }