001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with this
004 * work for additional information regarding copyright ownership. The ASF
005 * licenses this file to you under the Apache License, Version 2.0 (the
006 * "License"); you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
014 * License for the specific language governing permissions and limitations
015 * under the License.
016 */
017package org.apache.hadoop.hbase.util;
018
019import java.nio.ByteBuffer;
020import java.util.ArrayList;
021import java.util.Collections;
022import java.util.HashMap;
023import java.util.List;
024import java.util.Map;
025import java.util.Random;
026
027import org.apache.hadoop.hbase.ArrayBackedTag;
028import org.apache.hadoop.hbase.ByteBufferKeyValue;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.CellComparator;
031import org.apache.hadoop.hbase.KeyValue;
032import org.apache.hadoop.hbase.Tag;
033import org.apache.hadoop.io.WritableUtils;
034import org.apache.yetus.audience.InterfaceAudience;
035
036import org.apache.hbase.thirdparty.com.google.common.primitives.Bytes;
037
038/**
039 * Generate list of key values which are very useful to test data block encoding
040 * and compression.
041 */
042@edu.umd.cs.findbugs.annotations.SuppressWarnings(
043    value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
044    justification="Should probably fix")
045@InterfaceAudience.Private
046public class RedundantKVGenerator {
047  // row settings
048  static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
049  static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
050  static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
051  static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
052  static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
053  static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
054  static int DEFAULT_NUMBER_OF_ROW = 500;
055
056  // qualifier
057  static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
058  static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
059  static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
060  static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
061
062  static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
063  static int DEFAULT_VALUE_LENGTH = 8;
064  static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
065
066  static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
067  static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
068
069  /**
070   * Default constructor, assumes all parameters from class constants.
071   */
072  public RedundantKVGenerator() {
073    this(new Random(42L),
074        DEFAULT_NUMBER_OF_ROW_PREFIXES,
075        DEFAULT_AVERAGE_PREFIX_LENGTH,
076        DEFAULT_PREFIX_LENGTH_VARIANCE,
077        DEFAULT_AVERAGE_SUFFIX_LENGTH,
078        DEFAULT_SUFFIX_LENGTH_VARIANCE,
079        DEFAULT_NUMBER_OF_ROW,
080
081        DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
082        DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
083        DEFAULT_AVERAGE_QUALIFIER_LENGTH,
084        DEFAULT_QUALIFIER_LENGTH_VARIANCE,
085
086        DEFAULT_COLUMN_FAMILY_LENGTH,
087        DEFAULT_VALUE_LENGTH,
088        DEFAULT_CHANCE_FOR_ZERO_VALUE,
089
090        DEFAULT_BASE_TIMESTAMP_DIVIDE,
091        DEFAULT_TIMESTAMP_DIFF_SIZE
092    );
093  }
094
095
096  /**
097   * Various configuration options for generating key values
098   * @param randomizer pick things by random
099   */
100  public RedundantKVGenerator(Random randomizer,
101      int numberOfRowPrefixes,
102      int averagePrefixLength,
103      int prefixLengthVariance,
104      int averageSuffixLength,
105      int suffixLengthVariance,
106      int numberOfRows,
107
108      float chanceForSameQualifier,
109      float chanceForSimiliarQualifier,
110      int averageQualifierLength,
111      int qualifierLengthVariance,
112
113      int columnFamilyLength,
114      int valueLength,
115      float chanceForZeroValue,
116
117      int baseTimestampDivide,
118      int timestampDiffSize
119      ) {
120    this.randomizer = randomizer;
121
122    this.commonPrefix = DEFAULT_COMMON_PREFIX;
123    this.numberOfRowPrefixes = numberOfRowPrefixes;
124    this.averagePrefixLength = averagePrefixLength;
125    this.prefixLengthVariance = prefixLengthVariance;
126    this.averageSuffixLength = averageSuffixLength;
127    this.suffixLengthVariance = suffixLengthVariance;
128    this.numberOfRows = numberOfRows;
129
130    this.chanceForSameQualifier = chanceForSameQualifier;
131    this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
132    this.averageQualifierLength = averageQualifierLength;
133    this.qualifierLengthVariance = qualifierLengthVariance;
134
135    this.columnFamilyLength = columnFamilyLength;
136    this.valueLength = valueLength;
137    this.chanceForZeroValue = chanceForZeroValue;
138
139    this.baseTimestampDivide = baseTimestampDivide;
140    this.timestampDiffSize = timestampDiffSize;
141  }
142
143  /** Used to generate dataset */
144  private Random randomizer;
145
146  // row settings
147  private byte[] commonPrefix;//global prefix before rowPrefixes
148  private int numberOfRowPrefixes;
149  private int averagePrefixLength = 6;
150  private int prefixLengthVariance = 3;
151  private int averageSuffixLength = 3;
152  private int suffixLengthVariance = 3;
153  private int numberOfRows = 500;
154
155  //family
156  private byte[] family;
157
158  // qualifier
159  private float chanceForSameQualifier = 0.5f;
160  private float chanceForSimilarQualifier = 0.4f;
161  private int averageQualifierLength = 9;
162  private int qualifierLengthVariance = 3;
163
164  private int columnFamilyLength = 9;
165  private int valueLength = 8;
166  private float chanceForZeroValue = 0.5f;
167
168  private int baseTimestampDivide = 1000000;
169  private int timestampDiffSize = 100000000;
170
171  private List<byte[]> generateRows() {
172    // generate prefixes
173    List<byte[]> prefixes = new ArrayList<>();
174    prefixes.add(new byte[0]);
175    for (int i = 1; i < numberOfRowPrefixes; ++i) {
176      int prefixLength = averagePrefixLength;
177      prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
178          prefixLengthVariance;
179      byte[] newPrefix = new byte[prefixLength];
180      randomizer.nextBytes(newPrefix);
181      byte[] newPrefixWithCommon = newPrefix;
182      prefixes.add(newPrefixWithCommon);
183    }
184
185    // generate rest of the row
186    List<byte[]> rows = new ArrayList<>();
187    for (int i = 0; i < numberOfRows; ++i) {
188      int suffixLength = averageSuffixLength;
189      suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
190          suffixLengthVariance;
191      int randomPrefix = randomizer.nextInt(prefixes.size());
192      byte[] row = new byte[prefixes.get(randomPrefix).length +
193                            suffixLength];
194      byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
195      rows.add(rowWithCommonPrefix);
196    }
197
198    return rows;
199  }
200
201  /**
202   * Generate test data useful to test encoders.
203   * @param howMany How many Key values should be generated.
204   * @return sorted list of key values
205   */
206  public List<KeyValue> generateTestKeyValues(int howMany) {
207    return generateTestKeyValues(howMany, false);
208  }
209  /**
210   * Generate test data useful to test encoders.
211   * @param howMany How many Key values should be generated.
212   * @return sorted list of key values
213   */
214  public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
215    List<KeyValue> result = new ArrayList<>();
216
217    List<byte[]> rows = generateRows();
218    Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>();
219
220    if(family==null){
221      family = new byte[columnFamilyLength];
222      randomizer.nextBytes(family);
223    }
224
225    long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
226
227    byte[] value = new byte[valueLength];
228
229    for (int i = 0; i < howMany; ++i) {
230      long timestamp = baseTimestamp;
231      if(timestampDiffSize > 0){
232        timestamp += randomizer.nextInt(timestampDiffSize);
233      }
234      Integer rowId = randomizer.nextInt(rows.size());
235      byte[] row = rows.get(rowId);
236
237      // generate qualifier, sometimes it is same, sometimes similar,
238      // occasionally completely different
239      byte[] qualifier;
240      float qualifierChance = randomizer.nextFloat();
241      if (!rowsToQualifier.containsKey(rowId)
242          || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
243        int qualifierLength = averageQualifierLength;
244        qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
245            - qualifierLengthVariance;
246        qualifier = new byte[qualifierLength];
247        randomizer.nextBytes(qualifier);
248
249        // add it to map
250        if (!rowsToQualifier.containsKey(rowId)) {
251          rowsToQualifier.put(rowId, new ArrayList<>());
252        }
253        rowsToQualifier.get(rowId).add(qualifier);
254      } else if (qualifierChance > chanceForSameQualifier) {
255        // similar qualifier
256        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
257        byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
258            .size()));
259
260        qualifier = new byte[originalQualifier.length];
261        int commonPrefix = randomizer.nextInt(qualifier.length);
262        System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
263        for (int j = commonPrefix; j < qualifier.length; ++j) {
264          qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
265        }
266
267        rowsToQualifier.get(rowId).add(qualifier);
268      } else {
269        // same qualifier
270        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
271        qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
272      }
273
274      if (randomizer.nextFloat() < chanceForZeroValue) {
275        for (int j = 0; j < value.length; ++j) {
276          value[j] = (byte) 0;
277        }
278      } else {
279        randomizer.nextBytes(value);
280      }
281
282      if (useTags) {
283        result.add(new KeyValue(row, family, qualifier, timestamp, value,
284            new Tag[] { new ArrayBackedTag((byte) 1, "value1") }));
285      } else {
286        result.add(new KeyValue(row, family, qualifier, timestamp, value));
287      }
288    }
289
290    Collections.sort(result, CellComparator.getInstance());
291
292    return result;
293  }
294
295  /**
296   * Generate test data useful to test encoders.
297   * @param howMany How many Key values should be generated.
298   * @return sorted list of key values
299   */
300  public List<Cell> generateTestExtendedOffheapKeyValues(int howMany, boolean useTags) {
301    List<Cell> result = new ArrayList<>();
302    List<byte[]> rows = generateRows();
303    Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>();
304
305    if (family == null) {
306      family = new byte[columnFamilyLength];
307      randomizer.nextBytes(family);
308    }
309
310    long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
311
312    byte[] value = new byte[valueLength];
313
314    for (int i = 0; i < howMany; ++i) {
315      long timestamp = baseTimestamp;
316      if(timestampDiffSize > 0){
317        timestamp += randomizer.nextInt(timestampDiffSize);
318      }
319      Integer rowId = randomizer.nextInt(rows.size());
320      byte[] row = rows.get(rowId);
321
322      // generate qualifier, sometimes it is same, sometimes similar,
323      // occasionally completely different
324      byte[] qualifier;
325      float qualifierChance = randomizer.nextFloat();
326      if (!rowsToQualifier.containsKey(rowId)
327          || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
328        int qualifierLength = averageQualifierLength;
329        qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
330            - qualifierLengthVariance;
331        qualifier = new byte[qualifierLength];
332        randomizer.nextBytes(qualifier);
333
334        // add it to map
335        if (!rowsToQualifier.containsKey(rowId)) {
336          rowsToQualifier.put(rowId, new ArrayList<>());
337        }
338        rowsToQualifier.get(rowId).add(qualifier);
339      } else if (qualifierChance > chanceForSameQualifier) {
340        // similar qualifier
341        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
342        byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
343            .size()));
344
345        qualifier = new byte[originalQualifier.length];
346        int commonPrefix = randomizer.nextInt(qualifier.length);
347        System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
348        for (int j = commonPrefix; j < qualifier.length; ++j) {
349          qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
350        }
351
352        rowsToQualifier.get(rowId).add(qualifier);
353      } else {
354        // same qualifier
355        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
356        qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
357      }
358
359      if (randomizer.nextFloat() < chanceForZeroValue) {
360        for (int j = 0; j < value.length; ++j) {
361          value[j] = (byte) 0;
362        }
363      } else {
364        randomizer.nextBytes(value);
365      }
366      if (useTags) {
367        KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, value,
368            new Tag[] { new ArrayBackedTag((byte) 1, "value1") });
369        ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength());
370        ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(),
371          keyValue.getOffset(), keyValue.getLength());
372        ByteBufferKeyValue offheapKV =
373            new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), 0);
374        result.add(offheapKV);
375      } else {
376        KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, value);
377        ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength());
378        ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(),
379          keyValue.getOffset(), keyValue.getLength());
380        ByteBufferKeyValue offheapKV =
381            new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), 0);
382        result.add(offheapKV);
383      }
384    }
385
386    Collections.sort(result, CellComparator.getInstance());
387
388    return result;
389  }
390
391  static class ExtendedOffheapKeyValue extends ByteBufferKeyValue {
392    public ExtendedOffheapKeyValue(ByteBuffer buf, int offset, int length, long seqId) {
393      super(buf, offset, length, seqId);
394    }
395
396    @Override
397    public byte[] getRowArray() {
398      throw new IllegalArgumentException("getRowArray operation is not allowed");
399    }
400
401    @Override
402    public int getRowOffset() {
403      throw new IllegalArgumentException("getRowOffset operation is not allowed");
404    }
405
406    @Override
407    public byte[] getFamilyArray() {
408      throw new IllegalArgumentException("getFamilyArray operation is not allowed");
409    }
410
411    @Override
412    public int getFamilyOffset() {
413      throw new IllegalArgumentException("getFamilyOffset operation is not allowed");
414    }
415
416    @Override
417    public byte[] getQualifierArray() {
418      throw new IllegalArgumentException("getQualifierArray operation is not allowed");
419    }
420
421    @Override
422    public int getQualifierOffset() {
423      throw new IllegalArgumentException("getQualifierOffset operation is not allowed");
424    }
425
426    @Override
427    public byte[] getValueArray() {
428      throw new IllegalArgumentException("getValueArray operation is not allowed");
429    }
430
431    @Override
432    public int getValueOffset() {
433      throw new IllegalArgumentException("getValueOffset operation is not allowed");
434    }
435
436    @Override
437    public byte[] getTagsArray() {
438      throw new IllegalArgumentException("getTagsArray operation is not allowed");
439    }
440
441    @Override
442    public int getTagsOffset() {
443      throw new IllegalArgumentException("getTagsOffset operation is not allowed");
444    }
445  }
446
447  /**
448   * Convert list of KeyValues to byte buffer.
449   * @param keyValues list of KeyValues to be converted.
450   * @return buffer with content from key values
451   */
452  public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
453      boolean includesMemstoreTS) {
454    int totalSize = 0;
455    for (KeyValue kv : keyValues) {
456      totalSize += kv.getLength();
457      if (includesMemstoreTS) {
458        totalSize += WritableUtils.getVIntSize(kv.getSequenceId());
459      }
460    }
461
462    ByteBuffer result = ByteBuffer.allocate(totalSize);
463    for (KeyValue kv : keyValues) {
464      result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
465      if (includesMemstoreTS) {
466        ByteBufferUtils.writeVLong(result, kv.getSequenceId());
467      }
468    }
469    return result;
470  }
471
472  /************************ get/set ***********************************/
473  public RedundantKVGenerator setCommonPrefix(byte[] prefix){
474    this.commonPrefix = prefix;
475    return this;
476  }
477
478  public RedundantKVGenerator setRandomizer(Random randomizer) {
479    this.randomizer = randomizer;
480    return this;
481  }
482
483  public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) {
484    this.numberOfRowPrefixes = numberOfRowPrefixes;
485    return this;
486  }
487
488  public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) {
489    this.averagePrefixLength = averagePrefixLength;
490    return this;
491  }
492
493  public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) {
494    this.prefixLengthVariance = prefixLengthVariance;
495    return this;
496  }
497
498  public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) {
499    this.averageSuffixLength = averageSuffixLength;
500    return this;
501  }
502
503  public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) {
504    this.suffixLengthVariance = suffixLengthVariance;
505    return this;
506  }
507
508  public RedundantKVGenerator setNumberOfRows(int numberOfRows) {
509    this.numberOfRows = numberOfRows;
510    return this;
511  }
512
513  public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) {
514    this.chanceForSameQualifier = chanceForSameQualifier;
515    return this;
516  }
517
518  public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimiliarQualifier) {
519    this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
520    return this;
521  }
522
523  public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) {
524    this.averageQualifierLength = averageQualifierLength;
525    return this;
526  }
527
528  public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) {
529    this.qualifierLengthVariance = qualifierLengthVariance;
530    return this;
531  }
532
533  public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) {
534    this.columnFamilyLength = columnFamilyLength;
535    return this;
536  }
537
538  public RedundantKVGenerator setFamily(byte[] family) {
539    this.family = family;
540    this.columnFamilyLength = family.length;
541    return this;
542  }
543
544  public RedundantKVGenerator setValueLength(int valueLength) {
545    this.valueLength = valueLength;
546    return this;
547  }
548
549  public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) {
550    this.chanceForZeroValue = chanceForZeroValue;
551    return this;
552  }
553
554  public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) {
555    this.baseTimestampDivide = baseTimestampDivide;
556    return this;
557  }
558
559  public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) {
560    this.timestampDiffSize = timestampDiffSize;
561    return this;
562  }
563}