001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with this
004 * work for additional information regarding copyright ownership. The ASF
005 * licenses this file to you under the Apache License, Version 2.0 (the
006 * "License"); you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
014 * License for the specific language governing permissions and limitations
015 * under the License.
016 */
017package org.apache.hadoop.hbase.util;
018
019import java.nio.ByteBuffer;
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.HashMap;
023import java.util.List;
024import java.util.Map;
025import java.util.Random;
026
027import org.apache.hadoop.hbase.ArrayBackedTag;
028import org.apache.hadoop.hbase.ByteBufferKeyValue;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.CellComparator;
031import org.apache.hadoop.hbase.KeyValue;
032import org.apache.hadoop.hbase.Tag;
033import org.apache.hadoop.io.WritableUtils;
034import org.apache.yetus.audience.InterfaceAudience;
035
036import org.apache.hbase.thirdparty.com.google.common.primitives.Bytes;
037
038/**
039 * Generate list of key values which are very useful to test data block encoding
040 * and compression.
041 */
042@edu.umd.cs.findbugs.annotations.SuppressWarnings(
043    value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
044    justification="Should probably fix")
045@InterfaceAudience.Private
046public class RedundantKVGenerator {
047  // row settings
048  static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
049  static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
050  static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
051  static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
052  static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
053  static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
054  static int DEFAULT_NUMBER_OF_ROW = 500;
055
056  // qualifier
057  static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
058  static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
059  static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
060  static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
061
062  static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
063  static int DEFAULT_VALUE_LENGTH = 8;
064  static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
065
066  static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
067  static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
068
069  /**
070   * Default constructor, assumes all parameters from class constants.
071   */
072  public RedundantKVGenerator() {
073    this(new Random(42L),
074        DEFAULT_NUMBER_OF_ROW_PREFIXES,
075        DEFAULT_AVERAGE_PREFIX_LENGTH,
076        DEFAULT_PREFIX_LENGTH_VARIANCE,
077        DEFAULT_AVERAGE_SUFFIX_LENGTH,
078        DEFAULT_SUFFIX_LENGTH_VARIANCE,
079        DEFAULT_NUMBER_OF_ROW,
080
081        DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
082        DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
083        DEFAULT_AVERAGE_QUALIFIER_LENGTH,
084        DEFAULT_QUALIFIER_LENGTH_VARIANCE,
085
086        DEFAULT_COLUMN_FAMILY_LENGTH,
087        DEFAULT_VALUE_LENGTH,
088        DEFAULT_CHANCE_FOR_ZERO_VALUE,
089
090        DEFAULT_BASE_TIMESTAMP_DIVIDE,
091        DEFAULT_TIMESTAMP_DIFF_SIZE
092    );
093  }
094
095  /**
096   * Various configuration options for generating key values
097   * @param randomizer pick things by random
098   */
099  public RedundantKVGenerator(Random randomizer,
100      int numberOfRowPrefixes,
101      int averagePrefixLength,
102      int prefixLengthVariance,
103      int averageSuffixLength,
104      int suffixLengthVariance,
105      int numberOfRows,
106
107      float chanceForSameQualifier,
108      float chanceForSimiliarQualifier,
109      int averageQualifierLength,
110      int qualifierLengthVariance,
111
112      int columnFamilyLength,
113      int valueLength,
114      float chanceForZeroValue,
115
116      int baseTimestampDivide,
117      int timestampDiffSize) {
118    this.randomizer = randomizer;
119
120    this.commonPrefix = DEFAULT_COMMON_PREFIX;
121    this.numberOfRowPrefixes = numberOfRowPrefixes;
122    this.averagePrefixLength = averagePrefixLength;
123    this.prefixLengthVariance = prefixLengthVariance;
124    this.averageSuffixLength = averageSuffixLength;
125    this.suffixLengthVariance = suffixLengthVariance;
126    this.numberOfRows = numberOfRows;
127
128    this.chanceForSameQualifier = chanceForSameQualifier;
129    this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
130    this.averageQualifierLength = averageQualifierLength;
131    this.qualifierLengthVariance = qualifierLengthVariance;
132
133    this.columnFamilyLength = columnFamilyLength;
134    this.valueLength = valueLength;
135    this.chanceForZeroValue = chanceForZeroValue;
136
137    this.baseTimestampDivide = baseTimestampDivide;
138    this.timestampDiffSize = timestampDiffSize;
139  }
140
141  /** Used to generate dataset */
142  private Random randomizer;
143
144  // row settings
145  private byte[] commonPrefix; //global prefix before rowPrefixes
146  private int numberOfRowPrefixes;
147  private int averagePrefixLength;
148  private int prefixLengthVariance;
149  private int averageSuffixLength;
150  private int suffixLengthVariance;
151  private int numberOfRows;
152
153  // family
154  private byte[] family;
155
156  // qualifier
157  private float chanceForSameQualifier;
158  private float chanceForSimilarQualifier;
159  private int averageQualifierLength;
160  private int qualifierLengthVariance;
161
162  private int columnFamilyLength;
163  private int valueLength;
164  private float chanceForZeroValue;
165
166  private int baseTimestampDivide;
167  private int timestampDiffSize;
168
169  private List<byte[]> generateRows() {
170    // generate prefixes
171    List<byte[]> prefixes = new ArrayList<>();
172    prefixes.add(new byte[0]);
173    for (int i = 1; i < numberOfRowPrefixes; ++i) {
174      int prefixLength = averagePrefixLength;
175      prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
176          prefixLengthVariance;
177      byte[] newPrefix = new byte[prefixLength];
178      randomizer.nextBytes(newPrefix);
179      prefixes.add(newPrefix);
180    }
181
182    // generate rest of the row
183    List<byte[]> rows = new ArrayList<>();
184    for (int i = 0; i < numberOfRows; ++i) {
185      int suffixLength = averageSuffixLength;
186      suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
187          suffixLengthVariance;
188      int randomPrefix = randomizer.nextInt(prefixes.size());
189      byte[] row = new byte[prefixes.get(randomPrefix).length +
190                            suffixLength];
191      byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
192      rows.add(rowWithCommonPrefix);
193    }
194
195    return rows;
196  }
197
198  /**
199   * Generate test data useful to test encoders.
200   * @param howMany How many Key values should be generated.
201   * @return sorted list of key values
202   */
203  public List<KeyValue> generateTestKeyValues(int howMany) {
204    return generateTestKeyValues(howMany, false);
205  }
206
207  /**
208   * Generate test data useful to test encoders.
209   * @param howMany How many Key values should be generated.
210   * @return sorted list of key values
211   */
212  public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
213    List<KeyValue> result = new ArrayList<>();
214
215    List<byte[]> rows = generateRows();
216    Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>();
217
218    if(family==null){
219      family = new byte[columnFamilyLength];
220      randomizer.nextBytes(family);
221    }
222
223    long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
224
225    byte[] value = new byte[valueLength];
226
227    for (int i = 0; i < howMany; ++i) {
228      long timestamp = baseTimestamp;
229      if(timestampDiffSize > 0){
230        timestamp += randomizer.nextInt(timestampDiffSize);
231      }
232      Integer rowId = randomizer.nextInt(rows.size());
233      byte[] row = rows.get(rowId);
234
235      // generate qualifier, sometimes it is same, sometimes similar,
236      // occasionally completely different
237      byte[] qualifier;
238      float qualifierChance = randomizer.nextFloat();
239      if (!rowsToQualifier.containsKey(rowId)
240          || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
241        int qualifierLength = averageQualifierLength;
242        qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
243            - qualifierLengthVariance;
244        qualifier = new byte[qualifierLength];
245        randomizer.nextBytes(qualifier);
246
247        // add it to map
248        if (!rowsToQualifier.containsKey(rowId)) {
249          rowsToQualifier.put(rowId, new ArrayList<>());
250        }
251        rowsToQualifier.get(rowId).add(qualifier);
252      } else if (qualifierChance > chanceForSameQualifier) {
253        // similar qualifier
254        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
255        byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
256            .size()));
257
258        qualifier = new byte[originalQualifier.length];
259        int commonPrefix = randomizer.nextInt(qualifier.length);
260        System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
261        for (int j = commonPrefix; j < qualifier.length; ++j) {
262          qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
263        }
264
265        rowsToQualifier.get(rowId).add(qualifier);
266      } else {
267        // same qualifier
268        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
269        qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
270      }
271
272      if (randomizer.nextFloat() < chanceForZeroValue) {
273        Arrays.fill(value, (byte) 0);
274      } else {
275        randomizer.nextBytes(value);
276      }
277
278      if (useTags) {
279        result.add(new KeyValue(row, family, qualifier, timestamp, value,
280            new Tag[] { new ArrayBackedTag((byte) 1, "value1") }));
281      } else {
282        result.add(new KeyValue(row, family, qualifier, timestamp, value));
283      }
284    }
285
286    result.sort(CellComparator.getInstance());
287
288    return result;
289  }
290
291  /**
292   * Generate test data useful to test encoders.
293   * @param howMany How many Key values should be generated.
294   * @return sorted list of key values
295   */
296  public List<Cell> generateTestExtendedOffheapKeyValues(int howMany, boolean useTags) {
297    List<Cell> result = new ArrayList<>();
298    List<byte[]> rows = generateRows();
299    Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>();
300
301    if (family == null) {
302      family = new byte[columnFamilyLength];
303      randomizer.nextBytes(family);
304    }
305
306    long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
307
308    byte[] value = new byte[valueLength];
309
310    for (int i = 0; i < howMany; ++i) {
311      long timestamp = baseTimestamp;
312      if(timestampDiffSize > 0){
313        timestamp += randomizer.nextInt(timestampDiffSize);
314      }
315      Integer rowId = randomizer.nextInt(rows.size());
316      byte[] row = rows.get(rowId);
317
318      // generate qualifier, sometimes it is same, sometimes similar,
319      // occasionally completely different
320      byte[] qualifier;
321      float qualifierChance = randomizer.nextFloat();
322      if (!rowsToQualifier.containsKey(rowId)
323          || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
324        int qualifierLength = averageQualifierLength;
325        qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
326            - qualifierLengthVariance;
327        qualifier = new byte[qualifierLength];
328        randomizer.nextBytes(qualifier);
329
330        // add it to map
331        if (!rowsToQualifier.containsKey(rowId)) {
332          rowsToQualifier.put(rowId, new ArrayList<>());
333        }
334        rowsToQualifier.get(rowId).add(qualifier);
335      } else if (qualifierChance > chanceForSameQualifier) {
336        // similar qualifier
337        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
338        byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
339            .size()));
340
341        qualifier = new byte[originalQualifier.length];
342        int commonPrefix = randomizer.nextInt(qualifier.length);
343        System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
344        for (int j = commonPrefix; j < qualifier.length; ++j) {
345          qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
346        }
347
348        rowsToQualifier.get(rowId).add(qualifier);
349      } else {
350        // same qualifier
351        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
352        qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
353      }
354
355      if (randomizer.nextFloat() < chanceForZeroValue) {
356        Arrays.fill(value, (byte) 0);
357      } else {
358        randomizer.nextBytes(value);
359      }
360      if (useTags) {
361        KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, value,
362            new Tag[] { new ArrayBackedTag((byte) 1, "value1") });
363        ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength());
364        ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(),
365          keyValue.getOffset(), keyValue.getLength());
366        ByteBufferKeyValue offheapKV =
367            new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), 0);
368        result.add(offheapKV);
369      } else {
370        KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, value);
371        ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength());
372        ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(),
373          keyValue.getOffset(), keyValue.getLength());
374        ByteBufferKeyValue offheapKV =
375            new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), 0);
376        result.add(offheapKV);
377      }
378    }
379
380    result.sort(CellComparator.getInstance());
381
382    return result;
383  }
384
385  static class ExtendedOffheapKeyValue extends ByteBufferKeyValue {
386    public ExtendedOffheapKeyValue(ByteBuffer buf, int offset, int length, long seqId) {
387      super(buf, offset, length, seqId);
388    }
389
390    @Override
391    public byte[] getRowArray() {
392      throw new IllegalArgumentException("getRowArray operation is not allowed");
393    }
394
395    @Override
396    public int getRowOffset() {
397      throw new IllegalArgumentException("getRowOffset operation is not allowed");
398    }
399
400    @Override
401    public byte[] getFamilyArray() {
402      throw new IllegalArgumentException("getFamilyArray operation is not allowed");
403    }
404
405    @Override
406    public int getFamilyOffset() {
407      throw new IllegalArgumentException("getFamilyOffset operation is not allowed");
408    }
409
410    @Override
411    public byte[] getQualifierArray() {
412      throw new IllegalArgumentException("getQualifierArray operation is not allowed");
413    }
414
415    @Override
416    public int getQualifierOffset() {
417      throw new IllegalArgumentException("getQualifierOffset operation is not allowed");
418    }
419
420    @Override
421    public byte[] getValueArray() {
422      throw new IllegalArgumentException("getValueArray operation is not allowed");
423    }
424
425    @Override
426    public int getValueOffset() {
427      throw new IllegalArgumentException("getValueOffset operation is not allowed");
428    }
429
430    @Override
431    public byte[] getTagsArray() {
432      throw new IllegalArgumentException("getTagsArray operation is not allowed");
433    }
434
435    @Override
436    public int getTagsOffset() {
437      throw new IllegalArgumentException("getTagsOffset operation is not allowed");
438    }
439  }
440
441  /**
442   * Convert list of KeyValues to byte buffer.
443   * @param keyValues list of KeyValues to be converted.
444   * @return buffer with content from key values
445   */
446  public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
447      boolean includesMemstoreTS) {
448    int totalSize = 0;
449    for (KeyValue kv : keyValues) {
450      totalSize += kv.getLength();
451      if (includesMemstoreTS) {
452        totalSize += WritableUtils.getVIntSize(kv.getSequenceId());
453      }
454    }
455
456    ByteBuffer result = ByteBuffer.allocate(totalSize);
457    for (KeyValue kv : keyValues) {
458      result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
459      if (includesMemstoreTS) {
460        ByteBufferUtils.writeVLong(result, kv.getSequenceId());
461      }
462    }
463    return result;
464  }
465
466  public RedundantKVGenerator setFamily(byte[] family) {
467    this.family = family;
468    this.columnFamilyLength = family.length;
469    return this;
470  }
471}