001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.nio.ByteBuffer;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.HashMap;
024import java.util.List;
025import java.util.Map;
026import java.util.Random;
027import org.apache.hadoop.hbase.ArrayBackedTag;
028import org.apache.hadoop.hbase.ByteBufferKeyValue;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.CellComparator;
031import org.apache.hadoop.hbase.KeyValue;
032import org.apache.hadoop.hbase.Tag;
033import org.apache.hadoop.io.WritableUtils;
034import org.apache.yetus.audience.InterfaceAudience;
035
036import org.apache.hbase.thirdparty.com.google.common.primitives.Bytes;
037
038/**
039 * Generate list of key values which are very useful to test data block encoding and compression.
040 */
041@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
042    justification = "Should probably fix")
043@InterfaceAudience.Private
044public class RedundantKVGenerator {
045  // row settings
046  static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
047  static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
048  static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
049  static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
050  static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
051  static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
052  static int DEFAULT_NUMBER_OF_ROW = 500;
053
054  // qualifier
055  static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
056  static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
057  static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
058  static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
059
060  static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
061  static int DEFAULT_VALUE_LENGTH = 8;
062  static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
063
064  static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
065  static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
066
067  /**
068   * Default constructor, assumes all parameters from class constants.
069   */
070  public RedundantKVGenerator() {
071    this(new Random(42L), DEFAULT_NUMBER_OF_ROW_PREFIXES, DEFAULT_AVERAGE_PREFIX_LENGTH,
072      DEFAULT_PREFIX_LENGTH_VARIANCE, DEFAULT_AVERAGE_SUFFIX_LENGTH, DEFAULT_SUFFIX_LENGTH_VARIANCE,
073      DEFAULT_NUMBER_OF_ROW,
074
075      DEFAULT_CHANCE_FOR_SAME_QUALIFIER, DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
076      DEFAULT_AVERAGE_QUALIFIER_LENGTH, DEFAULT_QUALIFIER_LENGTH_VARIANCE,
077
078      DEFAULT_COLUMN_FAMILY_LENGTH, DEFAULT_VALUE_LENGTH, DEFAULT_CHANCE_FOR_ZERO_VALUE,
079
080      DEFAULT_BASE_TIMESTAMP_DIVIDE, DEFAULT_TIMESTAMP_DIFF_SIZE);
081  }
082
083  /**
084   * Various configuration options for generating key values
085   * @param randomizer pick things by random
086   */
087  public RedundantKVGenerator(Random randomizer, int numberOfRowPrefixes, int averagePrefixLength,
088    int prefixLengthVariance, int averageSuffixLength, int suffixLengthVariance, int numberOfRows,
089
090    float chanceForSameQualifier, float chanceForSimiliarQualifier, int averageQualifierLength,
091    int qualifierLengthVariance,
092
093    int columnFamilyLength, int valueLength, float chanceForZeroValue,
094
095    int baseTimestampDivide, int timestampDiffSize) {
096    this.randomizer = randomizer;
097
098    this.commonPrefix = DEFAULT_COMMON_PREFIX;
099    this.numberOfRowPrefixes = numberOfRowPrefixes;
100    this.averagePrefixLength = averagePrefixLength;
101    this.prefixLengthVariance = prefixLengthVariance;
102    this.averageSuffixLength = averageSuffixLength;
103    this.suffixLengthVariance = suffixLengthVariance;
104    this.numberOfRows = numberOfRows;
105
106    this.chanceForSameQualifier = chanceForSameQualifier;
107    this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
108    this.averageQualifierLength = averageQualifierLength;
109    this.qualifierLengthVariance = qualifierLengthVariance;
110
111    this.columnFamilyLength = columnFamilyLength;
112    this.valueLength = valueLength;
113    this.chanceForZeroValue = chanceForZeroValue;
114
115    this.baseTimestampDivide = baseTimestampDivide;
116    this.timestampDiffSize = timestampDiffSize;
117  }
118
119  /** Used to generate dataset */
120  private Random randomizer;
121
122  // row settings
123  private byte[] commonPrefix; // global prefix before rowPrefixes
124  private int numberOfRowPrefixes;
125  private int averagePrefixLength;
126  private int prefixLengthVariance;
127  private int averageSuffixLength;
128  private int suffixLengthVariance;
129  private int numberOfRows;
130
131  // family
132  private byte[] family;
133
134  // qualifier
135  private float chanceForSameQualifier;
136  private float chanceForSimilarQualifier;
137  private int averageQualifierLength;
138  private int qualifierLengthVariance;
139
140  private int columnFamilyLength;
141  private int valueLength;
142  private float chanceForZeroValue;
143
144  private int baseTimestampDivide;
145  private int timestampDiffSize;
146
147  private List<byte[]> generateRows() {
148    // generate prefixes
149    List<byte[]> prefixes = new ArrayList<>();
150    prefixes.add(new byte[0]);
151    for (int i = 1; i < numberOfRowPrefixes; ++i) {
152      int prefixLength = averagePrefixLength;
153      prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) - prefixLengthVariance;
154      byte[] newPrefix = new byte[prefixLength];
155      randomizer.nextBytes(newPrefix);
156      prefixes.add(newPrefix);
157    }
158
159    // generate rest of the row
160    List<byte[]> rows = new ArrayList<>();
161    for (int i = 0; i < numberOfRows; ++i) {
162      int suffixLength = averageSuffixLength;
163      suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) - suffixLengthVariance;
164      int randomPrefix = randomizer.nextInt(prefixes.size());
165      byte[] row = new byte[prefixes.get(randomPrefix).length + suffixLength];
166      byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
167      rows.add(rowWithCommonPrefix);
168    }
169
170    return rows;
171  }
172
173  /**
174   * Generate test data useful to test encoders.
175   * @param howMany How many Key values should be generated.
176   * @return sorted list of key values
177   */
178  public List<KeyValue> generateTestKeyValues(int howMany) {
179    return generateTestKeyValues(howMany, false);
180  }
181
182  /**
183   * Generate test data useful to test encoders.
184   * @param howMany How many Key values should be generated.
185   * @return sorted list of key values
186   */
187  public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
188    List<KeyValue> result = new ArrayList<>();
189
190    List<byte[]> rows = generateRows();
191    Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>();
192
193    if (family == null) {
194      family = new byte[columnFamilyLength];
195      randomizer.nextBytes(family);
196    }
197
198    long baseTimestamp = randomizer.nextInt(Integer.MAX_VALUE) / baseTimestampDivide;
199
200    byte[] value = new byte[valueLength];
201
202    for (int i = 0; i < howMany; ++i) {
203      long timestamp = baseTimestamp;
204      if (timestampDiffSize > 0) {
205        timestamp += randomizer.nextInt(timestampDiffSize);
206      }
207      Integer rowId = randomizer.nextInt(rows.size());
208      byte[] row = rows.get(rowId);
209
210      // generate qualifier, sometimes it is same, sometimes similar,
211      // occasionally completely different
212      byte[] qualifier;
213      float qualifierChance = randomizer.nextFloat();
214      if (
215        !rowsToQualifier.containsKey(rowId)
216          || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier
217      ) {
218        int qualifierLength = averageQualifierLength;
219        qualifierLength +=
220          randomizer.nextInt(2 * qualifierLengthVariance + 1) - qualifierLengthVariance;
221        qualifier = new byte[qualifierLength];
222        randomizer.nextBytes(qualifier);
223
224        // add it to map
225        if (!rowsToQualifier.containsKey(rowId)) {
226          rowsToQualifier.put(rowId, new ArrayList<>());
227        }
228        rowsToQualifier.get(rowId).add(qualifier);
229      } else if (qualifierChance > chanceForSameQualifier) {
230        // similar qualifier
231        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
232        byte[] originalQualifier =
233          previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
234
235        qualifier = new byte[originalQualifier.length];
236        int commonPrefix = randomizer.nextInt(qualifier.length);
237        System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
238        for (int j = commonPrefix; j < qualifier.length; ++j) {
239          qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
240        }
241
242        rowsToQualifier.get(rowId).add(qualifier);
243      } else {
244        // same qualifier
245        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
246        qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
247      }
248
249      if (randomizer.nextFloat() < chanceForZeroValue) {
250        Arrays.fill(value, (byte) 0);
251      } else {
252        randomizer.nextBytes(value);
253      }
254
255      if (useTags) {
256        result.add(new KeyValue(row, family, qualifier, timestamp, value,
257          new Tag[] { new ArrayBackedTag((byte) 1, "value1") }));
258      } else {
259        result.add(new KeyValue(row, family, qualifier, timestamp, value));
260      }
261    }
262
263    result.sort(CellComparator.getInstance());
264
265    return result;
266  }
267
268  /**
269   * Generate test data useful to test encoders.
270   * @param howMany How many Key values should be generated.
271   * @return sorted list of key values
272   */
273  public List<Cell> generateTestExtendedOffheapKeyValues(int howMany, boolean useTags) {
274    List<Cell> result = new ArrayList<>();
275    List<byte[]> rows = generateRows();
276    Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>();
277
278    if (family == null) {
279      family = new byte[columnFamilyLength];
280      randomizer.nextBytes(family);
281    }
282
283    long baseTimestamp = randomizer.nextInt(Integer.MAX_VALUE) / baseTimestampDivide;
284
285    byte[] value = new byte[valueLength];
286
287    for (int i = 0; i < howMany; ++i) {
288      long timestamp = baseTimestamp;
289      if (timestampDiffSize > 0) {
290        timestamp += randomizer.nextInt(timestampDiffSize);
291      }
292      Integer rowId = randomizer.nextInt(rows.size());
293      byte[] row = rows.get(rowId);
294
295      // generate qualifier, sometimes it is same, sometimes similar,
296      // occasionally completely different
297      byte[] qualifier;
298      float qualifierChance = randomizer.nextFloat();
299      if (
300        !rowsToQualifier.containsKey(rowId)
301          || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier
302      ) {
303        int qualifierLength = averageQualifierLength;
304        qualifierLength +=
305          randomizer.nextInt(2 * qualifierLengthVariance + 1) - qualifierLengthVariance;
306        qualifier = new byte[qualifierLength];
307        randomizer.nextBytes(qualifier);
308
309        // add it to map
310        if (!rowsToQualifier.containsKey(rowId)) {
311          rowsToQualifier.put(rowId, new ArrayList<>());
312        }
313        rowsToQualifier.get(rowId).add(qualifier);
314      } else if (qualifierChance > chanceForSameQualifier) {
315        // similar qualifier
316        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
317        byte[] originalQualifier =
318          previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
319
320        qualifier = new byte[originalQualifier.length];
321        int commonPrefix = randomizer.nextInt(qualifier.length);
322        System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
323        for (int j = commonPrefix; j < qualifier.length; ++j) {
324          qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
325        }
326
327        rowsToQualifier.get(rowId).add(qualifier);
328      } else {
329        // same qualifier
330        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
331        qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
332      }
333
334      if (randomizer.nextFloat() < chanceForZeroValue) {
335        Arrays.fill(value, (byte) 0);
336      } else {
337        randomizer.nextBytes(value);
338      }
339      if (useTags) {
340        KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, value,
341          new Tag[] { new ArrayBackedTag((byte) 1, "value1") });
342        ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength());
343        ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(),
344          keyValue.getOffset(), keyValue.getLength());
345        ByteBufferKeyValue offheapKV =
346          new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), 0);
347        result.add(offheapKV);
348      } else {
349        KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, value);
350        ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength());
351        ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(),
352          keyValue.getOffset(), keyValue.getLength());
353        ByteBufferKeyValue offheapKV =
354          new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), 0);
355        result.add(offheapKV);
356      }
357    }
358
359    result.sort(CellComparator.getInstance());
360
361    return result;
362  }
363
364  static class ExtendedOffheapKeyValue extends ByteBufferKeyValue {
365    public ExtendedOffheapKeyValue(ByteBuffer buf, int offset, int length, long seqId) {
366      super(buf, offset, length, seqId);
367    }
368
369    @Override
370    public byte[] getRowArray() {
371      throw new IllegalArgumentException("getRowArray operation is not allowed");
372    }
373
374    @Override
375    public int getRowOffset() {
376      throw new IllegalArgumentException("getRowOffset operation is not allowed");
377    }
378
379    @Override
380    public byte[] getFamilyArray() {
381      throw new IllegalArgumentException("getFamilyArray operation is not allowed");
382    }
383
384    @Override
385    public int getFamilyOffset() {
386      throw new IllegalArgumentException("getFamilyOffset operation is not allowed");
387    }
388
389    @Override
390    public byte[] getQualifierArray() {
391      throw new IllegalArgumentException("getQualifierArray operation is not allowed");
392    }
393
394    @Override
395    public int getQualifierOffset() {
396      throw new IllegalArgumentException("getQualifierOffset operation is not allowed");
397    }
398
399    @Override
400    public byte[] getValueArray() {
401      throw new IllegalArgumentException("getValueArray operation is not allowed");
402    }
403
404    @Override
405    public int getValueOffset() {
406      throw new IllegalArgumentException("getValueOffset operation is not allowed");
407    }
408
409    @Override
410    public byte[] getTagsArray() {
411      throw new IllegalArgumentException("getTagsArray operation is not allowed");
412    }
413
414    @Override
415    public int getTagsOffset() {
416      throw new IllegalArgumentException("getTagsOffset operation is not allowed");
417    }
418  }
419
420  /**
421   * Convert list of KeyValues to byte buffer.
422   * @param keyValues list of KeyValues to be converted.
423   * @return buffer with content from key values
424   */
425  public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
426    boolean includesMemstoreTS) {
427    int totalSize = 0;
428    for (KeyValue kv : keyValues) {
429      totalSize += kv.getLength();
430      if (includesMemstoreTS) {
431        totalSize += WritableUtils.getVIntSize(kv.getSequenceId());
432      }
433    }
434
435    ByteBuffer result = ByteBuffer.allocate(totalSize);
436    for (KeyValue kv : keyValues) {
437      result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
438      if (includesMemstoreTS) {
439        ByteBufferUtils.writeVLong(result, kv.getSequenceId());
440      }
441    }
442    return result;
443  }
444
445  public RedundantKVGenerator setFamily(byte[] family) {
446    this.family = family;
447    this.columnFamilyLength = family.length;
448    return this;
449  }
450}