/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.util;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.ByteBufferKeyValue;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.io.WritableUtils;
import org.apache.yetus.audience.InterfaceAudience;

import org.apache.hbase.thirdparty.com.google.common.primitives.Bytes;

/**
 * Generates lists of key values with a lot of redundancy (shared row prefixes,
 * repeated or similar qualifiers, zeroed values, close timestamps), which are
 * very useful to test data block encoding and compression.
 * <p>
 * All randomness comes from the {@link Random} handed to the constructor, so
 * the output is reproducible for a given seed and call sequence.
 */
@InterfaceAudience.Private
public class RedundantKVGenerator {
  // row settings
  static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
  static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
  static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
  static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
  static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
  static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
  static int DEFAULT_NUMBER_OF_ROW = 500;

  // qualifier
  static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
  static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
  static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
  static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;

  static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
  static int DEFAULT_VALUE_LENGTH = 8;
  static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;

  static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
  static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;

  /** Used to generate dataset */
  private final Random randomizer;

  // row settings
  private final byte[] commonPrefix; // global prefix before rowPrefixes
  private final int numberOfRowPrefixes;
  private final int averagePrefixLength;
  private final int prefixLengthVariance;
  private final int averageSuffixLength;
  private final int suffixLengthVariance;
  private final int numberOfRows;

  // family; generated lazily on first use unless set through setFamily
  private byte[] family;

  // qualifier
  private final float chanceForSameQualifier;
  private final float chanceForSimilarQualifier;
  private final int averageQualifierLength;
  private final int qualifierLengthVariance;

  private int columnFamilyLength;
  private final int valueLength;
  private final float chanceForZeroValue;

  private final int baseTimestampDivide;
  private final int timestampDiffSize;

  /**
   * Default constructor, assumes all parameters from class constants and a
   * {@link Random} with the fixed seed 42.
   */
  public RedundantKVGenerator() {
    this(new Random(42L),
        DEFAULT_NUMBER_OF_ROW_PREFIXES,
        DEFAULT_AVERAGE_PREFIX_LENGTH,
        DEFAULT_PREFIX_LENGTH_VARIANCE,
        DEFAULT_AVERAGE_SUFFIX_LENGTH,
        DEFAULT_SUFFIX_LENGTH_VARIANCE,
        DEFAULT_NUMBER_OF_ROW,

        DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
        DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
        DEFAULT_AVERAGE_QUALIFIER_LENGTH,
        DEFAULT_QUALIFIER_LENGTH_VARIANCE,

        DEFAULT_COLUMN_FAMILY_LENGTH,
        DEFAULT_VALUE_LENGTH,
        DEFAULT_CHANCE_FOR_ZERO_VALUE,

        DEFAULT_BASE_TIMESTAMP_DIVIDE,
        DEFAULT_TIMESTAMP_DIFF_SIZE);
  }

  /**
   * Various configuration options for generating key values.
   * @param randomizer pick things by random
   * @param numberOfRowPrefixes number of row prefixes to choose from; the first
   *          one is always the empty prefix
   * @param averagePrefixLength centre of the row prefix length range
   * @param prefixLengthVariance prefix lengths are drawn from
   *          {@code average - variance} to {@code average + variance}
   * @param averageSuffixLength centre of the row suffix length range
   * @param suffixLengthVariance suffix lengths are drawn from
   *          {@code average - variance} to {@code average + variance}
   * @param numberOfRows number of distinct candidate rows generated per call
   * @param chanceForSameQualifier chance of reusing a qualifier already
   *          generated for the same row
   * @param chanceForSimiliarQualifier chance of deriving a qualifier that shares
   *          a leading part with one already generated for the same row
   * @param averageQualifierLength centre of the qualifier length range
   * @param qualifierLengthVariance qualifier lengths are drawn from
   *          {@code average - variance} to {@code average + variance}
   * @param columnFamilyLength length of the random column family that is
   *          generated when none was set through {@link #setFamily(byte[])}
   * @param valueLength length of every generated value
   * @param chanceForZeroValue chance that a value consists of zero bytes only
   * @param baseTimestampDivide divisor applied to a random non-negative int to
   *          obtain the base timestamp of a generated list
   * @param timestampDiffSize exclusive upper bound of the random offset added
   *          to the base timestamp; no offset is added when it is not positive
   */
  public RedundantKVGenerator(Random randomizer,
      int numberOfRowPrefixes,
      int averagePrefixLength,
      int prefixLengthVariance,
      int averageSuffixLength,
      int suffixLengthVariance,
      int numberOfRows,

      float chanceForSameQualifier,
      float chanceForSimiliarQualifier,
      int averageQualifierLength,
      int qualifierLengthVariance,

      int columnFamilyLength,
      int valueLength,
      float chanceForZeroValue,

      int baseTimestampDivide,
      int timestampDiffSize) {
    this.randomizer = randomizer;

    this.commonPrefix = DEFAULT_COMMON_PREFIX;
    this.numberOfRowPrefixes = numberOfRowPrefixes;
    this.averagePrefixLength = averagePrefixLength;
    this.prefixLengthVariance = prefixLengthVariance;
    this.averageSuffixLength = averageSuffixLength;
    this.suffixLengthVariance = suffixLengthVariance;
    this.numberOfRows = numberOfRows;

    this.chanceForSameQualifier = chanceForSameQualifier;
    this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
    this.averageQualifierLength = averageQualifierLength;
    this.qualifierLengthVariance = qualifierLengthVariance;

    this.columnFamilyLength = columnFamilyLength;
    this.valueLength = valueLength;
    this.chanceForZeroValue = chanceForZeroValue;

    this.baseTimestampDivide = baseTimestampDivide;
    this.timestampDiffSize = timestampDiffSize;
  }

  /**
   * Draws a length from {@code average - variance} to {@code average + variance}
   * (both inclusive), clamped to zero so it is always usable as an array size.
   */
  private int randomLength(int average, int variance) {
    int length = average + randomizer.nextInt(2 * variance + 1) - variance;
    return Math.max(0, length);
  }

  /**
   * Builds the candidate rows. Every row is the common prefix, followed by one
   * of the randomly generated row prefixes, followed by a random suffix, so that
   * many rows share their leading bytes.
   */
  private List<byte[]> generateRows() {
    // generate prefixes; the empty one lets some rows consist of a suffix only
    List<byte[]> prefixes = new ArrayList<>();
    prefixes.add(new byte[0]);
    for (int i = 1; i < numberOfRowPrefixes; ++i) {
      byte[] newPrefix = new byte[randomLength(averagePrefixLength, prefixLengthVariance)];
      randomizer.nextBytes(newPrefix);
      prefixes.add(newPrefix);
    }

    // generate rest of the row
    List<byte[]> rows = new ArrayList<>();
    for (int i = 0; i < numberOfRows; ++i) {
      byte[] suffix = new byte[randomLength(averageSuffixLength, suffixLengthVariance)];
      byte[] prefix = prefixes.get(randomizer.nextInt(prefixes.size()));
      randomizer.nextBytes(suffix);
      // Copy the chosen prefix and the random suffix into the row. Allocating
      // an array of the combined length alone would leave every row all zeros.
      rows.add(Bytes.concat(commonPrefix, prefix, suffix));
    }

    return rows;
  }

  /**
   * Picks the qualifier for the next key value of the given row: sometimes one
   * that was used before for that row, sometimes a similar one, occasionally a
   * completely different one. Newly created qualifiers are remembered in
   * {@code rowsToQualifier}.
   */
  private byte[] nextQualifier(Map<Integer, List<byte[]>> rowsToQualifier, int rowId) {
    float qualifierChance = randomizer.nextFloat();
    List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);

    if (previousQualifiers == null
        || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
      // completely new qualifier (always the case for a row seen the first time)
      byte[] qualifier =
          new byte[randomLength(averageQualifierLength, qualifierLengthVariance)];
      randomizer.nextBytes(qualifier);
      rowsToQualifier.computeIfAbsent(rowId, id -> new ArrayList<>()).add(qualifier);
      return qualifier;
    }

    byte[] originalQualifier =
        previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
    if (qualifierChance > chanceForSameQualifier) {
      // similar qualifier: keep a random-length head, randomize the tail
      byte[] qualifier = new byte[originalQualifier.length];
      // Random.nextInt(0) throws, so an empty qualifier shares nothing
      int sharedLength = qualifier.length == 0 ? 0 : randomizer.nextInt(qualifier.length);
      System.arraycopy(originalQualifier, 0, qualifier, 0, sharedLength);
      for (int j = sharedLength; j < qualifier.length; ++j) {
        qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
      }
      previousQualifiers.add(qualifier);
      return qualifier;
    }

    // same qualifier
    return originalQualifier;
  }

  /**
   * Generates {@code howMany} key values in generation order (not sorted).
   * Shared by the on-heap and off-heap public generators.
   */
  private List<KeyValue> generateUnsortedKeyValues(int howMany, boolean useTags) {
    List<KeyValue> result = new ArrayList<>(Math.max(0, howMany));

    List<byte[]> rows = generateRows();
    Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>();

    if (family == null) {
      family = new byte[columnFamilyLength];
      randomizer.nextBytes(family);
    }

    // Widen before taking the absolute value: Math.abs(Integer.MIN_VALUE) is
    // still negative in int arithmetic and would yield a negative timestamp.
    long baseTimestamp = Math.abs((long) randomizer.nextInt()) / baseTimestampDivide;

    byte[] value = new byte[valueLength];

    for (int i = 0; i < howMany; ++i) {
      long timestamp = baseTimestamp;
      if (timestampDiffSize > 0) {
        timestamp += randomizer.nextInt(timestampDiffSize);
      }
      int rowId = randomizer.nextInt(rows.size());
      byte[] row = rows.get(rowId);
      byte[] qualifier = nextQualifier(rowsToQualifier, rowId);

      if (randomizer.nextFloat() < chanceForZeroValue) {
        Arrays.fill(value, (byte) 0);
      } else {
        randomizer.nextBytes(value);
      }

      if (useTags) {
        result.add(new KeyValue(row, family, qualifier, timestamp, value,
            new Tag[] { new ArrayBackedTag((byte) 1, "value1") }));
      } else {
        result.add(new KeyValue(row, family, qualifier, timestamp, value));
      }
    }

    return result;
  }

  /**
   * Copies the bytes of a key value into a freshly allocated direct buffer and
   * wraps them in an {@link ExtendedOffheapKeyValue} with sequence id 0.
   */
  private static ByteBufferKeyValue toOffheapKeyValue(KeyValue keyValue) {
    ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength());
    ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(),
        keyValue.getOffset(), keyValue.getLength());
    return new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), 0);
  }

  /**
   * Generate test data useful to test encoders. The key values carry no tags.
   * @param howMany How many Key values should be generated.
   * @return sorted list of key values
   */
  public List<KeyValue> generateTestKeyValues(int howMany) {
    return generateTestKeyValues(howMany, false);
  }

  /**
   * Generate test data useful to test encoders.
   * @param howMany How many Key values should be generated.
   * @param useTags whether a single tag should be attached to every key value
   * @return list of key values sorted with {@link CellComparator#getInstance()}
   */
  public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
    List<KeyValue> result = generateUnsortedKeyValues(howMany, useTags);
    result.sort(CellComparator.getInstance());
    return result;
  }

  /**
   * Generate test data useful to test encoders. Every cell is backed by its own
   * direct {@link ByteBuffer} and is an {@link ExtendedOffheapKeyValue}.
   * @param howMany How many Key values should be generated.
   * @param useTags whether a single tag should be attached to every key value
   * @return list of cells sorted with {@link CellComparator#getInstance()}
   */
  public List<Cell> generateTestExtendedOffheapKeyValues(int howMany, boolean useTags) {
    List<KeyValue> keyValues = generateUnsortedKeyValues(howMany, useTags);
    List<Cell> result = new ArrayList<>(keyValues.size());
    for (KeyValue keyValue : keyValues) {
      result.add(toOffheapKeyValue(keyValue));
    }

    result.sort(CellComparator.getInstance());

    return result;
  }

  /**
   * A {@link ByteBufferKeyValue} whose array-based accessors always throw.
   * Presumably this lets tests detect code that reads such a cell through a
   * backing array instead of its ByteBuffer-based accessors.
   */
  static class ExtendedOffheapKeyValue extends ByteBufferKeyValue {
    public ExtendedOffheapKeyValue(ByteBuffer buf, int offset, int length, long seqId) {
      super(buf, offset, length, seqId);
    }

    @Override
    public byte[] getRowArray() {
      throw new IllegalArgumentException("getRowArray operation is not allowed");
    }

    @Override
    public int getRowOffset() {
      throw new IllegalArgumentException("getRowOffset operation is not allowed");
    }

    @Override
    public byte[] getFamilyArray() {
      throw new IllegalArgumentException("getFamilyArray operation is not allowed");
    }

    @Override
    public int getFamilyOffset() {
      throw new IllegalArgumentException("getFamilyOffset operation is not allowed");
    }

    @Override
    public byte[] getQualifierArray() {
      throw new IllegalArgumentException("getQualifierArray operation is not allowed");
    }

    @Override
    public int getQualifierOffset() {
      throw new IllegalArgumentException("getQualifierOffset operation is not allowed");
    }

    @Override
    public byte[] getValueArray() {
      throw new IllegalArgumentException("getValueArray operation is not allowed");
    }

    @Override
    public int getValueOffset() {
      throw new IllegalArgumentException("getValueOffset operation is not allowed");
    }

    @Override
    public byte[] getTagsArray() {
      throw new IllegalArgumentException("getTagsArray operation is not allowed");
    }

    @Override
    public int getTagsOffset() {
      throw new IllegalArgumentException("getTagsOffset operation is not allowed");
    }
  }

  /**
   * Convert list of KeyValues to byte buffer. The key values are written back
   * to back in list order; the buffer is returned as written, without being
   * flipped or rewound.
   * @param keyValues list of KeyValues to be converted.
   * @param includesMemstoreTS whether the sequence id of every key value is
   *          appended after it as a variable-length long
   * @return buffer with content from key values
   */
  public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
      boolean includesMemstoreTS) {
    int totalSize = 0;
    for (KeyValue kv : keyValues) {
      totalSize += kv.getLength();
      if (includesMemstoreTS) {
        totalSize += WritableUtils.getVIntSize(kv.getSequenceId());
      }
    }

    ByteBuffer result = ByteBuffer.allocate(totalSize);
    for (KeyValue kv : keyValues) {
      result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
      if (includesMemstoreTS) {
        ByteBufferUtils.writeVLong(result, kv.getSequenceId());
      }
    }
    return result;
  }

  /**
   * Sets the column family used for all subsequently generated key values,
   * replacing the randomly generated one.
   * @param family column family bytes; must not be null
   * @return this generator, to allow call chaining
   */
  public RedundantKVGenerator setFamily(byte[] family) {
    this.family = family;
    this.columnFamilyLength = family.length;
    return this;
  }
}