001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with this 004 * work for additional information regarding copyright ownership. The ASF 005 * licenses this file to you under the Apache License, Version 2.0 (the 006 * "License"); you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 014 * License for the specific language governing permissions and limitations 015 * under the License. 016 */ 017package org.apache.hadoop.hbase.util; 018 019import java.nio.ByteBuffer; 020import java.util.ArrayList; 021import java.util.Collections; 022import java.util.HashMap; 023import java.util.List; 024import java.util.Map; 025import java.util.Random; 026 027import org.apache.hadoop.hbase.ArrayBackedTag; 028import org.apache.hadoop.hbase.ByteBufferKeyValue; 029import org.apache.hadoop.hbase.Cell; 030import org.apache.hadoop.hbase.CellComparator; 031import org.apache.hadoop.hbase.KeyValue; 032import org.apache.hadoop.hbase.Tag; 033import org.apache.hadoop.io.WritableUtils; 034import org.apache.yetus.audience.InterfaceAudience; 035 036import org.apache.hbase.thirdparty.com.google.common.primitives.Bytes; 037 038/** 039 * Generate list of key values which are very useful to test data block encoding 040 * and compression. 041 */ 042@edu.umd.cs.findbugs.annotations.SuppressWarnings( 043 value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT", 044 justification="Should probably fix") 045@InterfaceAudience.Private 046public class RedundantKVGenerator { 047 // row settings 048 static byte[] DEFAULT_COMMON_PREFIX = new byte[0]; 049 static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10; 050 static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6; 051 static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3; 052 static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3; 053 static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3; 054 static int DEFAULT_NUMBER_OF_ROW = 500; 055 056 // qualifier 057 static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f; 058 static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f; 059 static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9; 060 static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3; 061 062 static int DEFAULT_COLUMN_FAMILY_LENGTH = 9; 063 static int DEFAULT_VALUE_LENGTH = 8; 064 static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f; 065 066 static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000; 067 static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000; 068 069 /** 070 * Default constructor, assumes all parameters from class constants. 071 */ 072 public RedundantKVGenerator() { 073 this(new Random(42L), 074 DEFAULT_NUMBER_OF_ROW_PREFIXES, 075 DEFAULT_AVERAGE_PREFIX_LENGTH, 076 DEFAULT_PREFIX_LENGTH_VARIANCE, 077 DEFAULT_AVERAGE_SUFFIX_LENGTH, 078 DEFAULT_SUFFIX_LENGTH_VARIANCE, 079 DEFAULT_NUMBER_OF_ROW, 080 081 DEFAULT_CHANCE_FOR_SAME_QUALIFIER, 082 DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER, 083 DEFAULT_AVERAGE_QUALIFIER_LENGTH, 084 DEFAULT_QUALIFIER_LENGTH_VARIANCE, 085 086 DEFAULT_COLUMN_FAMILY_LENGTH, 087 DEFAULT_VALUE_LENGTH, 088 DEFAULT_CHANCE_FOR_ZERO_VALUE, 089 090 DEFAULT_BASE_TIMESTAMP_DIVIDE, 091 DEFAULT_TIMESTAMP_DIFF_SIZE 092 ); 093 } 094 095 096 /** 097 * Various configuration options for generating key values 098 * @param randomizer pick things by random 099 */ 100 public RedundantKVGenerator(Random randomizer, 101 int numberOfRowPrefixes, 102 int averagePrefixLength, 103 int prefixLengthVariance, 104 int averageSuffixLength, 105 int suffixLengthVariance, 106 int numberOfRows, 107 108 float chanceForSameQualifier, 109 float chanceForSimiliarQualifier, 110 int averageQualifierLength, 111 int qualifierLengthVariance, 112 113 int columnFamilyLength, 114 int valueLength, 115 float chanceForZeroValue, 116 117 int baseTimestampDivide, 118 int timestampDiffSize 119 ) { 120 this.randomizer = randomizer; 121 122 this.commonPrefix = DEFAULT_COMMON_PREFIX; 123 this.numberOfRowPrefixes = numberOfRowPrefixes; 124 this.averagePrefixLength = averagePrefixLength; 125 this.prefixLengthVariance = prefixLengthVariance; 126 this.averageSuffixLength = averageSuffixLength; 127 this.suffixLengthVariance = suffixLengthVariance; 128 this.numberOfRows = numberOfRows; 129 130 this.chanceForSameQualifier = chanceForSameQualifier; 131 this.chanceForSimilarQualifier = chanceForSimiliarQualifier; 132 this.averageQualifierLength = averageQualifierLength; 133 this.qualifierLengthVariance = qualifierLengthVariance; 134 135 this.columnFamilyLength = columnFamilyLength; 136 this.valueLength = valueLength; 137 this.chanceForZeroValue = chanceForZeroValue; 138 139 this.baseTimestampDivide = baseTimestampDivide; 140 this.timestampDiffSize = timestampDiffSize; 141 } 142 143 /** Used to generate dataset */ 144 private Random randomizer; 145 146 // row settings 147 private byte[] commonPrefix;//global prefix before rowPrefixes 148 private int numberOfRowPrefixes; 149 private int averagePrefixLength = 6; 150 private int prefixLengthVariance = 3; 151 private int averageSuffixLength = 3; 152 private int suffixLengthVariance = 3; 153 private int numberOfRows = 500; 154 155 //family 156 private byte[] family; 157 158 // qualifier 159 private float chanceForSameQualifier = 0.5f; 160 private float chanceForSimilarQualifier = 0.4f; 161 private int averageQualifierLength = 9; 162 private int qualifierLengthVariance = 3; 163 164 private int columnFamilyLength = 9; 165 private int valueLength = 8; 166 private float chanceForZeroValue = 0.5f; 167 168 private int baseTimestampDivide = 1000000; 169 private int timestampDiffSize = 100000000; 170 171 private List<byte[]> generateRows() { 172 // generate prefixes 173 List<byte[]> prefixes = new ArrayList<>(); 174 prefixes.add(new byte[0]); 175 for (int i = 1; i < numberOfRowPrefixes; ++i) { 176 int prefixLength = averagePrefixLength; 177 prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) - 178 prefixLengthVariance; 179 byte[] newPrefix = new byte[prefixLength]; 180 randomizer.nextBytes(newPrefix); 181 byte[] newPrefixWithCommon = newPrefix; 182 prefixes.add(newPrefixWithCommon); 183 } 184 185 // generate rest of the row 186 List<byte[]> rows = new ArrayList<>(); 187 for (int i = 0; i < numberOfRows; ++i) { 188 int suffixLength = averageSuffixLength; 189 suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) - 190 suffixLengthVariance; 191 int randomPrefix = randomizer.nextInt(prefixes.size()); 192 byte[] row = new byte[prefixes.get(randomPrefix).length + 193 suffixLength]; 194 byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row); 195 rows.add(rowWithCommonPrefix); 196 } 197 198 return rows; 199 } 200 201 /** 202 * Generate test data useful to test encoders. 203 * @param howMany How many Key values should be generated. 204 * @return sorted list of key values 205 */ 206 public List<KeyValue> generateTestKeyValues(int howMany) { 207 return generateTestKeyValues(howMany, false); 208 } 209 /** 210 * Generate test data useful to test encoders. 211 * @param howMany How many Key values should be generated. 212 * @return sorted list of key values 213 */ 214 public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) { 215 List<KeyValue> result = new ArrayList<>(); 216 217 List<byte[]> rows = generateRows(); 218 Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>(); 219 220 if(family==null){ 221 family = new byte[columnFamilyLength]; 222 randomizer.nextBytes(family); 223 } 224 225 long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide; 226 227 byte[] value = new byte[valueLength]; 228 229 for (int i = 0; i < howMany; ++i) { 230 long timestamp = baseTimestamp; 231 if(timestampDiffSize > 0){ 232 timestamp += randomizer.nextInt(timestampDiffSize); 233 } 234 Integer rowId = randomizer.nextInt(rows.size()); 235 byte[] row = rows.get(rowId); 236 237 // generate qualifier, sometimes it is same, sometimes similar, 238 // occasionally completely different 239 byte[] qualifier; 240 float qualifierChance = randomizer.nextFloat(); 241 if (!rowsToQualifier.containsKey(rowId) 242 || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) { 243 int qualifierLength = averageQualifierLength; 244 qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1) 245 - qualifierLengthVariance; 246 qualifier = new byte[qualifierLength]; 247 randomizer.nextBytes(qualifier); 248 249 // add it to map 250 if (!rowsToQualifier.containsKey(rowId)) { 251 rowsToQualifier.put(rowId, new ArrayList<>()); 252 } 253 rowsToQualifier.get(rowId).add(qualifier); 254 } else if (qualifierChance > chanceForSameQualifier) { 255 // similar qualifier 256 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId); 257 byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers 258 .size())); 259 260 qualifier = new byte[originalQualifier.length]; 261 int commonPrefix = randomizer.nextInt(qualifier.length); 262 System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix); 263 for (int j = commonPrefix; j < qualifier.length; ++j) { 264 qualifier[j] = (byte) (randomizer.nextInt() & 0xff); 265 } 266 267 rowsToQualifier.get(rowId).add(qualifier); 268 } else { 269 // same qualifier 270 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId); 271 qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size())); 272 } 273 274 if (randomizer.nextFloat() < chanceForZeroValue) { 275 for (int j = 0; j < value.length; ++j) { 276 value[j] = (byte) 0; 277 } 278 } else { 279 randomizer.nextBytes(value); 280 } 281 282 if (useTags) { 283 result.add(new KeyValue(row, family, qualifier, timestamp, value, 284 new Tag[] { new ArrayBackedTag((byte) 1, "value1") })); 285 } else { 286 result.add(new KeyValue(row, family, qualifier, timestamp, value)); 287 } 288 } 289 290 Collections.sort(result, CellComparator.getInstance()); 291 292 return result; 293 } 294 295 /** 296 * Generate test data useful to test encoders. 297 * @param howMany How many Key values should be generated. 298 * @return sorted list of key values 299 */ 300 public List<Cell> generateTestExtendedOffheapKeyValues(int howMany, boolean useTags) { 301 List<Cell> result = new ArrayList<>(); 302 List<byte[]> rows = generateRows(); 303 Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>(); 304 305 if (family == null) { 306 family = new byte[columnFamilyLength]; 307 randomizer.nextBytes(family); 308 } 309 310 long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide; 311 312 byte[] value = new byte[valueLength]; 313 314 for (int i = 0; i < howMany; ++i) { 315 long timestamp = baseTimestamp; 316 if(timestampDiffSize > 0){ 317 timestamp += randomizer.nextInt(timestampDiffSize); 318 } 319 Integer rowId = randomizer.nextInt(rows.size()); 320 byte[] row = rows.get(rowId); 321 322 // generate qualifier, sometimes it is same, sometimes similar, 323 // occasionally completely different 324 byte[] qualifier; 325 float qualifierChance = randomizer.nextFloat(); 326 if (!rowsToQualifier.containsKey(rowId) 327 || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) { 328 int qualifierLength = averageQualifierLength; 329 qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1) 330 - qualifierLengthVariance; 331 qualifier = new byte[qualifierLength]; 332 randomizer.nextBytes(qualifier); 333 334 // add it to map 335 if (!rowsToQualifier.containsKey(rowId)) { 336 rowsToQualifier.put(rowId, new ArrayList<>()); 337 } 338 rowsToQualifier.get(rowId).add(qualifier); 339 } else if (qualifierChance > chanceForSameQualifier) { 340 // similar qualifier 341 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId); 342 byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers 343 .size())); 344 345 qualifier = new byte[originalQualifier.length]; 346 int commonPrefix = randomizer.nextInt(qualifier.length); 347 System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix); 348 for (int j = commonPrefix; j < qualifier.length; ++j) { 349 qualifier[j] = (byte) (randomizer.nextInt() & 0xff); 350 } 351 352 rowsToQualifier.get(rowId).add(qualifier); 353 } else { 354 // same qualifier 355 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId); 356 qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size())); 357 } 358 359 if (randomizer.nextFloat() < chanceForZeroValue) { 360 for (int j = 0; j < value.length; ++j) { 361 value[j] = (byte) 0; 362 } 363 } else { 364 randomizer.nextBytes(value); 365 } 366 if (useTags) { 367 KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, value, 368 new Tag[] { new ArrayBackedTag((byte) 1, "value1") }); 369 ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength()); 370 ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(), 371 keyValue.getOffset(), keyValue.getLength()); 372 ByteBufferKeyValue offheapKV = 373 new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), 0); 374 result.add(offheapKV); 375 } else { 376 KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, value); 377 ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength()); 378 ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(), 379 keyValue.getOffset(), keyValue.getLength()); 380 ByteBufferKeyValue offheapKV = 381 new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), 0); 382 result.add(offheapKV); 383 } 384 } 385 386 Collections.sort(result, CellComparator.getInstance()); 387 388 return result; 389 } 390 391 static class ExtendedOffheapKeyValue extends ByteBufferKeyValue { 392 public ExtendedOffheapKeyValue(ByteBuffer buf, int offset, int length, long seqId) { 393 super(buf, offset, length, seqId); 394 } 395 396 @Override 397 public byte[] getRowArray() { 398 throw new IllegalArgumentException("getRowArray operation is not allowed"); 399 } 400 401 @Override 402 public int getRowOffset() { 403 throw new IllegalArgumentException("getRowOffset operation is not allowed"); 404 } 405 406 @Override 407 public byte[] getFamilyArray() { 408 throw new IllegalArgumentException("getFamilyArray operation is not allowed"); 409 } 410 411 @Override 412 public int getFamilyOffset() { 413 throw new IllegalArgumentException("getFamilyOffset operation is not allowed"); 414 } 415 416 @Override 417 public byte[] getQualifierArray() { 418 throw new IllegalArgumentException("getQualifierArray operation is not allowed"); 419 } 420 421 @Override 422 public int getQualifierOffset() { 423 throw new IllegalArgumentException("getQualifierOffset operation is not allowed"); 424 } 425 426 @Override 427 public byte[] getValueArray() { 428 throw new IllegalArgumentException("getValueArray operation is not allowed"); 429 } 430 431 @Override 432 public int getValueOffset() { 433 throw new IllegalArgumentException("getValueOffset operation is not allowed"); 434 } 435 436 @Override 437 public byte[] getTagsArray() { 438 throw new IllegalArgumentException("getTagsArray operation is not allowed"); 439 } 440 441 @Override 442 public int getTagsOffset() { 443 throw new IllegalArgumentException("getTagsOffset operation is not allowed"); 444 } 445 } 446 447 /** 448 * Convert list of KeyValues to byte buffer. 449 * @param keyValues list of KeyValues to be converted. 450 * @return buffer with content from key values 451 */ 452 public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues, 453 boolean includesMemstoreTS) { 454 int totalSize = 0; 455 for (KeyValue kv : keyValues) { 456 totalSize += kv.getLength(); 457 if (includesMemstoreTS) { 458 totalSize += WritableUtils.getVIntSize(kv.getSequenceId()); 459 } 460 } 461 462 ByteBuffer result = ByteBuffer.allocate(totalSize); 463 for (KeyValue kv : keyValues) { 464 result.put(kv.getBuffer(), kv.getOffset(), kv.getLength()); 465 if (includesMemstoreTS) { 466 ByteBufferUtils.writeVLong(result, kv.getSequenceId()); 467 } 468 } 469 return result; 470 } 471 472 /************************ get/set ***********************************/ 473 public RedundantKVGenerator setCommonPrefix(byte[] prefix){ 474 this.commonPrefix = prefix; 475 return this; 476 } 477 478 public RedundantKVGenerator setRandomizer(Random randomizer) { 479 this.randomizer = randomizer; 480 return this; 481 } 482 483 public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) { 484 this.numberOfRowPrefixes = numberOfRowPrefixes; 485 return this; 486 } 487 488 public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) { 489 this.averagePrefixLength = averagePrefixLength; 490 return this; 491 } 492 493 public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) { 494 this.prefixLengthVariance = prefixLengthVariance; 495 return this; 496 } 497 498 public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) { 499 this.averageSuffixLength = averageSuffixLength; 500 return this; 501 } 502 503 public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) { 504 this.suffixLengthVariance = suffixLengthVariance; 505 return this; 506 } 507 508 public RedundantKVGenerator setNumberOfRows(int numberOfRows) { 509 this.numberOfRows = numberOfRows; 510 return this; 511 } 512 513 public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) { 514 this.chanceForSameQualifier = chanceForSameQualifier; 515 return this; 516 } 517 518 public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimiliarQualifier) { 519 this.chanceForSimilarQualifier = chanceForSimiliarQualifier; 520 return this; 521 } 522 523 public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) { 524 this.averageQualifierLength = averageQualifierLength; 525 return this; 526 } 527 528 public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) { 529 this.qualifierLengthVariance = qualifierLengthVariance; 530 return this; 531 } 532 533 public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) { 534 this.columnFamilyLength = columnFamilyLength; 535 return this; 536 } 537 538 public RedundantKVGenerator setFamily(byte[] family) { 539 this.family = family; 540 this.columnFamilyLength = family.length; 541 return this; 542 } 543 544 public RedundantKVGenerator setValueLength(int valueLength) { 545 this.valueLength = valueLength; 546 return this; 547 } 548 549 public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) { 550 this.chanceForZeroValue = chanceForZeroValue; 551 return this; 552 } 553 554 public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) { 555 this.baseTimestampDivide = baseTimestampDivide; 556 return this; 557 } 558 559 public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) { 560 this.timestampDiffSize = timestampDiffSize; 561 return this; 562 } 563}