/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.MultiByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@RunWith(Parameterized.class)
@Category({ IOTests.class, MediumTests.class })
public class TestHFileBlockIndex {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHFileBlockIndex.class);

  @Parameters
  public static Collection<Object[]> compressionAlgorithms() {
    return HBaseCommonTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
  }

  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }

  private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlockIndex.class);
  private static final Random RNG = new Random(); // This test depends on Random#setSeed
  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  private static final int SMALL_BLOCK_SIZE = 4096;
  private static final int NUM_KV = 10000;

  private static FileSystem fs;
  private Path path;
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  private static final List<byte[]> keys = new ArrayList<>();
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES = { 19187, 21813, 23086 };

  private static final boolean includesMemstoreTS = true;

  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }

  @Before
  public void setUp() throws IOException {
    keys.clear();
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();
    RNG.setSeed(2389757);

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

  @Test
  public void testBlockIndex() throws IOException {
    testBlockIndexInternals(false);
    clear();
    testBlockIndexInternals(true);
  }

  private void clear() throws IOException {
    keys.clear();
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();
    RNG.setSeed(2389757);

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, 3);

    fs = HFileSystem.get(conf);
  }

  private void testBlockIndexInternals(boolean useTags) throws IOException {
    path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
    writeWholeIndex(useTags);
    readIndex(useTags);
  }

  /**
   * A wrapper around a block reader which only caches the results of the last operation. Not
   * thread-safe.
   */
  private static class BlockReaderWrapper implements HFile.CachingBlockReader {

    private HFileBlock.FSReader realReader;
    private long prevOffset;
    private long prevOnDiskSize;
    private boolean prevPread;
    private HFileBlock prevBlock;

    public int hitCount = 0;
    public int missCount = 0;

    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
      this.realReader = realReader;
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize, boolean cacheBlock, boolean pread,
      boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding) throws IOException {
      if (offset == prevOffset && onDiskSize == prevOnDiskSize && pread == prevPread) {
        hitCount += 1;
        return prevBlock;
      }

      missCount += 1;
      prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false, true);
      prevOffset = offset;
      prevOnDiskSize = onDiskSize;
      prevPread = pread;

      return prevBlock;
    }
  }

  private void readIndex(boolean useTags) throws IOException {
    long fileSize = fs.getFileStatus(path).getLen();
    LOG.info("Size of {}: {} compression={}", path, fileSize, compr.toString());

    FSDataInputStream istream = fs.open(path);
    HFileContext meta =
      new HFileContextBuilder().withHBaseCheckSum(true).withIncludesMvcc(includesMemstoreTS)
        .withIncludesTags(useTags).withCompression(compr).build();
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, path).build();
    HFileBlock.FSReader blockReader =
      new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, conf);

    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
    HFileBlockIndex.BlockIndexReader indexReader =
      new HFileBlockIndex.CellBasedKeyBlockIndexReader(CellComparatorImpl.COMPARATOR, numLevels);

    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset, fileSize)
      .nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

    long prevOffset = -1;
    int i = 0;
    int expectedHitCount = 0;
    int expectedMissCount = 0;
    LOG.info("Total number of keys: " + keys.size());
    for (byte[] key : keys) {
      assertTrue(key != null);
      assertTrue(indexReader != null);
      KeyValue.KeyOnlyKeyValue keyOnlyKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      HFileBlock b = indexReader.seekToDataBlock(keyOnlyKey, null, true, true, false, null, brw);
      if (
        PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, keyOnlyKey, firstKeyInFile, 0,
          firstKeyInFile.length) < 0
      ) {
        assertTrue(b == null);
        ++i;
        continue;
      }

      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);

      assertTrue("seekToDataBlock failed for " + keyStr, b != null);

      if (prevOffset == b.getOffset()) {
        assertEquals(++expectedHitCount, brw.hitCount);
      } else {
        LOG.info("First key in a new block: " + keyStr + ", block offset: " + b.getOffset() + ")");
        assertTrue(b.getOffset() > prevOffset);
        assertEquals(++expectedMissCount, brw.missCount);
        prevOffset = b.getOffset();
      }
      ++i;
    }

    istream.close();
  }

  private void writeWholeIndex(boolean useTags) throws IOException {
    assertEquals(0, keys.size());
    HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true)
      .withIncludesMvcc(includesMemstoreTS).withIncludesTags(useTags).withCompression(compr)
      .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM).build();
    HFileBlock.Writer hbw = new HFileBlock.Writer(TEST_UTIL.getConfiguration(), null, meta);
    FSDataOutputStream outputStream = fs.create(path);
    HFileBlockIndex.BlockIndexWriter biw = new HFileBlockIndex.BlockIndexWriter(hbw, null, null);
    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
      hbw.startWriting(BlockType.DATA).write(Bytes.toBytes(String.valueOf(RNG.nextInt(1000))));
      long blockOffset = outputStream.getPos();
      hbw.writeHeaderAndData(outputStream);

      byte[] firstKey = null;
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
      for (int j = 0; j < 16; ++j) {
        byte[] k = new KeyValue(RandomKeyValueUtil.randomOrderedKey(RNG, i * 16 + j), family,
          qualifier, EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey();
        keys.add(k);
        if (j == 8) {
          firstKey = k;
        }
      }
      assertTrue(firstKey != null);
      if (firstKeyInFile == null) {
        firstKeyInFile = firstKey;
      }
      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());

      writeInlineBlocks(hbw, outputStream, biw, false);
    }
    writeInlineBlocks(hbw, outputStream, biw, true);
    rootIndexOffset = biw.writeIndexBlocks(outputStream);
    outputStream.close();

    numLevels = biw.getNumLevels();
    numRootEntries = biw.getNumRootEntries();

    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" + numRootEntries
      + ", rootIndexOffset=" + rootIndexOffset);
  }

  private void writeInlineBlocks(HFileBlock.Writer hbw, FSDataOutputStream outputStream,
    HFileBlockIndex.BlockIndexWriter biw, boolean isClosing) throws IOException {
    while (biw.shouldWriteBlock(isClosing)) {
      long offset = outputStream.getPos();
      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
      hbw.writeHeaderAndData(outputStream);
      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
        hbw.getUncompressedSizeWithoutHeader());
      LOG.info(
        "Wrote an inline index block at " + offset + ", size " + hbw.getOnDiskSizeWithHeader());
    }
  }

  private static final long getDummyFileOffset(int i) {
    return i * 185 + 379;
  }

  private static final int getDummyOnDiskSize(int i) {
    return i * i * 37 + i * 19 + 13;
  }

  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.

    // We only add odd-index keys into the array that we will binary-search.
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;

    // Only odd-index elements of this array are used to keep the secondary
    // index entries of the corresponding keys.
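    // (For reference, the buffer assembled below follows the non-root index block layout that
    // binarySearchNonRootIndex expects: an int entry count, a secondary index of
    // numSearchedKeys + 1 int offsets, and then one entry per searched key consisting of a long
    // file offset, an int on-disk size, and the key bytes; the 12-byte offset/size prefix is
    // what SECONDARY_INDEX_ENTRY_OVERHEAD accounts for.)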
    int[] secondaryIndexEntries = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i * 2);
      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("val"));
      // KeyValue cell = new KeyValue.KeyOnlyKeyValue(k, 0, k.length);
      keys.add(cell.getKey());
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70)
        padding.append(' ');
      msgPrefix += padding;
      if (i % 2 == 1) {
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(
          msgPrefix + "secondary index entry #" + ((i - 1) / 2) + ", offset " + curAllEntriesSize);
        curAllEntriesSize += cell.getKey().length + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

    // Make sure the keys are increasing.
    for (int i = 0; i < keys.size() - 1; ++i)
      assertTrue(CellComparatorImpl.COMPARATOR.compare(
        new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length),
        new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0);

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2), secondaryIndexOffset);

    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(), secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" + dummyOnDiskSize
        + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length + searchKey.length / 2];

      // To make things a bit more interesting, store the key we are looking
      // for at a non-zero offset in a new array.
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2, searchKey.length);

      KeyValue.KeyOnlyKeyValue cell =
        new KeyValue.KeyOnlyKeyValue(arrayHoldingKey, searchKey.length / 2, searchKey.length);
      int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell,
        new MultiByteBuff(nonRootIndex), CellComparatorImpl.COMPARATOR);
      String lookupFailureMsg =
        "Failed to look up key #" + i + " (" + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 == 1) {
        // This key is in the array we search as the element (i - 1) / 2. Make
        // sure we find it.
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
        // This key is not in the array but falls between two elements of the array,
        // before the beginning, or after the end. The result should be the previous
        // key in the searched array, or -1 for i = 0.
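        // For example, for i = 4 the search key falls between searched elements 1 and 2, so the
        // expected result is 4 / 2 - 1 = 1 and the reference entry is key #3; for i = 0 the
        // expected result is -1.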
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);

      // Now test that we can get the offset and the on-disk size using a
      // higher-level API function.
      boolean locateBlockResult =
        (BlockIndexReader.locateNonRootIndexEntry(new MultiByteBuff(nonRootIndex), cell,
          CellComparatorImpl.COMPARATOR) != -1);

      if (i == 0) {
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem), nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem), nonRootIndex.getInt());
      }
    }

  }

  @Test
  public void testBlockIndexChunk() throws IOException {
    BlockIndexChunk c = new BlockIndexChunk();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int N = 1000;
    int[] numSubEntriesAt = new int[N];
    int numSubEntries = 0;
    for (int i = 0; i < N; ++i) {
      baos.reset();
      DataOutputStream dos = new DataOutputStream(baos);
      c.writeNonRoot(dos);
      assertEquals(c.getNonRootSize(), dos.size());

      baos.reset();
      dos = new DataOutputStream(baos);
      c.writeRoot(dos);
      assertEquals(c.getRootSize(), dos.size());

      byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i);
      numSubEntries += RNG.nextInt(5) + 1;
      // Record the cumulative sub-entry count so the sub-entry lookup check below has data.
      numSubEntriesAt[i] = numSubEntries;
      keys.add(k);
      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
    }

    // Test the ability to look up the entry that contains a particular
    // deeper-level index block's entry ("sub-entry"), assuming a global
    // 0-based ordering of sub-entries. This is needed for mid-key calculation.
    for (int i = 0; i < N; ++i) {
      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1]; j < numSubEntriesAt[i]; ++j) {
        assertEquals(i, c.getEntryBySubEntry(j));
      }
    }
  }

  /** Checks if the HeapSize calculator is within reason */
  @Test
  public void testHeapSizeForBlockIndex() throws IOException {
    Class<HFileBlockIndex.BlockIndexReader> cl = HFileBlockIndex.BlockIndexReader.class;
    long expected = ClassSize.estimateBase(cl, false);

    HFileBlockIndex.BlockIndexReader bi = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    long actual = bi.heapSize();

    // Since the arrays in BlockIndex(byte [][] blockKeys, long [] blockOffsets,
    // int [] blockDataSizes) are all null they are not going to show up in the
    // HeapSize calculation, so need to remove those array costs from expected.
    // Already the block keys are not there in this case
    expected -= ClassSize.align(2 * ClassSize.ARRAY);

    if (expected != actual) {
      expected = ClassSize.estimateBase(cl, true);
      assertEquals(expected, actual);
    }
  }

  /**
   * Checks that the mid-key calculation works when the mid-key falls on a leaf index block
   * boundary.
   */
  @Test
  public void testMidKeyOnLeafIndexBlockBoundary() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_midkey");
    int maxChunkSize = 512;
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    // Enable hfile.block.index.cacheonwrite.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true);
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();
    // Evict all blocks that were cached-on-write by the previous invocation.
    blockCache.evictBlocksByHfileName(hfilePath.getName());
    // Write the HFile
    HFileContext meta = new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE)
      .withCompression(Algorithm.NONE).withDataBlockEncoding(DataBlockEncoding.NONE).build();
    HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath)
      .withFileContext(meta).create();
    Random rand = new Random(19231737);
    byte[] family = Bytes.toBytes("f");
    byte[] qualifier = Bytes.toBytes("q");
    int kvNumberToBeWritten = 16;
    // The generated hfile will contain 2 leaf-index blocks and 16 data blocks; the mid-key
    // falls exactly on the boundary of the first leaf-index block.
    for (int i = 0; i < kvNumberToBeWritten; ++i) {
      byte[] row = RandomKeyValueUtil.randomOrderedFixedLengthKey(rand, i, 30);

      // Key will be interpreted by KeyValue.KEY_COMPARATOR
      KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
        RandomKeyValueUtil.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE));
      writer.append(kv);
    }
    writer.close();

    // Disable hfile.block.index.cacheonwrite again.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);

    // Read the HFile
    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

    boolean hasArrayIndexOutOfBoundsException = false;
    try {
      // Get the mid-key.
      reader.midKey();
    } catch (ArrayIndexOutOfBoundsException e) {
      hasArrayIndexOutOfBoundsException = true;
    } finally {
      reader.close();
    }

    // Check that no ArrayIndexOutOfBoundsException occurred.
    assertFalse(hasArrayIndexOutOfBoundsException);
  }

  /**
   * Tests the block index through the HFile writer/reader APIs. Allows testing the index block
   * size set through configuration, intermediate-level index blocks, and caching index blocks on
   * write.
   */
  @Test
  public void testHFileWriterAndReader() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_block_index");
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();

    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
      LOG.info("Index block size: " + indexBlockSize + ", compression: " + compr);
      // Evict all blocks that were cached-on-write by the previous invocation.
      blockCache.evictBlocksByHfileName(hfilePath.getName());

      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
      Set<String> keyStrSet = new HashSet<>();
      byte[][] keys = new byte[NUM_KV][];
      byte[][] values = new byte[NUM_KV][];

      // Write the HFile
      {
        HFileContext meta =
          new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE).withCompression(compr).build();
        HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath)
          .withFileContext(meta).create();
        Random rand = new Random(19231737);
        byte[] family = Bytes.toBytes("f");
        byte[] qualifier = Bytes.toBytes("q");
        for (int i = 0; i < NUM_KV; ++i) {
          byte[] row = RandomKeyValueUtil.randomOrderedKey(rand, i);

          // Key will be interpreted by KeyValue.KEY_COMPARATOR
          KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
            RandomKeyValueUtil.randomValue(rand));
          byte[] k = kv.getKey();
          writer.append(kv);
          keys[i] = k;
          values[i] = CellUtil.cloneValue(kv);
          keyStrSet.add(Bytes.toStringBinary(k));
          if (i > 0) {
            assertTrue((PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, kv, keys[i - 1], 0,
              keys[i - 1].length)) > 0);
          }
        }

        writer.close();
      }

      // Read the HFile
      HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
      assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());

      assertTrue(Bytes.equals(keys[0], ((KeyValue) reader.getFirstKey().get()).getKey()));
      assertTrue(Bytes.equals(keys[NUM_KV - 1], ((KeyValue) reader.getLastKey().get()).getKey()));
      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

      for (boolean pread : new boolean[] { false, true }) {
        HFileScanner scanner = reader.getScanner(conf, true, pread);
        for (int i = 0; i < NUM_KV; ++i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
            ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
        assertTrue(scanner.seekTo());
        for (int i = NUM_KV - 1; i >= 0; --i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
            ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
      }

      // Manually compute the mid-key and validate it.
      HFile.Reader reader2 = reader;
      HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();

      HFileBlock.BlockIterator iter =
        fsReader.blockRange(0, reader.getTrailer().getLoadOnOpenDataOffset());
      HFileBlock block;
      List<byte[]> blockKeys = new ArrayList<>();
      while ((block = iter.nextBlock()) != null) {
        if (block.getBlockType() != BlockType.LEAF_INDEX) return;
        ByteBuff b = block.getBufferReadOnly();
        int n = b.getIntAfterPosition(0);
        // One int for the number of items, and n + 1 for the secondary index.
        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);

        // Get all the keys from the leaf index block.
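        // Each secondary-index slot holds an entry's offset relative to entriesOffset; within an
        // entry, the key bytes follow the 8-byte block offset and 4-byte on-disk size, which is
        // what SECONDARY_INDEX_ENTRY_OVERHEAD accounts for below.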
        for (int i = 0; i < n; ++i) {
          int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 1));
          int nextKeyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 2));
          int keyLen = nextKeyRelOffset - keyRelOffset;
          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset + keyLen);
          String blockKeyStr = Bytes.toString(blockKey);
          blockKeys.add(blockKey);

          // If the first key of the block is not among the keys written, we
          // are not parsing the non-root index block format correctly.
          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
            keyStrSet.contains(blockKeyStr));
        }
      }

      // Validate the mid-key.
      assertEquals(Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
        reader.midKey());

      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
        reader.getTrailer().getUncompressedDataIndexSize());

      reader.close();
      reader2.close();
    }
  }

  private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i) throws IOException {
    assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0,
      scanner.seekTo(KeyValueUtil.createKeyValueFromKey(keys[i])));
  }

  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr, ByteBuffer buf) {
    assertEquals(
      msgPrefix + ": expected " + Bytes.toStringBinary(arr) + ", actual "
        + Bytes.toStringBinary(buf),
      0, Bytes.compareTo(arr, 0, arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkKeyValue(String msgPrefix, byte[] expectedKey, byte[] expectedValue,
    ByteBuffer keyRead, ByteBuffer valueRead) {
    if (!msgPrefix.isEmpty()) msgPrefix += ". ";
"; 679 680 assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead); 681 assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue, valueRead); 682 } 683 684 @Test 685 public void testIntermediateLevelIndicesWithLargeKeys() throws IOException { 686 testIntermediateLevelIndicesWithLargeKeys(16); 687 } 688 689 @Test 690 public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException { 691 // because of the large rowKeys, we will end up with a 50-level block index without sanity check 692 testIntermediateLevelIndicesWithLargeKeys(2); 693 } 694 695 public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException { 696 Path hfPath = 697 new Path(TEST_UTIL.getDataTestDir(), "testIntermediateLevelIndicesWithLargeKeys.hfile"); 698 int maxChunkSize = 1024; 699 FileSystem fs = FileSystem.get(conf); 700 CacheConfig cacheConf = new CacheConfig(conf); 701 conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize); 702 conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries); 703 HFileContext context = new HFileContextBuilder().withBlockSize(16).build(); 704 HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf).withFileContext(context) 705 .withPath(fs, hfPath).create(); 706 List<byte[]> keys = new ArrayList<>(); 707 708 // This should result in leaf-level indices and a root level index 709 for (int i = 0; i < 100; i++) { 710 byte[] rowkey = new byte[maxChunkSize + 1]; 711 byte[] b = Bytes.toBytes(i); 712 System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length); 713 keys.add(rowkey); 714 hfw.append(CellUtil.createCell(rowkey)); 715 } 716 hfw.close(); 717 718 HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, true, conf); 719 // Scanner doesn't do Cells yet. Fix. 720 HFileScanner scanner = reader.getScanner(conf, true, true); 721 for (int i = 0; i < keys.size(); ++i) { 722 scanner.seekTo(CellUtil.createCell(keys.get(i))); 723 } 724 reader.close(); 725 } 726}