/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.MultiByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@RunWith(Parameterized.class)
@Category({IOTests.class, MediumTests.class})
public class TestHFileBlockIndex {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestHFileBlockIndex.class);

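  // Each test in this class runs once per compression algorithm returned below; the
  // Parameterized runner passes the current algorithm into the constructor.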
  @Parameters
  public static Collection<Object[]> compressionAlgorithms() {
    return HBaseCommonTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
  }

  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }

  private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlockIndex.class);

  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  private static final int SMALL_BLOCK_SIZE = 4096;
  private static final int NUM_KV = 10000;

  private static FileSystem fs;
  private Path path;
  private Random rand;
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  private static final List<byte[]> keys = new ArrayList<>();
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES = { 19187, 21813, 23086 };

  private static final boolean includesMemstoreTS = true;

  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }

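  // setUp() and clear() below re-seed the random generator and reset the shared key
  // list, so every write/read round trip starts from the same deterministic state.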
  @Before
  public void setUp() throws IOException {
    keys.clear();
    rand = new Random(2389757);
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

  @Test
  public void testBlockIndex() throws IOException {
    testBlockIndexInternals(false);
    clear();
    testBlockIndexInternals(true);
  }

  private void clear() throws IOException {
    keys.clear();
    rand = new Random(2389757);
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, 3);

    fs = HFileSystem.get(conf);
  }

  private void testBlockIndexInternals(boolean useTags) throws IOException {
    path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
    writeWholeIndex(useTags);
    readIndex(useTags);
  }

  /**
   * A wrapper around a block reader which only caches the results of the last
   * operation. Not thread-safe.
   */
  private static class BlockReaderWrapper implements HFile.CachingBlockReader {

    private HFileBlock.FSReader realReader;
    private long prevOffset;
    private long prevOnDiskSize;
    private boolean prevPread;
    private HFileBlock prevBlock;

    public int hitCount = 0;
    public int missCount = 0;

    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
      this.realReader = realReader;
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize,
        boolean cacheBlock, boolean pread, boolean isCompaction,
        boolean updateCacheMetrics, BlockType expectedBlockType,
        DataBlockEncoding expectedDataBlockEncoding)
        throws IOException {
      if (offset == prevOffset && onDiskSize == prevOnDiskSize && pread == prevPread) {
        hitCount += 1;
        return prevBlock;
      }

      missCount += 1;
      prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false, true);
      prevOffset = offset;
      prevOnDiskSize = onDiskSize;
      prevPread = pread;

      return prevBlock;
    }
  }

  private void readIndex(boolean useTags) throws IOException {
    long fileSize = fs.getFileStatus(path).getLen();
    LOG.info("Size of {}: {} compression={}", path, fileSize, compr.toString());

    FSDataInputStream istream = fs.open(path);
    HFileContext meta = new HFileContextBuilder()
        .withHBaseCheckSum(true)
        .withIncludesMvcc(includesMemstoreTS)
        .withIncludesTags(useTags)
        .withCompression(compr)
        .build();
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, path).build();
    HFileBlock.FSReader blockReader =
        new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP);

    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
    HFileBlockIndex.BlockIndexReader indexReader =
        new HFileBlockIndex.CellBasedKeyBlockIndexReader(
            CellComparatorImpl.COMPARATOR, numLevels);

    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
        fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

    long prevOffset = -1;
    int i = 0;
    int expectedHitCount = 0;
    int expectedMissCount = 0;
    LOG.info("Total number of keys: " + keys.size());
    for (byte[] key : keys) {
      assertTrue(key != null);
      assertTrue(indexReader != null);
      KeyValue.KeyOnlyKeyValue keyOnlyKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      HFileBlock b =
          indexReader.seekToDataBlock(keyOnlyKey, null, true, true, false, null, brw);
      if (PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, keyOnlyKey, firstKeyInFile, 0,
          firstKeyInFile.length) < 0) {
        assertTrue(b == null);
        ++i;
        continue;
      }

      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);

      assertTrue("seekToDataBlock failed for " + keyStr, b != null);

      if (prevOffset == b.getOffset()) {
        assertEquals(++expectedHitCount, brw.hitCount);
      } else {
        LOG.info("First key in a new block: " + keyStr + " (block offset: "
            + b.getOffset() + ")");
        assertTrue(b.getOffset() > prevOffset);
        assertEquals(++expectedMissCount, brw.missCount);
        prevOffset = b.getOffset();
      }
      ++i;
    }

    istream.close();
  }

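  // Writes NUM_DATA_BLOCKS small data blocks. For every data block one index entry (the
  // key generated at j == 8) is handed to the BlockIndexWriter, inline (leaf and
  // intermediate) index blocks are flushed whenever shouldWriteBlock() requests it, and
  // the root index is written last; its offset, level count and root entry count are
  // remembered for readIndex().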
  private void writeWholeIndex(boolean useTags) throws IOException {
    assertEquals(0, keys.size());
    HFileContext meta = new HFileContextBuilder()
        .withHBaseCheckSum(true)
        .withIncludesMvcc(includesMemstoreTS)
        .withIncludesTags(useTags)
        .withCompression(compr)
        .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
        .build();
    HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta);
    FSDataOutputStream outputStream = fs.create(path);
    HFileBlockIndex.BlockIndexWriter biw =
        new HFileBlockIndex.BlockIndexWriter(hbw, null, null);

    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
      hbw.startWriting(BlockType.DATA).write(String.valueOf(rand.nextInt(1000)).getBytes());
      long blockOffset = outputStream.getPos();
      hbw.writeHeaderAndData(outputStream);

      byte[] firstKey = null;
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
      for (int j = 0; j < 16; ++j) {
        byte[] k =
            new KeyValue(RandomKeyValueUtil.randomOrderedKey(rand, i * 16 + j), family, qualifier,
                EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey();
        keys.add(k);
        if (j == 8) {
          firstKey = k;
        }
      }
      assertTrue(firstKey != null);
      if (firstKeyInFile == null) {
        firstKeyInFile = firstKey;
      }
      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());

      writeInlineBlocks(hbw, outputStream, biw, false);
    }
    writeInlineBlocks(hbw, outputStream, biw, true);
    rootIndexOffset = biw.writeIndexBlocks(outputStream);
    outputStream.close();

    numLevels = biw.getNumLevels();
    numRootEntries = biw.getNumRootEntries();

    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
        numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
  }

  private void writeInlineBlocks(HFileBlock.Writer hbw,
      FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
      boolean isClosing) throws IOException {
    while (biw.shouldWriteBlock(isClosing)) {
      long offset = outputStream.getPos();
      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
      hbw.writeHeaderAndData(outputStream);
      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
          hbw.getUncompressedSizeWithoutHeader());
      LOG.info("Wrote an inline index block at " + offset + ", size " +
          hbw.getOnDiskSizeWithHeader());
    }
  }

  private static final long getDummyFileOffset(int i) {
    return i * 185 + 379;
  }

  private static final int getDummyOnDiskSize(int i) {
    return i * i * 37 + i * 19 + 13;
  }

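  // The test below hand-builds a serialized non-root index block in the layout the
  // non-root search methods are given:
  //   [int entryCount]
  //   [int relative entry offsets, entryCount + 1 values]
  //   [entries: long fileOffset, int onDiskSize, key bytes]
  // Only the odd-numbered generated keys become entries, so lookups of the
  // even-numbered keys must land between entries (or before the first one).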
  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.

    // We only add odd-index keys into the array that we will binary-search.
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;

    // Only odd-index elements of this array are used to keep the secondary
    // index entries of the corresponding keys.
    int[] secondaryIndexEntries = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i * 2);
      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"),
          Bytes.toBytes("val"));
      //KeyValue cell = new KeyValue.KeyOnlyKeyValue(k, 0, k.length);
      keys.add(cell.getKey());
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70)
        padding.append(' ');
      msgPrefix += padding;
      if (i % 2 == 1) {
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
            ", offset " + curAllEntriesSize);
        curAllEntriesSize += cell.getKey().length
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

    // Make sure the keys are increasing.
    for (int i = 0; i < keys.size() - 1; ++i)
      assertTrue(CellComparatorImpl.COMPARATOR.compare(
          new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length),
          new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0);

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2), secondaryIndexOffset);

    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(), secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
          dummyOnDiskSize + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length + searchKey.length / 2];

      // To make things a bit more interesting, store the key we are looking
      // for at a non-zero offset in a new array.
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
          searchKey.length);

      KeyValue.KeyOnlyKeyValue cell = new KeyValue.KeyOnlyKeyValue(
          arrayHoldingKey, searchKey.length / 2, searchKey.length);
      int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell,
          new MultiByteBuff(nonRootIndex), CellComparatorImpl.COMPARATOR);
      String lookupFailureMsg = "Failed to look up key #" + i + " ("
          + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 == 1) {
        // This key is in the array we search as the element (i - 1) / 2. Make
        // sure we find it.
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
        // This key is not in the array but between two elements on the array,
        // in the beginning, or in the end. The result should be the previous
        // key in the searched array, or -1 for i = 0.
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);

      // Now test we can get the offset and the on-disk-size using a
      // higher-level API function.
      boolean locateBlockResult =
          (BlockIndexReader.locateNonRootIndexEntry(new MultiByteBuff(nonRootIndex), cell,
              CellComparatorImpl.COMPARATOR) != -1);

      if (i == 0) {
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem), nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem), nonRootIndex.getInt());
      }
    }
  }

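  // BlockIndexChunk is the writer-side, in-memory form of a single index block. After
  // every add() the test checks that getNonRootSize()/getRootSize() agree with what
  // writeNonRoot()/writeRoot() actually serialize, and then verifies the mapping from
  // global sub-entry ordinals back to entries that mid-key calculation relies on.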
  @Test
  public void testBlockIndexChunk() throws IOException {
    BlockIndexChunk c = new BlockIndexChunk();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int N = 1000;
    int[] numSubEntriesAt = new int[N];
    int numSubEntries = 0;
    for (int i = 0; i < N; ++i) {
      baos.reset();
      DataOutputStream dos = new DataOutputStream(baos);
      c.writeNonRoot(dos);
      assertEquals(c.getNonRootSize(), dos.size());

      baos.reset();
      dos = new DataOutputStream(baos);
      c.writeRoot(dos);
      assertEquals(c.getRootSize(), dos.size());

      byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i);
      numSubEntries += rand.nextInt(5) + 1;
      // Record the running total so the sub-entry lookup below has a
      // [previous total, current total) range to verify for entry i.
      numSubEntriesAt[i] = numSubEntries;
      keys.add(k);
      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
    }

    // Test the ability to look up the entry that contains a particular
    // deeper-level index block's entry ("sub-entry"), assuming a global
    // 0-based ordering of sub-entries. This is needed for mid-key calculation.
    for (int i = 0; i < N; ++i) {
      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1]; j < numSubEntriesAt[i]; ++j) {
        assertEquals(i, c.getEntryBySubEntry(j));
      }
    }
  }

  /** Checks if the HeapSize calculator is within reason */
  @Test
  public void testHeapSizeForBlockIndex() throws IOException {
    Class<HFileBlockIndex.BlockIndexReader> cl = HFileBlockIndex.BlockIndexReader.class;
    long expected = ClassSize.estimateBase(cl, false);

    HFileBlockIndex.BlockIndexReader bi =
        new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    long actual = bi.heapSize();

    // Since the arrays in BlockIndex(byte [][] blockKeys, long [] blockOffsets,
    // int [] blockDataSizes) are all null they are not going to show up in the
    // HeapSize calculation, so need to remove those array costs from expected.
    // Already the block keys are not there in this case.
    expected -= ClassSize.align(2 * ClassSize.ARRAY);

    if (expected != actual) {
      expected = ClassSize.estimateBase(cl, true);
      assertEquals(expected, actual);
    }
  }

  /**
   * Checks that midKey() does not fail when the mid-key falls on a leaf-level index
   * block boundary.
   * @throws IOException
   */
  @Test
  public void testMidKeyOnLeafIndexBlockBoundary() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_midkey");
    int maxChunkSize = 512;
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    // Enable hfile.block.index.cacheonwrite.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true);
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();
    // Evict all blocks that were cached-on-write by the previous invocation.
    blockCache.evictBlocksByHfileName(hfilePath.getName());
    // Write the HFile
    HFileContext meta =
        new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE).withCompression(Algorithm.NONE)
            .withDataBlockEncoding(DataBlockEncoding.NONE).build();
    HFile.Writer writer =
        HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath).withFileContext(meta)
            .create();
    Random rand = new Random(19231737);
    byte[] family = Bytes.toBytes("f");
    byte[] qualifier = Bytes.toBytes("q");
    int kvNumberToBeWritten = 16;
    // The generated HFile contains 16 data blocks and 2 leaf-index blocks; the mid-key
    // falls exactly on the boundary of the first leaf-index block.
    for (int i = 0; i < kvNumberToBeWritten; ++i) {
      byte[] row = RandomKeyValueUtil.randomOrderedFixedLengthKey(rand, i, 30);

      // Key will be interpreted by KeyValue.KEY_COMPARATOR
      KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
          RandomKeyValueUtil.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE));
      writer.append(kv);
    }
    writer.close();

    // Disable hfile.block.index.cacheonwrite again.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);

    // Read the HFile
    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

    boolean hasArrayIndexOutOfBoundsException = false;
    try {
      // Get the mid-key.
      reader.midKey();
    } catch (ArrayIndexOutOfBoundsException e) {
      hasArrayIndexOutOfBoundsException = true;
    } finally {
      reader.close();
    }

    // Verify that no ArrayIndexOutOfBoundsException occurred.
    assertFalse(hasArrayIndexOutOfBoundsException);
  }

  /**
   * Testing block index through the HFile writer/reader APIs. Allows testing the index
   * block size setting through configuration, intermediate-level index blocks, and
   * caching index blocks on write.
582 * 583 * @throws IOException 584 */ 585 @Test 586 public void testHFileWriterAndReader() throws IOException { 587 Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), 588 "hfile_for_block_index"); 589 CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf)); 590 BlockCache blockCache = cacheConf.getBlockCache().get(); 591 592 for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) { 593 int indexBlockSize = INDEX_CHUNK_SIZES[testI]; 594 int expectedNumLevels = EXPECTED_NUM_LEVELS[testI]; 595 LOG.info("Index block size: " + indexBlockSize + ", compression: " 596 + compr); 597 // Evict all blocks that were cached-on-write by the previous invocation. 598 blockCache.evictBlocksByHfileName(hfilePath.getName()); 599 600 conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize); 601 Set<String> keyStrSet = new HashSet<>(); 602 byte[][] keys = new byte[NUM_KV][]; 603 byte[][] values = new byte[NUM_KV][]; 604 605 // Write the HFile 606 { 607 HFileContext meta = new HFileContextBuilder() 608 .withBlockSize(SMALL_BLOCK_SIZE) 609 .withCompression(compr) 610 .build(); 611 HFile.Writer writer = 612 HFile.getWriterFactory(conf, cacheConf) 613 .withPath(fs, hfilePath) 614 .withFileContext(meta) 615 .create(); 616 Random rand = new Random(19231737); 617 byte[] family = Bytes.toBytes("f"); 618 byte[] qualifier = Bytes.toBytes("q"); 619 for (int i = 0; i < NUM_KV; ++i) { 620 byte[] row = RandomKeyValueUtil.randomOrderedKey(rand, i); 621 622 // Key will be interpreted by KeyValue.KEY_COMPARATOR 623 KeyValue kv = 624 new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(), 625 RandomKeyValueUtil.randomValue(rand)); 626 byte[] k = kv.getKey(); 627 writer.append(kv); 628 keys[i] = k; 629 values[i] = CellUtil.cloneValue(kv); 630 keyStrSet.add(Bytes.toStringBinary(k)); 631 if (i > 0) { 632 assertTrue((PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, kv, keys[i - 1], 633 0, keys[i - 1].length)) > 0); 634 } 635 } 636 637 writer.close(); 638 } 639 640 // Read the HFile 641 HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf); 642 assertEquals(expectedNumLevels, 643 reader.getTrailer().getNumDataIndexLevels()); 644 645 assertTrue(Bytes.equals(keys[0], ((KeyValue)reader.getFirstKey().get()).getKey())); 646 assertTrue(Bytes.equals(keys[NUM_KV - 1], ((KeyValue)reader.getLastKey().get()).getKey())); 647 LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1])); 648 649 for (boolean pread : new boolean[] { false, true }) { 650 HFileScanner scanner = reader.getScanner(true, pread); 651 for (int i = 0; i < NUM_KV; ++i) { 652 checkSeekTo(keys, scanner, i); 653 checkKeyValue("i=" + i, keys[i], values[i], 654 ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue()); 655 } 656 assertTrue(scanner.seekTo()); 657 for (int i = NUM_KV - 1; i >= 0; --i) { 658 checkSeekTo(keys, scanner, i); 659 checkKeyValue("i=" + i, keys[i], values[i], 660 ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue()); 661 } 662 } 663 664 // Manually compute the mid-key and validate it. 
665 HFile.Reader reader2 = reader; 666 HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader(); 667 668 HFileBlock.BlockIterator iter = fsReader.blockRange(0, 669 reader.getTrailer().getLoadOnOpenDataOffset()); 670 HFileBlock block; 671 List<byte[]> blockKeys = new ArrayList<>(); 672 while ((block = iter.nextBlock()) != null) { 673 if (block.getBlockType() != BlockType.LEAF_INDEX) 674 return; 675 ByteBuff b = block.getBufferReadOnly(); 676 int n = b.getIntAfterPosition(0); 677 // One int for the number of items, and n + 1 for the secondary index. 678 int entriesOffset = Bytes.SIZEOF_INT * (n + 2); 679 680 // Get all the keys from the leaf index block. S 681 for (int i = 0; i < n; ++i) { 682 int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 1)); 683 int nextKeyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 2)); 684 int keyLen = nextKeyRelOffset - keyRelOffset; 685 int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset + 686 HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD; 687 byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset 688 + keyLen); 689 String blockKeyStr = Bytes.toString(blockKey); 690 blockKeys.add(blockKey); 691 692 // If the first key of the block is not among the keys written, we 693 // are not parsing the non-root index block format correctly. 694 assertTrue("Invalid block key from leaf-level block: " + blockKeyStr, 695 keyStrSet.contains(blockKeyStr)); 696 } 697 } 698 699 // Validate the mid-key. 700 assertEquals( 701 Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)), 702 reader.midKey()); 703 704 assertEquals(UNCOMPRESSED_INDEX_SIZES[testI], 705 reader.getTrailer().getUncompressedDataIndexSize()); 706 707 reader.close(); 708 reader2.close(); 709 } 710 } 711 712 private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i) 713 throws IOException { 714 assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0, 715 scanner.seekTo(KeyValueUtil.createKeyValueFromKey(keys[i]))); 716 } 717 718 private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr, 719 ByteBuffer buf) { 720 assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr) 721 + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0, 722 arr.length, buf.array(), buf.arrayOffset(), buf.limit())); 723 } 724 725 /** Check a key/value pair after it was read by the reader */ 726 private void checkKeyValue(String msgPrefix, byte[] expectedKey, 727 byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) { 728 if (!msgPrefix.isEmpty()) 729 msgPrefix += ". 
"; 730 731 assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead); 732 assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue, 733 valueRead); 734 } 735 736 @Test 737 public void testIntermediateLevelIndicesWithLargeKeys() throws IOException { 738 testIntermediateLevelIndicesWithLargeKeys(16); 739 } 740 741 @Test 742 public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException { 743 // because of the large rowKeys, we will end up with a 50-level block index without sanity check 744 testIntermediateLevelIndicesWithLargeKeys(2); 745 } 746 747 public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException { 748 Path hfPath = new Path(TEST_UTIL.getDataTestDir(), 749 "testIntermediateLevelIndicesWithLargeKeys.hfile"); 750 int maxChunkSize = 1024; 751 FileSystem fs = FileSystem.get(conf); 752 CacheConfig cacheConf = new CacheConfig(conf); 753 conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize); 754 conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries); 755 HFileContext context = new HFileContextBuilder().withBlockSize(16).build(); 756 HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf) 757 .withFileContext(context) 758 .withPath(fs, hfPath).create(); 759 List<byte[]> keys = new ArrayList<>(); 760 761 // This should result in leaf-level indices and a root level index 762 for (int i=0; i < 100; i++) { 763 byte[] rowkey = new byte[maxChunkSize + 1]; 764 byte[] b = Bytes.toBytes(i); 765 System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length); 766 keys.add(rowkey); 767 hfw.append(CellUtil.createCell(rowkey)); 768 } 769 hfw.close(); 770 771 HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, true, conf); 772 // Scanner doesn't do Cells yet. Fix. 773 HFileScanner scanner = reader.getScanner(true, true); 774 for (int i = 0; i < keys.size(); ++i) { 775 scanner.seekTo(CellUtil.createCell(keys.get(i))); 776 } 777 reader.close(); 778 } 779} 780