001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertTrue; 023 024import java.io.ByteArrayOutputStream; 025import java.io.DataOutput; 026import java.io.DataOutputStream; 027import java.io.IOException; 028import java.nio.ByteBuffer; 029import java.util.ArrayList; 030import java.util.Arrays; 031import java.util.Collection; 032import java.util.HashSet; 033import java.util.List; 034import java.util.Random; 035import java.util.Set; 036import org.apache.hadoop.conf.Configuration; 037import org.apache.hadoop.fs.FSDataInputStream; 038import org.apache.hadoop.fs.FSDataOutputStream; 039import org.apache.hadoop.fs.FileSystem; 040import org.apache.hadoop.fs.Path; 041import org.apache.hadoop.hbase.CellBuilderType; 042import org.apache.hadoop.hbase.CellComparatorImpl; 043import org.apache.hadoop.hbase.CellUtil; 044import org.apache.hadoop.hbase.ExtendedCellBuilderFactory; 045import org.apache.hadoop.hbase.HBaseClassTestRule; 046import org.apache.hadoop.hbase.HBaseCommonTestingUtil; 047import org.apache.hadoop.hbase.HBaseTestingUtil; 048import org.apache.hadoop.hbase.HConstants; 049import org.apache.hadoop.hbase.KeyValue; 050import org.apache.hadoop.hbase.KeyValueUtil; 051import org.apache.hadoop.hbase.PrivateCellUtil; 052import org.apache.hadoop.hbase.fs.HFileSystem; 053import org.apache.hadoop.hbase.io.ByteBuffAllocator; 054import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 055import org.apache.hadoop.hbase.io.compress.Compression; 056import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; 057import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 058import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding; 059import org.apache.hadoop.hbase.io.hfile.HFile.Writer; 060import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader; 061import org.apache.hadoop.hbase.io.hfile.NoOpIndexBlockEncoder.NoOpEncodedSeeker; 062import org.apache.hadoop.hbase.nio.ByteBuff; 063import org.apache.hadoop.hbase.nio.MultiByteBuff; 064import org.apache.hadoop.hbase.testclassification.IOTests; 065import org.apache.hadoop.hbase.testclassification.MediumTests; 066import org.apache.hadoop.hbase.util.Bytes; 067import org.apache.hadoop.hbase.util.ClassSize; 068import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 069import org.junit.Before; 070import org.junit.ClassRule; 071import org.junit.Test; 072import org.junit.experimental.categories.Category; 073import org.junit.runner.RunWith; 074import org.junit.runners.Parameterized; 075import org.junit.runners.Parameterized.Parameters; 076import org.slf4j.Logger; 077import org.slf4j.LoggerFactory; 078 079@RunWith(Parameterized.class) 080@Category({ IOTests.class, MediumTests.class }) 081public class TestHFileBlockIndex { 082 083 @ClassRule 084 public static final HBaseClassTestRule CLASS_RULE = 085 HBaseClassTestRule.forClass(TestHFileBlockIndex.class); 086 087 @Parameters 088 public static Collection<Object[]> compressionAlgorithms() { 089 return HBaseCommonTestingUtil.COMPRESSION_ALGORITHMS_PARAMETERIZED; 090 } 091 092 public TestHFileBlockIndex(Compression.Algorithm compr) { 093 this.compr = compr; 094 } 095 096 private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlockIndex.class); 097 private static final Random RNG = new Random(); // This test depends on Random#setSeed 098 private static final int NUM_DATA_BLOCKS = 1000; 099 private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 100 101 private static final int SMALL_BLOCK_SIZE = 4096; 102 private static final int NUM_KV = 10000; 103 104 private static FileSystem fs; 105 private Path path; 106 private long rootIndexOffset; 107 private int numRootEntries; 108 private int numLevels; 109 private static final List<byte[]> keys = new ArrayList<>(); 110 private final Compression.Algorithm compr; 111 private byte[] firstKeyInFile; 112 private Configuration conf; 113 114 private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 }; 115 private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 }; 116 private static final int[] UNCOMPRESSED_INDEX_SIZES = { 19187, 21813, 23086 }; 117 118 private static final boolean includesMemstoreTS = true; 119 120 static { 121 assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length; 122 assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length; 123 } 124 125 @Before 126 public void setUp() throws IOException { 127 keys.clear(); 128 firstKeyInFile = null; 129 conf = TEST_UTIL.getConfiguration(); 130 RNG.setSeed(2389757); 131 132 // This test requires at least HFile format version 2. 133 conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION); 134 135 fs = HFileSystem.get(conf); 136 } 137 138 @Test 139 public void testBlockIndex() throws IOException { 140 testBlockIndexInternals(false); 141 clear(); 142 testBlockIndexInternals(true); 143 } 144 145 private void clear() throws IOException { 146 keys.clear(); 147 firstKeyInFile = null; 148 conf = TEST_UTIL.getConfiguration(); 149 RNG.setSeed(2389757); 150 151 // This test requires at least HFile format version 2. 152 conf.setInt(HFile.FORMAT_VERSION_KEY, 3); 153 154 fs = HFileSystem.get(conf); 155 } 156 157 private void testBlockIndexInternals(boolean useTags) throws IOException { 158 path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags); 159 writeWholeIndex(useTags); 160 readIndex(useTags); 161 } 162 163 /** 164 * A wrapper around a block reader which only caches the results of the last operation. Not 165 * thread-safe. 166 */ 167 private static class BlockReaderWrapper implements HFile.CachingBlockReader { 168 169 private HFileBlock.FSReader realReader; 170 private long prevOffset; 171 private long prevOnDiskSize; 172 private boolean prevPread; 173 private HFileBlock prevBlock; 174 175 public int hitCount = 0; 176 public int missCount = 0; 177 178 public BlockReaderWrapper(HFileBlock.FSReader realReader) { 179 this.realReader = realReader; 180 } 181 182 @Override 183 public HFileBlock readBlock(long offset, long onDiskSize, boolean cacheBlock, boolean pread, 184 boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType, 185 DataBlockEncoding expectedDataBlockEncoding) throws IOException { 186 return readBlock(offset, onDiskSize, cacheBlock, pread, isCompaction, updateCacheMetrics, 187 expectedBlockType, expectedDataBlockEncoding, false); 188 } 189 190 @Override 191 public HFileBlock readBlock(long offset, long onDiskSize, boolean cacheBlock, boolean pread, 192 boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType, 193 DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly) throws IOException { 194 if (offset == prevOffset && onDiskSize == prevOnDiskSize && pread == prevPread) { 195 hitCount += 1; 196 return prevBlock; 197 } 198 199 missCount += 1; 200 prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false, true); 201 prevOffset = offset; 202 prevOnDiskSize = onDiskSize; 203 prevPread = pread; 204 205 return prevBlock; 206 } 207 } 208 209 private void readIndex(boolean useTags) throws IOException { 210 long fileSize = fs.getFileStatus(path).getLen(); 211 LOG.info("Size of {}: {} compression={}", path, fileSize, compr.toString()); 212 213 FSDataInputStream istream = fs.open(path); 214 HFileContext meta = 215 new HFileContextBuilder().withHBaseCheckSum(true).withIncludesMvcc(includesMemstoreTS) 216 .withIncludesTags(useTags).withCompression(compr).build(); 217 ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, path).build(); 218 HFileBlock.FSReader blockReader = 219 new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, conf); 220 221 BlockReaderWrapper brw = new BlockReaderWrapper(blockReader); 222 HFileBlockIndex.BlockIndexReader indexReader = 223 new HFileBlockIndex.CellBasedKeyBlockIndexReader(CellComparatorImpl.COMPARATOR, numLevels); 224 225 indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset, fileSize) 226 .nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries); 227 228 long prevOffset = -1; 229 int i = 0; 230 int expectedHitCount = 0; 231 int expectedMissCount = 0; 232 LOG.info("Total number of keys: " + keys.size()); 233 for (byte[] key : keys) { 234 assertTrue(key != null); 235 assertTrue(indexReader != null); 236 KeyValue.KeyOnlyKeyValue keyOnlyKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length); 237 HFileBlock b = indexReader.seekToDataBlock(keyOnlyKey, null, true, true, false, null, brw); 238 if ( 239 PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, keyOnlyKey, firstKeyInFile, 0, 240 firstKeyInFile.length) < 0 241 ) { 242 assertTrue(b == null); 243 ++i; 244 continue; 245 } 246 247 String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key); 248 249 assertTrue("seekToDataBlock failed for " + keyStr, b != null); 250 251 if (prevOffset == b.getOffset()) { 252 assertEquals(++expectedHitCount, brw.hitCount); 253 } else { 254 LOG.info("First key in a new block: " + keyStr + ", block offset: " + b.getOffset() + ")"); 255 assertTrue(b.getOffset() > prevOffset); 256 assertEquals(++expectedMissCount, brw.missCount); 257 prevOffset = b.getOffset(); 258 } 259 ++i; 260 } 261 262 istream.close(); 263 } 264 265 private void writeWholeIndex(boolean useTags) throws IOException { 266 assertEquals(0, keys.size()); 267 HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true) 268 .withIncludesMvcc(includesMemstoreTS).withIncludesTags(useTags).withCompression(compr) 269 .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM).build(); 270 HFileBlock.Writer hbw = new HFileBlock.Writer(TEST_UTIL.getConfiguration(), null, meta); 271 FSDataOutputStream outputStream = fs.create(path); 272 HFileBlockIndex.BlockIndexWriter biw = 273 new HFileBlockIndex.BlockIndexWriter(hbw, null, null, null); 274 for (int i = 0; i < NUM_DATA_BLOCKS; ++i) { 275 hbw.startWriting(BlockType.DATA).write(Bytes.toBytes(String.valueOf(RNG.nextInt(1000)))); 276 long blockOffset = outputStream.getPos(); 277 hbw.writeHeaderAndData(outputStream); 278 279 byte[] firstKey = null; 280 byte[] family = Bytes.toBytes("f"); 281 byte[] qualifier = Bytes.toBytes("q"); 282 for (int j = 0; j < 16; ++j) { 283 byte[] k = new KeyValue(RandomKeyValueUtil.randomOrderedKey(RNG, i * 16 + j), family, 284 qualifier, EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey(); 285 keys.add(k); 286 if (j == 8) { 287 firstKey = k; 288 } 289 } 290 assertTrue(firstKey != null); 291 if (firstKeyInFile == null) { 292 firstKeyInFile = firstKey; 293 } 294 biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader()); 295 296 writeInlineBlocks(hbw, outputStream, biw, false); 297 } 298 writeInlineBlocks(hbw, outputStream, biw, true); 299 rootIndexOffset = biw.writeIndexBlocks(outputStream); 300 outputStream.close(); 301 302 numLevels = biw.getNumLevels(); 303 numRootEntries = biw.getNumRootEntries(); 304 305 LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" + numRootEntries 306 + ", rootIndexOffset=" + rootIndexOffset); 307 } 308 309 private void writeInlineBlocks(HFileBlock.Writer hbw, FSDataOutputStream outputStream, 310 HFileBlockIndex.BlockIndexWriter biw, boolean isClosing) throws IOException { 311 while (biw.shouldWriteBlock(isClosing)) { 312 long offset = outputStream.getPos(); 313 biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType())); 314 hbw.writeHeaderAndData(outputStream); 315 biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(), 316 hbw.getUncompressedSizeWithoutHeader()); 317 LOG.info( 318 "Wrote an inline index block at " + offset + ", size " + hbw.getOnDiskSizeWithHeader()); 319 } 320 } 321 322 private static final long getDummyFileOffset(int i) { 323 return i * 185 + 379; 324 } 325 326 private static final int getDummyOnDiskSize(int i) { 327 return i * i * 37 + i * 19 + 13; 328 } 329 330 @Test 331 public void testSecondaryIndexBinarySearch() throws IOException { 332 int numTotalKeys = 99; 333 assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even. 334 335 // We only add odd-index keys into the array that we will binary-search. 336 int numSearchedKeys = (numTotalKeys - 1) / 2; 337 338 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 339 DataOutputStream dos = new DataOutputStream(baos); 340 341 dos.writeInt(numSearchedKeys); 342 int curAllEntriesSize = 0; 343 int numEntriesAdded = 0; 344 345 // Only odd-index elements of this array are used to keep the secondary 346 // index entries of the corresponding keys. 347 int secondaryIndexEntries[] = new int[numTotalKeys]; 348 349 for (int i = 0; i < numTotalKeys; ++i) { 350 byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i * 2); 351 KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("val")); 352 // KeyValue cell = new KeyValue.KeyOnlyKeyValue(k, 0, k.length); 353 keys.add(cell.getKey()); 354 String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): "; 355 StringBuilder padding = new StringBuilder(); 356 while (msgPrefix.length() + padding.length() < 70) 357 padding.append(' '); 358 msgPrefix += padding; 359 if (i % 2 == 1) { 360 dos.writeInt(curAllEntriesSize); 361 secondaryIndexEntries[i] = curAllEntriesSize; 362 LOG.info( 363 msgPrefix + "secondary index entry #" + ((i - 1) / 2) + ", offset " + curAllEntriesSize); 364 curAllEntriesSize += cell.getKey().length + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD; 365 ++numEntriesAdded; 366 } else { 367 secondaryIndexEntries[i] = -1; 368 LOG.info(msgPrefix + "not in the searched array"); 369 } 370 } 371 372 // Make sure the keys are increasing. 373 for (int i = 0; i < keys.size() - 1; ++i) 374 assertTrue(CellComparatorImpl.COMPARATOR.compare( 375 new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length), 376 new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0); 377 378 dos.writeInt(curAllEntriesSize); 379 assertEquals(numSearchedKeys, numEntriesAdded); 380 int secondaryIndexOffset = dos.size(); 381 assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2), secondaryIndexOffset); 382 383 for (int i = 1; i <= numTotalKeys - 1; i += 2) { 384 assertEquals(dos.size(), secondaryIndexOffset + secondaryIndexEntries[i]); 385 long dummyFileOffset = getDummyFileOffset(i); 386 int dummyOnDiskSize = getDummyOnDiskSize(i); 387 LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" + dummyOnDiskSize 388 + " at offset " + dos.size()); 389 dos.writeLong(dummyFileOffset); 390 dos.writeInt(dummyOnDiskSize); 391 LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size()); 392 dos.write(keys.get(i)); 393 } 394 395 dos.writeInt(curAllEntriesSize); 396 397 ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray()); 398 for (int i = 0; i < numTotalKeys; ++i) { 399 byte[] searchKey = keys.get(i); 400 byte[] arrayHoldingKey = new byte[searchKey.length + searchKey.length / 2]; 401 402 // To make things a bit more interesting, store the key we are looking 403 // for at a non-zero offset in a new array. 404 System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2, searchKey.length); 405 406 KeyValue.KeyOnlyKeyValue cell = 407 new KeyValue.KeyOnlyKeyValue(arrayHoldingKey, searchKey.length / 2, searchKey.length); 408 int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell, 409 new MultiByteBuff(nonRootIndex), CellComparatorImpl.COMPARATOR); 410 String lookupFailureMsg = 411 "Failed to look up key #" + i + " (" + Bytes.toStringBinary(searchKey) + ")"; 412 413 int expectedResult; 414 int referenceItem; 415 416 if (i % 2 == 1) { 417 // This key is in the array we search as the element (i - 1) / 2. Make 418 // sure we find it. 419 expectedResult = (i - 1) / 2; 420 referenceItem = i; 421 } else { 422 // This key is not in the array but between two elements on the array, 423 // in the beginning, or in the end. The result should be the previous 424 // key in the searched array, or -1 for i = 0. 425 expectedResult = i / 2 - 1; 426 referenceItem = i - 1; 427 } 428 429 assertEquals(lookupFailureMsg, expectedResult, searchResult); 430 431 // Now test we can get the offset and the on-disk-size using a 432 // higher-level API function.s 433 boolean locateBlockResult = 434 (BlockIndexReader.locateNonRootIndexEntry(new MultiByteBuff(nonRootIndex), cell, 435 CellComparatorImpl.COMPARATOR) != -1); 436 437 if (i == 0) { 438 assertFalse(locateBlockResult); 439 } else { 440 assertTrue(locateBlockResult); 441 String errorMsg = "i=" + i + ", position=" + nonRootIndex.position(); 442 assertEquals(errorMsg, getDummyFileOffset(referenceItem), nonRootIndex.getLong()); 443 assertEquals(errorMsg, getDummyOnDiskSize(referenceItem), nonRootIndex.getInt()); 444 } 445 } 446 447 } 448 449 @Test 450 public void testBlockIndexChunk() throws IOException { 451 BlockIndexChunk c = new HFileBlockIndex.BlockIndexChunkImpl(); 452 HFileIndexBlockEncoder indexBlockEncoder = NoOpIndexBlockEncoder.INSTANCE; 453 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 454 int N = 1000; 455 int[] numSubEntriesAt = new int[N]; 456 int numSubEntries = 0; 457 for (int i = 0; i < N; ++i) { 458 baos.reset(); 459 DataOutputStream dos = new DataOutputStream(baos); 460 indexBlockEncoder.encode(c, false, dos); 461 assertEquals(c.getNonRootSize(), dos.size()); 462 463 baos.reset(); 464 dos = new DataOutputStream(baos); 465 indexBlockEncoder.encode(c, true, dos); 466 assertEquals(c.getRootSize(), dos.size()); 467 468 byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i); 469 numSubEntries += RNG.nextInt(5) + 1; 470 keys.add(k); 471 c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries); 472 } 473 474 // Test the ability to look up the entry that contains a particular 475 // deeper-level index block's entry ("sub-entry"), assuming a global 476 // 0-based ordering of sub-entries. This is needed for mid-key calculation. 477 for (int i = 0; i < N; ++i) { 478 for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1]; j < numSubEntriesAt[i]; ++j) { 479 assertEquals(i, c.getEntryBySubEntry(j)); 480 } 481 } 482 } 483 484 /** Checks if the HeapSize calculator is within reason */ 485 @Test 486 public void testHeapSizeForBlockIndex() throws IOException { 487 Class<HFileBlockIndex.BlockIndexReader> cl = HFileBlockIndex.BlockIndexReader.class; 488 long expected = ClassSize.estimateBase(cl, false); 489 490 HFileBlockIndex.BlockIndexReader bi = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1); 491 long actual = bi.heapSize(); 492 493 // Since the arrays in BlockIndex(byte [][] blockKeys, long [] blockOffsets, 494 // int [] blockDataSizes) are all null they are not going to show up in the 495 // HeapSize calculation, so need to remove those array costs from expected. 496 // Already the block keys are not there in this case 497 expected -= ClassSize.align(2 * ClassSize.ARRAY); 498 499 if (expected != actual) { 500 expected = ClassSize.estimateBase(cl, true); 501 assertEquals(expected, actual); 502 } 503 } 504 505 /** 506 * to check if looks good when midKey on a leaf index block boundary 507 */ 508 @Test 509 public void testMidKeyOnLeafIndexBlockBoundary() throws IOException { 510 Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_midkey"); 511 int maxChunkSize = 512; 512 conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize); 513 // should open hfile.block.index.cacheonwrite 514 conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true); 515 CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf)); 516 BlockCache blockCache = cacheConf.getBlockCache().get(); 517 // Evict all blocks that were cached-on-write by the previous invocation. 518 blockCache.evictBlocksByHfileName(hfilePath.getName()); 519 // Write the HFile 520 HFileContext meta = new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE) 521 .withCompression(Algorithm.NONE).withDataBlockEncoding(DataBlockEncoding.NONE).build(); 522 HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath) 523 .withFileContext(meta).create(); 524 Random rand = new Random(19231737); 525 byte[] family = Bytes.toBytes("f"); 526 byte[] qualifier = Bytes.toBytes("q"); 527 int kvNumberToBeWritten = 16; 528 // the new generated hfile will contain 2 leaf-index blocks and 16 data blocks, 529 // midkey is just on the boundary of the first leaf-index block 530 for (int i = 0; i < kvNumberToBeWritten; ++i) { 531 byte[] row = RandomKeyValueUtil.randomOrderedFixedLengthKey(rand, i, 30); 532 533 // Key will be interpreted by KeyValue.KEY_COMPARATOR 534 KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(), 535 RandomKeyValueUtil.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE)); 536 writer.append(kv); 537 } 538 writer.close(); 539 540 // close hfile.block.index.cacheonwrite 541 conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false); 542 543 // Read the HFile 544 HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf); 545 546 boolean hasArrayIndexOutOfBoundsException = false; 547 try { 548 // get the mid-key. 549 reader.midKey(); 550 } catch (ArrayIndexOutOfBoundsException e) { 551 hasArrayIndexOutOfBoundsException = true; 552 } finally { 553 reader.close(); 554 } 555 556 // to check if ArrayIndexOutOfBoundsException occurred 557 assertFalse(hasArrayIndexOutOfBoundsException); 558 } 559 560 /** 561 * Testing block index through the HFile writer/reader APIs. Allows to test setting index block 562 * size through configuration, intermediate-level index blocks, and caching index blocks on write. 563 */ 564 @Test 565 public void testHFileWriterAndReader() throws IOException { 566 Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_block_index"); 567 CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf)); 568 BlockCache blockCache = cacheConf.getBlockCache().get(); 569 570 for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) { 571 int indexBlockSize = INDEX_CHUNK_SIZES[testI]; 572 int expectedNumLevels = EXPECTED_NUM_LEVELS[testI]; 573 LOG.info("Index block size: " + indexBlockSize + ", compression: " + compr); 574 // Evict all blocks that were cached-on-write by the previous invocation. 575 blockCache.evictBlocksByHfileName(hfilePath.getName()); 576 577 conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize); 578 Set<String> keyStrSet = new HashSet<>(); 579 byte[][] keys = new byte[NUM_KV][]; 580 byte[][] values = new byte[NUM_KV][]; 581 582 // Write the HFile 583 { 584 HFileContext meta = 585 new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE).withCompression(compr).build(); 586 HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath) 587 .withFileContext(meta).create(); 588 Random rand = new Random(19231737); 589 byte[] family = Bytes.toBytes("f"); 590 byte[] qualifier = Bytes.toBytes("q"); 591 for (int i = 0; i < NUM_KV; ++i) { 592 byte[] row = RandomKeyValueUtil.randomOrderedKey(rand, i); 593 594 // Key will be interpreted by KeyValue.KEY_COMPARATOR 595 KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(), 596 RandomKeyValueUtil.randomValue(rand)); 597 byte[] k = kv.getKey(); 598 writer.append(kv); 599 keys[i] = k; 600 values[i] = CellUtil.cloneValue(kv); 601 keyStrSet.add(Bytes.toStringBinary(k)); 602 if (i > 0) { 603 assertTrue((PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, kv, keys[i - 1], 0, 604 keys[i - 1].length)) > 0); 605 } 606 } 607 608 writer.close(); 609 } 610 611 // Read the HFile 612 HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf); 613 assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels()); 614 615 assertTrue(Bytes.equals(keys[0], ((KeyValue) reader.getFirstKey().get()).getKey())); 616 assertTrue(Bytes.equals(keys[NUM_KV - 1], ((KeyValue) reader.getLastKey().get()).getKey())); 617 LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1])); 618 619 for (boolean pread : new boolean[] { false, true }) { 620 HFileScanner scanner = reader.getScanner(conf, true, pread); 621 for (int i = 0; i < NUM_KV; ++i) { 622 checkSeekTo(keys, scanner, i); 623 checkKeyValue("i=" + i, keys[i], values[i], 624 ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue()); 625 } 626 assertTrue(scanner.seekTo()); 627 for (int i = NUM_KV - 1; i >= 0; --i) { 628 checkSeekTo(keys, scanner, i); 629 checkKeyValue("i=" + i, keys[i], values[i], 630 ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue()); 631 } 632 } 633 634 // Manually compute the mid-key and validate it. 635 HFile.Reader reader2 = reader; 636 HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader(); 637 638 HFileBlock.BlockIterator iter = 639 fsReader.blockRange(0, reader.getTrailer().getLoadOnOpenDataOffset()); 640 HFileBlock block; 641 List<byte[]> blockKeys = new ArrayList<>(); 642 while ((block = iter.nextBlock()) != null) { 643 if (block.getBlockType() != BlockType.LEAF_INDEX) return; 644 ByteBuff b = block.getBufferReadOnly(); 645 int n = b.getIntAfterPosition(0); 646 // One int for the number of items, and n + 1 for the secondary index. 647 int entriesOffset = Bytes.SIZEOF_INT * (n + 2); 648 649 // Get all the keys from the leaf index block. S 650 for (int i = 0; i < n; ++i) { 651 int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 1)); 652 int nextKeyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 2)); 653 int keyLen = nextKeyRelOffset - keyRelOffset; 654 int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset 655 + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD; 656 byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset + keyLen); 657 String blockKeyStr = Bytes.toString(blockKey); 658 blockKeys.add(blockKey); 659 660 // If the first key of the block is not among the keys written, we 661 // are not parsing the non-root index block format correctly. 662 assertTrue("Invalid block key from leaf-level block: " + blockKeyStr, 663 keyStrSet.contains(blockKeyStr)); 664 } 665 } 666 667 // Validate the mid-key. 668 assertEquals(Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)), 669 reader.midKey()); 670 671 assertEquals(UNCOMPRESSED_INDEX_SIZES[testI], 672 reader.getTrailer().getUncompressedDataIndexSize()); 673 674 reader.close(); 675 reader2.close(); 676 } 677 } 678 679 private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i) throws IOException { 680 assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0, 681 scanner.seekTo(KeyValueUtil.createKeyValueFromKey(keys[i]))); 682 } 683 684 private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr, ByteBuffer buf) { 685 assertEquals( 686 msgPrefix + ": expected " + Bytes.toStringBinary(arr) + ", actual " 687 + Bytes.toStringBinary(buf), 688 0, Bytes.compareTo(arr, 0, arr.length, buf.array(), buf.arrayOffset(), buf.limit())); 689 } 690 691 /** Check a key/value pair after it was read by the reader */ 692 private void checkKeyValue(String msgPrefix, byte[] expectedKey, byte[] expectedValue, 693 ByteBuffer keyRead, ByteBuffer valueRead) { 694 if (!msgPrefix.isEmpty()) msgPrefix += ". "; 695 696 assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead); 697 assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue, valueRead); 698 } 699 700 @Test 701 public void testIntermediateLevelIndicesWithLargeKeys() throws IOException { 702 testIntermediateLevelIndicesWithLargeKeys(16); 703 } 704 705 @Test 706 public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException { 707 // because of the large rowKeys, we will end up with a 50-level block index without sanity check 708 testIntermediateLevelIndicesWithLargeKeys(2); 709 } 710 711 public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException { 712 Path hfPath = 713 new Path(TEST_UTIL.getDataTestDir(), "testIntermediateLevelIndicesWithLargeKeys.hfile"); 714 int maxChunkSize = 1024; 715 FileSystem fs = FileSystem.get(conf); 716 CacheConfig cacheConf = new CacheConfig(conf); 717 conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize); 718 conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries); 719 HFileContext context = new HFileContextBuilder().withBlockSize(16).build(); 720 HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf).withFileContext(context) 721 .withPath(fs, hfPath).create(); 722 List<byte[]> keys = new ArrayList<>(); 723 724 // This should result in leaf-level indices and a root level index 725 for (int i = 0; i < 100; i++) { 726 byte[] rowkey = new byte[maxChunkSize + 1]; 727 byte[] b = Bytes.toBytes(i); 728 System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length); 729 keys.add(rowkey); 730 hfw.append(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY).setRow(rowkey) 731 .setFamily(HConstants.EMPTY_BYTE_ARRAY).setQualifier(HConstants.EMPTY_BYTE_ARRAY) 732 .setTimestamp(HConstants.LATEST_TIMESTAMP).setType(KeyValue.Type.Maximum.getCode()) 733 .setValue(HConstants.EMPTY_BYTE_ARRAY).build()); 734 } 735 hfw.close(); 736 737 HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, true, conf); 738 // Scanner doesn't do Cells yet. Fix. 739 HFileScanner scanner = reader.getScanner(conf, true, true); 740 for (int i = 0; i < keys.size(); ++i) { 741 scanner.seekTo(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY) 742 .setRow(keys.get(i)).setFamily(HConstants.EMPTY_BYTE_ARRAY) 743 .setQualifier(HConstants.EMPTY_BYTE_ARRAY).setTimestamp(HConstants.LATEST_TIMESTAMP) 744 .setType(KeyValue.Type.Maximum.getCode()).setValue(HConstants.EMPTY_BYTE_ARRAY).build()); 745 } 746 reader.close(); 747 } 748 749 /** 750 * This test is for HBASE-27940, which midkey metadata in root index block would always be ignored 751 * by {@link BlockIndexReader#readMultiLevelIndexRoot}. 752 */ 753 @Test 754 public void testMidKeyReadSuccessfullyFromRootIndexBlock() throws IOException { 755 conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, 128); 756 Path hfilePath = 757 new Path(TEST_UTIL.getDataTestDir(), "testMidKeyReadSuccessfullyFromRootIndexBlock"); 758 Compression.Algorithm compressAlgo = Compression.Algorithm.NONE; 759 int entryCount = 50000; 760 HFileContext context = new HFileContextBuilder().withBlockSize(4096).withIncludesTags(false) 761 .withDataBlockEncoding(DataBlockEncoding.NONE).withCompression(compressAlgo).build(); 762 763 try (HFile.Writer writer = new HFile.WriterFactory(conf, new CacheConfig(conf)) 764 .withPath(fs, hfilePath).withFileContext(context).create()) { 765 766 List<KeyValue> keyValues = new ArrayList<>(entryCount); 767 for (int i = 0; i < entryCount; ++i) { 768 byte[] keyBytes = RandomKeyValueUtil.randomOrderedKey(RNG, i); 769 // A random-length random value. 770 byte[] valueBytes = RandomKeyValueUtil.randomValue(RNG); 771 KeyValue keyValue = 772 new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP, valueBytes); 773 writer.append(keyValue); 774 keyValues.add(keyValue); 775 } 776 } 777 778 try (FSDataInputStream fsdis = fs.open(hfilePath)) { 779 long fileSize = fs.getFileStatus(hfilePath).getLen(); 780 FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis, fileSize); 781 782 assertEquals(3, trailer.getMajorVersion()); 783 assertEquals(entryCount, trailer.getEntryCount()); 784 HFileContext meta = new HFileContextBuilder().withCompression(compressAlgo) 785 .withIncludesMvcc(false).withIncludesTags(false) 786 .withDataBlockEncoding(DataBlockEncoding.NONE).withHBaseCheckSum(true).build(); 787 ReaderContext readerContext = 788 new ReaderContextBuilder().withInputStreamWrapper(new FSDataInputStreamWrapper(fsdis)) 789 .withFilePath(hfilePath).withFileSystem(fs).withFileSize(fileSize).build(); 790 HFileBlock.FSReader blockReader = 791 new HFileBlock.FSReaderImpl(readerContext, meta, ByteBuffAllocator.HEAP, conf); 792 793 MyEncoder encoder = new MyEncoder(); 794 HFileBlockIndex.CellBasedKeyBlockIndexReaderV2 dataBlockIndexReader = 795 new HFileBlockIndex.CellBasedKeyBlockIndexReaderV2(trailer.createComparator(), 796 trailer.getNumDataIndexLevels(), encoder); 797 798 HFileBlock.BlockIterator blockIter = blockReader.blockRange(trailer.getLoadOnOpenDataOffset(), 799 fileSize - trailer.getTrailerSize()); 800 // Data index. We also read statistics about the block index written after 801 // the root level. 802 dataBlockIndexReader.readMultiLevelIndexRoot( 803 blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), trailer.getDataIndexCount()); 804 NoOpEncodedSeeker noOpEncodedSeeker = (NoOpEncodedSeeker) encoder.encoderSeeker; 805 // Assert we have read midkey metadata successfully. 806 assertTrue(noOpEncodedSeeker.midLeafBlockOffset >= 0); 807 assertTrue(noOpEncodedSeeker.midLeafBlockOnDiskSize > 0); 808 assertTrue(noOpEncodedSeeker.midKeyEntry >= 0); 809 } 810 } 811 812 static class MyEncoder implements HFileIndexBlockEncoder { 813 814 EncodedSeeker encoderSeeker; 815 816 @Override 817 public void saveMetadata(Writer writer) throws IOException { 818 NoOpIndexBlockEncoder.INSTANCE.saveMetadata(writer); 819 820 } 821 822 @Override 823 public void encode(BlockIndexChunk blockIndexChunk, boolean rootIndexBlock, DataOutput out) 824 throws IOException { 825 NoOpIndexBlockEncoder.INSTANCE.encode(blockIndexChunk, rootIndexBlock, out); 826 } 827 828 @Override 829 public IndexBlockEncoding getIndexBlockEncoding() { 830 return NoOpIndexBlockEncoder.INSTANCE.getIndexBlockEncoding(); 831 } 832 833 @Override 834 public EncodedSeeker createSeeker() { 835 encoderSeeker = NoOpIndexBlockEncoder.INSTANCE.createSeeker(); 836 return encoderSeeker; 837 } 838 839 } 840}