/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.MultiByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@RunWith(Parameterized.class)
@Category({ IOTests.class, MediumTests.class })
public class TestHFileBlockIndex {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHFileBlockIndex.class);

  @Parameters
  public static Collection<Object[]> compressionAlgorithms() {
    return HBaseCommonTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
  }

  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }

  private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlockIndex.class);
  private static final Random RNG = new Random(); // This test depends on Random#setSeed
  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  private static final int SMALL_BLOCK_SIZE = 4096;
  private static final int NUM_KV = 10000;

  private static FileSystem fs;
  private Path path;
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  private static final List<byte[]> keys = new ArrayList<>();
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

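  // INDEX_CHUNK_SIZES, EXPECTED_NUM_LEVELS and UNCOMPRESSED_INDEX_SIZES are parallel arrays:
  // for each configured index chunk size, testHFileWriterAndReader expects the corresponding
  // number of index levels and uncompressed index size. The expected sizes assume the
  // deterministic keys/values generated from the fixed random seeds used in that test.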
  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES = { 19187, 21813, 23086 };

  private static final boolean includesMemstoreTS = true;

  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }

  @Before
  public void setUp() throws IOException {
    keys.clear();
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();
    RNG.setSeed(2389757);

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

  @Test
  public void testBlockIndex() throws IOException {
    testBlockIndexInternals(false);
    clear();
    testBlockIndexInternals(true);
  }

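  // Resets the shared state between the tag-less and tag-aware runs of testBlockIndexInternals.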
  private void clear() throws IOException {
    keys.clear();
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();
    RNG.setSeed(2389757);

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, 3);

    fs = HFileSystem.get(conf);
  }

  private void testBlockIndexInternals(boolean useTags) throws IOException {
    path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
    writeWholeIndex(useTags);
    readIndex(useTags);
  }

  /**
   * A wrapper around a block reader which only caches the results of the last operation. Not
   * thread-safe.
   */
  private static class BlockReaderWrapper implements HFile.CachingBlockReader {

    private HFileBlock.FSReader realReader;
    private long prevOffset;
    private long prevOnDiskSize;
    private boolean prevPread;
    private HFileBlock prevBlock;

    public int hitCount = 0;
    public int missCount = 0;

    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
      this.realReader = realReader;
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize, boolean cacheBlock, boolean pread,
      boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding) throws IOException {
      if (offset == prevOffset && onDiskSize == prevOnDiskSize && pread == prevPread) {
        hitCount += 1;
        return prevBlock;
      }

      missCount += 1;
      prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false, true);
      prevOffset = offset;
      prevOnDiskSize = onDiskSize;
      prevPread = pread;

      return prevBlock;
    }
  }

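  /**
   * Reads back the multi-level block index written by {@link #writeWholeIndex(boolean)} and seeks
   * to every generated key, verifying the hit/miss counts of the single-entry caching wrapper and
   * that keys sorting before the first indexed key in the file resolve to no block.
   */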
  private void readIndex(boolean useTags) throws IOException {
    long fileSize = fs.getFileStatus(path).getLen();
    LOG.info("Size of {}: {} compression={}", path, fileSize, compr.toString());

    FSDataInputStream istream = fs.open(path);
    HFileContext meta =
      new HFileContextBuilder().withHBaseCheckSum(true).withIncludesMvcc(includesMemstoreTS)
        .withIncludesTags(useTags).withCompression(compr).build();
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, path).build();
    HFileBlock.FSReader blockReader =
      new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, conf);

    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
    HFileBlockIndex.BlockIndexReader indexReader =
      new HFileBlockIndex.CellBasedKeyBlockIndexReader(CellComparatorImpl.COMPARATOR, numLevels);

    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset, fileSize)
      .nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

    long prevOffset = -1;
    int i = 0;
    int expectedHitCount = 0;
    int expectedMissCount = 0;
    LOG.info("Total number of keys: " + keys.size());
    for (byte[] key : keys) {
      assertTrue(key != null);
      assertTrue(indexReader != null);
      KeyValue.KeyOnlyKeyValue keyOnlyKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      HFileBlock b = indexReader.seekToDataBlock(keyOnlyKey, null, true, true, false, null, brw);
      if (
        PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, keyOnlyKey, firstKeyInFile, 0,
          firstKeyInFile.length) < 0
      ) {
        assertTrue(b == null);
        ++i;
        continue;
      }

      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);

      assertTrue("seekToDataBlock failed for " + keyStr, b != null);

      if (prevOffset == b.getOffset()) {
        assertEquals(++expectedHitCount, brw.hitCount);
      } else {
        LOG.info("First key in a new block: " + keyStr + " (block offset: " + b.getOffset() + ")");
        assertTrue(b.getOffset() > prevOffset);
        assertEquals(++expectedMissCount, brw.missCount);
        prevOffset = b.getOffset();
      }
      ++i;
    }

    istream.close();
  }

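  /**
   * Writes {@link #NUM_DATA_BLOCKS} small data blocks, adds one index entry per block (picking
   * one of the 16 generated keys of that block as the entry's key), lets the block index writer
   * emit inline leaf/intermediate index blocks as they fill up, and finally writes the root
   * index, recording its offset and the resulting number of levels and root entries.
   */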
  private void writeWholeIndex(boolean useTags) throws IOException {
    assertEquals(0, keys.size());
    HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true)
      .withIncludesMvcc(includesMemstoreTS).withIncludesTags(useTags).withCompression(compr)
      .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM).build();
    HFileBlock.Writer hbw = new HFileBlock.Writer(TEST_UTIL.getConfiguration(), null, meta);
    FSDataOutputStream outputStream = fs.create(path);
    HFileBlockIndex.BlockIndexWriter biw = new HFileBlockIndex.BlockIndexWriter(hbw, null, null);
    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
      hbw.startWriting(BlockType.DATA).write(Bytes.toBytes(String.valueOf(RNG.nextInt(1000))));
      long blockOffset = outputStream.getPos();
      hbw.writeHeaderAndData(outputStream);

      byte[] firstKey = null;
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
      for (int j = 0; j < 16; ++j) {
        byte[] k = new KeyValue(RandomKeyValueUtil.randomOrderedKey(RNG, i * 16 + j), family,
          qualifier, EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey();
        keys.add(k);
        if (j == 8) {
          firstKey = k;
        }
      }
      assertTrue(firstKey != null);
      if (firstKeyInFile == null) {
        firstKeyInFile = firstKey;
      }
      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());

      writeInlineBlocks(hbw, outputStream, biw, false);
    }
    writeInlineBlocks(hbw, outputStream, biw, true);
    rootIndexOffset = biw.writeIndexBlocks(outputStream);
    outputStream.close();

    numLevels = biw.getNumLevels();
    numRootEntries = biw.getNumRootEntries();

    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" + numRootEntries
      + ", rootIndexOffset=" + rootIndexOffset);
  }

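  /**
   * Gives the block index writer a chance to flush any index chunks that are ready to be written
   * as inline (non-root) blocks; when {@code isClosing} is true, all remaining chunks are flushed.
   */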
  private void writeInlineBlocks(HFileBlock.Writer hbw, FSDataOutputStream outputStream,
    HFileBlockIndex.BlockIndexWriter biw, boolean isClosing) throws IOException {
    while (biw.shouldWriteBlock(isClosing)) {
      long offset = outputStream.getPos();
      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
      hbw.writeHeaderAndData(outputStream);
      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
        hbw.getUncompressedSizeWithoutHeader());
      LOG.info(
        "Wrote an inline index block at " + offset + ", size " + hbw.getOnDiskSizeWithHeader());
    }
  }

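  // Deterministic dummy file offsets and on-disk sizes for synthetic index entries; the formulas
  // are arbitrary but must stay identical between the write and verification phases of the tests.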
  private static final long getDummyFileOffset(int i) {
    return i * 185 + 379;
  }

  private static final int getDummyOnDiskSize(int i) {
    return i * i * 37 + i * 19 + 13;
  }

  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.

    // We only add odd-index keys into the array that we will binary-search.
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

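    // Hand-build a non-root index block in the layout the reader code below expects:
    //   int                    entryCount
    //   int[entryCount + 1]    secondary index: byte offset of each entry, plus the total size
    //   entries, each of which is: long fileOffset, int onDiskSize, byte[] key
    // SECONDARY_INDEX_ENTRY_OVERHEAD accounts for the fixed-size offset and size fields.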
    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;

    // Only odd-index elements of this array are used to keep the secondary
    // index entries of the corresponding keys.
    int secondaryIndexEntries[] = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i * 2);
      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("val"));
      // KeyValue cell = new KeyValue.KeyOnlyKeyValue(k, 0, k.length);
      keys.add(cell.getKey());
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70)
        padding.append(' ');
      msgPrefix += padding;
      if (i % 2 == 1) {
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(
          msgPrefix + "secondary index entry #" + ((i - 1) / 2) + ", offset " + curAllEntriesSize);
        curAllEntriesSize += cell.getKey().length + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

    // Make sure the keys are increasing.
    for (int i = 0; i < keys.size() - 1; ++i)
      assertTrue(CellComparatorImpl.COMPARATOR.compare(
        new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length),
        new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0);

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2), secondaryIndexOffset);

    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(), secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" + dummyOnDiskSize
        + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length + searchKey.length / 2];

      // To make things a bit more interesting, store the key we are looking
      // for at a non-zero offset in a new array.
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2, searchKey.length);

      KeyValue.KeyOnlyKeyValue cell =
        new KeyValue.KeyOnlyKeyValue(arrayHoldingKey, searchKey.length / 2, searchKey.length);
      int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell,
        new MultiByteBuff(nonRootIndex), CellComparatorImpl.COMPARATOR);
      String lookupFailureMsg =
        "Failed to look up key #" + i + " (" + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 == 1) {
        // This key is in the array we search as the element (i - 1) / 2. Make
        // sure we find it.
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
        // This key is not in the array but between two elements on the array,
        // in the beginning, or in the end. The result should be the previous
        // key in the searched array, or -1 for i = 0.
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);

      // Now test that we can get the offset and the on-disk size using a
      // higher-level API function.
      boolean locateBlockResult =
        (BlockIndexReader.locateNonRootIndexEntry(new MultiByteBuff(nonRootIndex), cell,
          CellComparatorImpl.COMPARATOR) != -1);

      if (i == 0) {
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem), nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem), nonRootIndex.getInt());
      }
    }

  }

  @Test
  public void testBlockIndexChunk() throws IOException {
    BlockIndexChunk c = new BlockIndexChunk();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int N = 1000;
    int[] numSubEntriesAt = new int[N];
    int numSubEntries = 0;
    for (int i = 0; i < N; ++i) {
      baos.reset();
      DataOutputStream dos = new DataOutputStream(baos);
      c.writeNonRoot(dos);
      assertEquals(c.getNonRootSize(), dos.size());

      baos.reset();
      dos = new DataOutputStream(baos);
      c.writeRoot(dos);
      assertEquals(c.getRootSize(), dos.size());

      byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i);
      numSubEntries += RNG.nextInt(5) + 1;
      // Record the cumulative sub-entry count so the sub-entry lookup below can be verified.
      numSubEntriesAt[i] = numSubEntries;
      keys.add(k);
      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
    }

    // Test the ability to look up the entry that contains a particular
    // deeper-level index block's entry ("sub-entry"), assuming a global
    // 0-based ordering of sub-entries. This is needed for mid-key calculation.
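    // For example, if entry 0 covers sub-entries [0, 3) and entry 1 covers [3, 5), then
    // getEntryBySubEntry(4) must return 1.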
    for (int i = 0; i < N; ++i) {
      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1]; j < numSubEntriesAt[i]; ++j) {
        assertEquals(i, c.getEntryBySubEntry(j));
      }
    }
  }

  /** Checks if the HeapSize calculator is within reason */
  @Test
  public void testHeapSizeForBlockIndex() throws IOException {
    Class<HFileBlockIndex.BlockIndexReader> cl = HFileBlockIndex.BlockIndexReader.class;
    long expected = ClassSize.estimateBase(cl, false);

    HFileBlockIndex.BlockIndexReader bi = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    long actual = bi.heapSize();

    // The arrays in BlockIndexReader (byte[][] blockKeys, long[] blockOffsets,
    // int[] blockDataSizes) are all null here, so they do not show up in the heapSize()
    // result; remove those array costs from the expected value. The block keys are
    // already excluded from the estimate in this case.
    expected -= ClassSize.align(2 * ClassSize.ARRAY);

    if (expected != actual) {
      expected = ClassSize.estimateBase(cl, true);
      assertEquals(expected, actual);
    }
  }

  /**
   * Checks that computing the mid-key does not fail when the mid-key falls exactly on a
   * leaf-level index block boundary.
   */
  @Test
  public void testMidKeyOnLeafIndexBlockBoundary() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_midkey");
    int maxChunkSize = 512;
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    // Enable hfile.block.index.cacheonwrite so index blocks are cached as they are written.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true);
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();
    // Evict all blocks that were cached-on-write by the previous invocation.
    blockCache.evictBlocksByHfileName(hfilePath.getName());
    // Write the HFile
    HFileContext meta = new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE)
      .withCompression(Algorithm.NONE).withDataBlockEncoding(DataBlockEncoding.NONE).build();
    HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath)
      .withFileContext(meta).create();
    Random rand = new Random(19231737);
    byte[] family = Bytes.toBytes("f");
    byte[] qualifier = Bytes.toBytes("q");
    int kvNumberToBeWritten = 16;
    // The generated hfile contains 2 leaf-index blocks and 16 data blocks; the mid-key falls
    // exactly on the boundary of the first leaf-index block.
    for (int i = 0; i < kvNumberToBeWritten; ++i) {
      byte[] row = RandomKeyValueUtil.randomOrderedFixedLengthKey(rand, i, 30);

      // Key will be interpreted by KeyValue.KEY_COMPARATOR
      KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
        RandomKeyValueUtil.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE));
      writer.append(kv);
    }
    writer.close();

    // Disable hfile.block.index.cacheonwrite again before reading the file back.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);

    // Read the HFile
    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

    boolean hasArrayIndexOutOfBoundsException = false;
    try {
      // get the mid-key.
      reader.midKey();
    } catch (ArrayIndexOutOfBoundsException e) {
      hasArrayIndexOutOfBoundsException = true;
    } finally {
      reader.close();
    }

    // Verify that no ArrayIndexOutOfBoundsException occurred while computing the mid-key.
    assertFalse(hasArrayIndexOutOfBoundsException);
  }

  /**
   * Tests the block index through the HFile writer/reader APIs. Covers setting the index block
   * size through configuration, multi-level (intermediate) index blocks, and caching index blocks
   * on write.
   */
  @Test
  public void testHFileWriterAndReader() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_block_index");
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();

    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
      LOG.info("Index block size: " + indexBlockSize + ", compression: " + compr);
      // Evict all blocks that were cached-on-write by the previous invocation.
      blockCache.evictBlocksByHfileName(hfilePath.getName());

      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
      Set<String> keyStrSet = new HashSet<>();
      byte[][] keys = new byte[NUM_KV][];
      byte[][] values = new byte[NUM_KV][];

      // Write the HFile
      {
        HFileContext meta =
          new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE).withCompression(compr).build();
        HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath)
          .withFileContext(meta).create();
        Random rand = new Random(19231737);
        byte[] family = Bytes.toBytes("f");
        byte[] qualifier = Bytes.toBytes("q");
        for (int i = 0; i < NUM_KV; ++i) {
          byte[] row = RandomKeyValueUtil.randomOrderedKey(rand, i);

          // Key will be interpreted by KeyValue.KEY_COMPARATOR
          KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
            RandomKeyValueUtil.randomValue(rand));
          byte[] k = kv.getKey();
          writer.append(kv);
          keys[i] = k;
          values[i] = CellUtil.cloneValue(kv);
          keyStrSet.add(Bytes.toStringBinary(k));
          if (i > 0) {
            assertTrue((PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, kv, keys[i - 1], 0,
              keys[i - 1].length)) > 0);
          }
        }

        writer.close();
      }

      // Read the HFile
      HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
      assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());

      assertTrue(Bytes.equals(keys[0], ((KeyValue) reader.getFirstKey().get()).getKey()));
      assertTrue(Bytes.equals(keys[NUM_KV - 1], ((KeyValue) reader.getLastKey().get()).getKey()));
      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

      for (boolean pread : new boolean[] { false, true }) {
        HFileScanner scanner = reader.getScanner(conf, true, pread);
        for (int i = 0; i < NUM_KV; ++i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
            ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
        assertTrue(scanner.seekTo());
        for (int i = NUM_KV - 1; i >= 0; --i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
            ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
      }

      // Manually compute the mid-key and validate it.
      HFile.Reader reader2 = reader;
      HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();

      HFileBlock.BlockIterator iter =
        fsReader.blockRange(0, reader.getTrailer().getLoadOnOpenDataOffset());
      HFileBlock block;
      List<byte[]> blockKeys = new ArrayList<>();
      while ((block = iter.nextBlock()) != null) {
        if (block.getBlockType() != BlockType.LEAF_INDEX) return;
        ByteBuff b = block.getBufferReadOnly();
        int n = b.getIntAfterPosition(0);
        // One int for the number of items, and (n + 1) ints for the secondary index.
        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);

        // Get all the keys from the leaf index block.
        for (int i = 0; i < n; ++i) {
          int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 1));
          int nextKeyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 2));
          int keyLen = nextKeyRelOffset - keyRelOffset;
          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset + keyLen);
          String blockKeyStr = Bytes.toString(blockKey);
          blockKeys.add(blockKey);

          // If the first key of the block is not among the keys written, we
          // are not parsing the non-root index block format correctly.
          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
            keyStrSet.contains(blockKeyStr));
        }
      }

      // Validate the mid-key.
      assertEquals(Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
        reader.midKey());

      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
        reader.getTrailer().getUncompressedDataIndexSize());

      reader.close();
      reader2.close();
    }
  }

  private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i) throws IOException {
    assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0,
      scanner.seekTo(KeyValueUtil.createKeyValueFromKey(keys[i])));
  }

  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr, ByteBuffer buf) {
    assertEquals(
      msgPrefix + ": expected " + Bytes.toStringBinary(arr) + ", actual "
        + Bytes.toStringBinary(buf),
      0, Bytes.compareTo(arr, 0, arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkKeyValue(String msgPrefix, byte[] expectedKey, byte[] expectedValue,
    ByteBuffer keyRead, ByteBuffer valueRead) {
    if (!msgPrefix.isEmpty()) msgPrefix += ". ";

    assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
    assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue, valueRead);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeys() throws IOException {
    testIntermediateLevelIndicesWithLargeKeys(16);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException {
    // Because of the large row keys, we would end up with roughly a 50-level block index if the
    // minimum-entries sanity check were not applied.
    testIntermediateLevelIndicesWithLargeKeys(2);
  }

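  /**
   * Writes an HFile whose row keys are larger than the configured index chunk size, so each index
   * chunk overflows after very few entries, and then seeks to every written key to make sure the
   * resulting multi-level index is still readable. {@code minNumEntries} is applied via
   * {@link HFileBlockIndex#MIN_INDEX_NUM_ENTRIES_KEY}.
   */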
  public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException {
    Path hfPath =
      new Path(TEST_UTIL.getDataTestDir(), "testIntermediateLevelIndicesWithLargeKeys.hfile");
    int maxChunkSize = 1024;
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries);
    HFileContext context = new HFileContextBuilder().withBlockSize(16).build();
    HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf).withFileContext(context)
      .withPath(fs, hfPath).create();
    List<byte[]> keys = new ArrayList<>();

    // This should result in leaf-level indices and a root level index
    for (int i = 0; i < 100; i++) {
      byte[] rowkey = new byte[maxChunkSize + 1];
      byte[] b = Bytes.toBytes(i);
      System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length);
      keys.add(rowkey);
      hfw.append(CellUtil.createCell(rowkey));
    }
    hfw.close();

    HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, true, conf);
    // Scanner doesn't do Cells yet. Fix.
    HFileScanner scanner = reader.getScanner(conf, true, true);
    for (int i = 0; i < keys.size(); ++i) {
      scanner.seekTo(CellUtil.createCell(keys.get(i)));
    }
    reader.close();
  }
}