/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.MultiByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@RunWith(Parameterized.class)
@Category({IOTests.class, MediumTests.class})
public class TestHFileBlockIndex {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestHFileBlockIndex.class);

  @Parameters
  public static Collection<Object[]> compressionAlgorithms() {
    return HBaseCommonTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
  }

  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }

  private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlockIndex.class);

  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  private static final int SMALL_BLOCK_SIZE = 4096;
  private static final int NUM_KV = 10000;

  private static FileSystem fs;
  private Path path;
  private Random rand;
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  private static final List<byte[]> keys = new ArrayList<>();
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES =
      { 19187, 21813, 23086 };
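  // The three arrays above are parallel: for INDEX_CHUNK_SIZES[i], testHFileWriterAndReader()
  // expects EXPECTED_NUM_LEVELS[i] data-index levels and an uncompressed data index of
  // UNCOMPRESSED_INDEX_SIZES[i] bytes in the trailer.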

  private static final boolean includesMemstoreTS = true;

  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }

  @Before
  public void setUp() throws IOException {
    keys.clear();
    rand = new Random(2389757);
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

  @Test
  public void testBlockIndex() throws IOException {
    testBlockIndexInternals(false);
    clear();
    testBlockIndexInternals(true);
  }

  private void clear() throws IOException {
    keys.clear();
    rand = new Random(2389757);
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, 3);

    fs = HFileSystem.get(conf);
  }

  private void testBlockIndexInternals(boolean useTags) throws IOException {
    path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
    writeWholeIndex(useTags);
    readIndex(useTags);
  }

  /**
   * A wrapper around a block reader which only caches the results of the last
   * operation. Not thread-safe.
   */
  private static class BlockReaderWrapper implements HFile.CachingBlockReader {

    private HFileBlock.FSReader realReader;
    private long prevOffset;
    private long prevOnDiskSize;
    private boolean prevPread;
    private HFileBlock prevBlock;

    public int hitCount = 0;
    public int missCount = 0;

    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
      this.realReader = realReader;
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize,
        boolean cacheBlock, boolean pread, boolean isCompaction,
        boolean updateCacheMetrics, BlockType expectedBlockType,
        DataBlockEncoding expectedDataBlockEncoding)
        throws IOException {
      if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
          pread == prevPread) {
        hitCount += 1;
        return prevBlock;
      }

      missCount += 1;
      prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false, true);
      prevOffset = offset;
      prevOnDiskSize = onDiskSize;
      prevPread = pread;

      return prevBlock;
    }
  }

  private void readIndex(boolean useTags) throws IOException {
    long fileSize = fs.getFileStatus(path).getLen();
    LOG.info("Size of {}: {} compression={}", path, fileSize, compr.toString());

    FSDataInputStream istream = fs.open(path);
    HFileContext meta = new HFileContextBuilder()
                        .withHBaseCheckSum(true)
                        .withIncludesMvcc(includesMemstoreTS)
                        .withIncludesTags(useTags)
                        .withCompression(compr)
                        .build();
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, path).build();
    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(context, meta,
        ByteBuffAllocator.HEAP);

    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
    HFileBlockIndex.BlockIndexReader indexReader =
        new HFileBlockIndex.CellBasedKeyBlockIndexReader(
            CellComparatorImpl.COMPARATOR, numLevels);

    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
        fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

    long prevOffset = -1;
    int i = 0;
    int expectedHitCount = 0;
    int expectedMissCount = 0;
    LOG.info("Total number of keys: " + keys.size());
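    // Walk every key we wrote. A key that lands in the same data block as the previous key
    // should be served from BlockReaderWrapper's single-entry cache (a hit), while the first
    // key of each new block should force a real read through the underlying reader (a miss).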
    for (byte[] key : keys) {
      assertTrue(key != null);
      assertTrue(indexReader != null);
      KeyValue.KeyOnlyKeyValue keyOnlyKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      HFileBlock b =
          indexReader.seekToDataBlock(keyOnlyKey, null, true,
            true, false, null, brw);
      if (PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, keyOnlyKey, firstKeyInFile, 0,
        firstKeyInFile.length) < 0) {
        assertTrue(b == null);
        ++i;
        continue;
      }

      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);

      assertTrue("seekToDataBlock failed for " + keyStr, b != null);

      if (prevOffset == b.getOffset()) {
        assertEquals(++expectedHitCount, brw.hitCount);
      } else {
249        LOG.info("First key in a new block: " + keyStr + ", block offset: "
250            + b.getOffset() + ")");
        assertTrue(b.getOffset() > prevOffset);
        assertEquals(++expectedMissCount, brw.missCount);
        prevOffset = b.getOffset();
      }
      ++i;
    }

    istream.close();
  }

  private void writeWholeIndex(boolean useTags) throws IOException {
    assertEquals(0, keys.size());
    HFileContext meta = new HFileContextBuilder()
                        .withHBaseCheckSum(true)
                        .withIncludesMvcc(includesMemstoreTS)
                        .withIncludesTags(useTags)
                        .withCompression(compr)
                        .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
                        .build();
    HFileBlock.Writer hbw = new HFileBlock.Writer(null,
        meta);
    FSDataOutputStream outputStream = fs.create(path);
    HFileBlockIndex.BlockIndexWriter biw =
        new HFileBlockIndex.BlockIndexWriter(hbw, null, null);

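    // For each of the NUM_DATA_BLOCKS data blocks, write a small dummy payload and generate
    // 16 monotonically increasing keys. Note that the key generated at j == 8, not the first
    // one, is registered as the block's index key, so the keys generated before it for a given
    // block resolve to the previous block (or to no block at all for the very first block)
    // when readIndex() seeks to them.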
    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
      hbw.startWriting(BlockType.DATA).write(String.valueOf(rand.nextInt(1000)).getBytes());
      long blockOffset = outputStream.getPos();
      hbw.writeHeaderAndData(outputStream);

      byte[] firstKey = null;
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
      for (int j = 0; j < 16; ++j) {
        byte[] k =
            new KeyValue(RandomKeyValueUtil.randomOrderedKey(rand, i * 16 + j), family, qualifier,
                EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey();
        keys.add(k);
        if (j == 8) {
          firstKey = k;
        }
      }
      assertTrue(firstKey != null);
      if (firstKeyInFile == null) {
        firstKeyInFile = firstKey;
      }
      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());

      writeInlineBlocks(hbw, outputStream, biw, false);
    }
    writeInlineBlocks(hbw, outputStream, biw, true);
    rootIndexOffset = biw.writeIndexBlocks(outputStream);
    outputStream.close();

    numLevels = biw.getNumLevels();
    numRootEntries = biw.getNumRootEntries();

    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
        numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
  }

  private void writeInlineBlocks(HFileBlock.Writer hbw,
      FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
      boolean isClosing) throws IOException {
    while (biw.shouldWriteBlock(isClosing)) {
      long offset = outputStream.getPos();
      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
      hbw.writeHeaderAndData(outputStream);
      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
          hbw.getUncompressedSizeWithoutHeader());
      LOG.info("Wrote an inline index block at " + offset + ", size " +
          hbw.getOnDiskSizeWithHeader());
    }
  }

  private static final long getDummyFileOffset(int i) {
    return i * 185 + 379;
  }

  private static final int getDummyOnDiskSize(int i) {
    return i * i * 37 + i * 19 + 13;
  }

  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.

    // We only add odd-index keys into the array that we will binary-search.
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
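    // Hand-serialize a non-root index block in the layout binarySearchNonRootIndex() expects:
    //   int                    entry count (only the odd-index keys become searchable entries)
    //   int[entryCount + 1]    secondary index of relative entry offsets, plus the total size
    //   entries                each one is (long fileOffset, int onDiskSize, byte[] key)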

    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;

    // Only odd-index elements of this array are used to keep the secondary
    // index entries of the corresponding keys.
    int[] secondaryIndexEntries = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i * 2);
      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"),
          Bytes.toBytes("val"));
      keys.add(cell.getKey());
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70)
        padding.append(' ');
      msgPrefix += padding;
      if (i % 2 == 1) {
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
            ", offset " + curAllEntriesSize);
        curAllEntriesSize += cell.getKey().length
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

    // Make sure the keys are increasing.
    for (int i = 0; i < keys.size() - 1; ++i)
      assertTrue(CellComparatorImpl.COMPARATOR.compare(
          new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length),
          new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0);

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2),
        secondaryIndexOffset);

    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(),
          secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
          dummyOnDiskSize + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length +
                                        searchKey.length / 2];

      // To make things a bit more interesting, store the key we are looking
      // for at a non-zero offset in a new array.
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
            searchKey.length);

      KeyValue.KeyOnlyKeyValue cell = new KeyValue.KeyOnlyKeyValue(
          arrayHoldingKey, searchKey.length / 2, searchKey.length);
      int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell,
          new MultiByteBuff(nonRootIndex), CellComparatorImpl.COMPARATOR);
      String lookupFailureMsg = "Failed to look up key #" + i + " ("
          + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 == 1) {
        // This key is in the array we search as the element (i - 1) / 2. Make
        // sure we find it.
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
        // This key is not in the array but between two elements on the array,
        // in the beginning, or in the end. The result should be the previous
        // key in the searched array, or -1 for i = 0.
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);
      // Now test that we can get the offset and the on-disk size using a
      // higher-level API function.
      boolean locateBlockResult =
          (BlockIndexReader.locateNonRootIndexEntry(new MultiByteBuff(nonRootIndex), cell,
          CellComparatorImpl.COMPARATOR) != -1);

      if (i == 0) {
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem),
            nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem),
            nonRootIndex.getInt());
      }
    }

  }

  @Test
  public void testBlockIndexChunk() throws IOException {
    BlockIndexChunk c = new BlockIndexChunk();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int N = 1000;
    int[] numSubEntriesAt = new int[N];
    int numSubEntries = 0;
    for (int i = 0; i < N; ++i) {
      baos.reset();
      DataOutputStream dos = new DataOutputStream(baos);
      c.writeNonRoot(dos);
      assertEquals(c.getNonRootSize(), dos.size());

      baos.reset();
      dos = new DataOutputStream(baos);
      c.writeRoot(dos);
      assertEquals(c.getRootSize(), dos.size());

      byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i);
      numSubEntries += rand.nextInt(5) + 1;
      // Record the running sub-entry total so the verification loop below can check
      // getEntryBySubEntry() against it.
      numSubEntriesAt[i] = numSubEntries;
      keys.add(k);
      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
    }

    // Test the ability to look up the entry that contains a particular
    // deeper-level index block's entry ("sub-entry"), assuming a global
    // 0-based ordering of sub-entries. This is needed for mid-key calculation.
    for (int i = 0; i < N; ++i) {
      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1];
           j < numSubEntriesAt[i];
           ++j) {
        assertEquals(i, c.getEntryBySubEntry(j));
      }
    }
  }

  /** Checks if the HeapSize calculator is within reason */
  @Test
  public void testHeapSizeForBlockIndex() throws IOException {
    Class<HFileBlockIndex.BlockIndexReader> cl =
        HFileBlockIndex.BlockIndexReader.class;
    long expected = ClassSize.estimateBase(cl, false);

    HFileBlockIndex.BlockIndexReader bi =
        new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    long actual = bi.heapSize();

    // Since the arrays in BlockIndex (byte[][] blockKeys, long[] blockOffsets,
    // int[] blockDataSizes) are all null, they do not show up in the heap size
    // calculation, so we need to subtract those array costs from the expected size.
    // The block keys array is already absent for this reader implementation.
    expected -= ClassSize.align(2 * ClassSize.ARRAY);

    if (expected != actual) {
      expected = ClassSize.estimateBase(cl, true);
      assertEquals(expected, actual);
    }
  }

  /**
   * Checks that midkey computation works when the midkey falls on a leaf-level
   * index block boundary.
   * @throws IOException
   */
  @Test
  public void testMidKeyOnLeafIndexBlockBoundary() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_midkey");
    int maxChunkSize = 512;
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    // Enable hfile.block.index.cacheonwrite.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true);
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();
    // Evict all blocks that were cached-on-write by the previous invocation.
    blockCache.evictBlocksByHfileName(hfilePath.getName());
    // Write the HFile
    HFileContext meta =
        new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE).withCompression(Algorithm.NONE)
            .withDataBlockEncoding(DataBlockEncoding.NONE).build();
    HFile.Writer writer =
        HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath).withFileContext(meta)
            .create();
    Random rand = new Random(19231737);
    byte[] family = Bytes.toBytes("f");
    byte[] qualifier = Bytes.toBytes("q");
    int kvNumberToBeWritten = 16;
    // The generated hfile will contain 2 leaf-index blocks and 16 data blocks;
    // the midkey falls exactly on the boundary of the first leaf-index block.
    for (int i = 0; i < kvNumberToBeWritten; ++i) {
      byte[] row = RandomKeyValueUtil.randomOrderedFixedLengthKey(rand, i, 30);

      // Key will be interpreted by KeyValue.KEY_COMPARATOR
      KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
          RandomKeyValueUtil.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE));
      writer.append(kv);
    }
    writer.close();

    // Disable hfile.block.index.cacheonwrite again before reading.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);

    // Read the HFile
    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

    boolean hasArrayIndexOutOfBoundsException = false;
    try {
      // get the mid-key.
      reader.midKey();
    } catch (ArrayIndexOutOfBoundsException e) {
      hasArrayIndexOutOfBoundsException = true;
    } finally {
      reader.close();
    }

    // The midkey computation must not have thrown an ArrayIndexOutOfBoundsException.
    assertFalse(hasArrayIndexOutOfBoundsException);
  }

  /**
   * Tests the block index through the HFile writer/reader APIs. Covers setting the
   * index block size through configuration, intermediate-level index blocks, and
   * caching index blocks on write.
   *
   * @throws IOException
   */
  @Test
  public void testHFileWriterAndReader() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
        "hfile_for_block_index");
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();

    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
      LOG.info("Index block size: " + indexBlockSize + ", compression: "
          + compr);
      // Evict all blocks that were cached-on-write by the previous invocation.
      blockCache.evictBlocksByHfileName(hfilePath.getName());

      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
      Set<String> keyStrSet = new HashSet<>();
      byte[][] keys = new byte[NUM_KV][];
      byte[][] values = new byte[NUM_KV][];

      // Write the HFile
      {
        HFileContext meta = new HFileContextBuilder()
                            .withBlockSize(SMALL_BLOCK_SIZE)
                            .withCompression(compr)
                            .build();
        HFile.Writer writer =
            HFile.getWriterFactory(conf, cacheConf)
                .withPath(fs, hfilePath)
                .withFileContext(meta)
                .create();
        Random rand = new Random(19231737);
        byte[] family = Bytes.toBytes("f");
        byte[] qualifier = Bytes.toBytes("q");
        for (int i = 0; i < NUM_KV; ++i) {
          byte[] row = RandomKeyValueUtil.randomOrderedKey(rand, i);

          // Key will be interpreted by KeyValue.KEY_COMPARATOR
          KeyValue kv =
              new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
                  RandomKeyValueUtil.randomValue(rand));
          byte[] k = kv.getKey();
          writer.append(kv);
          keys[i] = k;
          values[i] = CellUtil.cloneValue(kv);
          keyStrSet.add(Bytes.toStringBinary(k));
          if (i > 0) {
            assertTrue((PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, kv, keys[i - 1],
                0, keys[i - 1].length)) > 0);
          }
        }

        writer.close();
      }

      // Read the HFile
      HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
      assertEquals(expectedNumLevels,
          reader.getTrailer().getNumDataIndexLevels());

      assertTrue(Bytes.equals(keys[0], ((KeyValue)reader.getFirstKey().get()).getKey()));
      assertTrue(Bytes.equals(keys[NUM_KV - 1], ((KeyValue)reader.getLastKey().get()).getKey()));
      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

      for (boolean pread : new boolean[] { false, true }) {
        HFileScanner scanner = reader.getScanner(true, pread);
        for (int i = 0; i < NUM_KV; ++i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
              ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
        assertTrue(scanner.seekTo());
        for (int i = NUM_KV - 1; i >= 0; --i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
              ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
      }

      // Manually compute the mid-key and validate it.
      HFile.Reader reader2 = reader;
      HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();

      HFileBlock.BlockIterator iter = fsReader.blockRange(0,
          reader.getTrailer().getLoadOnOpenDataOffset());
      HFileBlock block;
      List<byte[]> blockKeys = new ArrayList<>();
      while ((block = iter.nextBlock()) != null) {
        if (block.getBlockType() != BlockType.LEAF_INDEX)
          return;
        ByteBuff b = block.getBufferReadOnly();
        int n = b.getIntAfterPosition(0);
        // One int for the number of items, and n + 1 for the secondary index.
        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);

        // Get all the keys from the leaf index block.
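        // Each entry in the entries area is a long file offset plus an int on-disk size
        // (SECONDARY_INDEX_ENTRY_OVERHEAD bytes) followed by the key bytes, so the key
        // offset computed below skips that fixed-size prefix.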
        for (int i = 0; i < n; ++i) {
          int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 1));
          int nextKeyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 2));
          int keyLen = nextKeyRelOffset - keyRelOffset;
          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset +
              HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset
              + keyLen);
          String blockKeyStr = Bytes.toString(blockKey);
          blockKeys.add(blockKey);

          // If the first key of the block is not among the keys written, we
          // are not parsing the non-root index block format correctly.
          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
              keyStrSet.contains(blockKeyStr));
        }
      }

      // Validate the mid-key.
      assertEquals(
          Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
          reader.midKey());

      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
          reader.getTrailer().getUncompressedDataIndexSize());

      reader.close();
      reader2.close();
    }
  }

  private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i)
      throws IOException {
    assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0,
        scanner.seekTo(KeyValueUtil.createKeyValueFromKey(keys[i])));
  }

  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr,
      ByteBuffer buf) {
    assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
        + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0,
        arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkKeyValue(String msgPrefix, byte[] expectedKey,
      byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) {
    if (!msgPrefix.isEmpty())
      msgPrefix += ". ";

    assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
    assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue,
        valueRead);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeys() throws IOException {
    testIntermediateLevelIndicesWithLargeKeys(16);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException {
    // Because of the large row keys, we would end up with a 50-level block index
    // without the minimum-entries sanity check.
    testIntermediateLevelIndicesWithLargeKeys(2);
  }

  public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException {
    Path hfPath = new Path(TEST_UTIL.getDataTestDir(),
      "testIntermediateLevelIndicesWithLargeKeys.hfile");
    int maxChunkSize = 1024;
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries);
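    // Each row key below is larger than maxChunkSize, so every index entry by itself exceeds
    // the chunk size; the minimum-entries setting is what keeps each index chunk from holding
    // a single entry and the index from becoming excessively deep.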
    HFileContext context = new HFileContextBuilder().withBlockSize(16).build();
    HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf)
            .withFileContext(context)
            .withPath(fs, hfPath).create();
    List<byte[]> keys = new ArrayList<>();

    // This should result in leaf-level indices and a root level index
    for (int i = 0; i < 100; i++) {
      byte[] rowkey = new byte[maxChunkSize + 1];
      byte[] b = Bytes.toBytes(i);
      System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length);
      keys.add(rowkey);
      hfw.append(CellUtil.createCell(rowkey));
    }
    hfw.close();

    HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, true, conf);
    // Scanner doesn't do Cells yet.  Fix.
    HFileScanner scanner = reader.getScanner(true, true);
    for (int i = 0; i < keys.size(); ++i) {
      scanner.seekTo(CellUtil.createCell(keys.get(i)));
    }
    reader.close();
  }
}