/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCellBuilderFactory;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseCommonTestingUtil;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.MultiByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@RunWith(Parameterized.class)
@Category({IOTests.class, MediumTests.class})
public class TestHFileBlockIndex {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestHFileBlockIndex.class);

  @Parameters
  public static Collection<Object[]> compressionAlgorithms() {
    return HBaseCommonTestingUtil.COMPRESSION_ALGORITHMS_PARAMETERIZED;
  }

  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }

  private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlockIndex.class);

  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtil TEST_UTIL =
      new HBaseTestingUtil();

  private static final int SMALL_BLOCK_SIZE = 4096;
  private static final int NUM_KV = 10000;

  private static FileSystem fs;
  private Path path;
  private Random rand;
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  private static final List<byte[]> keys = new ArrayList<>();
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

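  // These three arrays are parallel: INDEX_CHUNK_SIZES[i] is the configured
  // index chunk size, EXPECTED_NUM_LEVELS[i] the number of index levels it is
  // expected to produce, and UNCOMPRESSED_INDEX_SIZES[i] the expected total
  // uncompressed index size. The sizes are deterministic because the test
  // uses fixed Random seeds; update all three together if the data changes.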
  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES =
      { 19187, 21813, 23086 };

  private static final boolean includesMemstoreTS = true;

  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }

  @Before
  public void setUp() throws IOException {
    keys.clear();
    rand = new Random(2389757);
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

  @Test
  public void testBlockIndex() throws IOException {
    testBlockIndexInternals(false);
    clear();
    testBlockIndexInternals(true);
  }

  private void clear() throws IOException {
    keys.clear();
    rand = new Random(2389757);
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

  private void testBlockIndexInternals(boolean useTags) throws IOException {
    path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
    writeWholeIndex(useTags);
    readIndex(useTags);
  }

  /**
   * A wrapper around a block reader which only caches the results of the last
   * operation. Not thread-safe.
   */
  private static class BlockReaderWrapper implements HFile.CachingBlockReader {

    private HFileBlock.FSReader realReader;
    private long prevOffset;
    private long prevOnDiskSize;
    private boolean prevPread;
    private HFileBlock prevBlock;

    public int hitCount = 0;
    public int missCount = 0;

    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
      this.realReader = realReader;
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize,
        boolean cacheBlock, boolean pread, boolean isCompaction,
        boolean updateCacheMetrics, BlockType expectedBlockType,
        DataBlockEncoding expectedDataBlockEncoding)
        throws IOException {
      if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
          pread == prevPread) {
        hitCount += 1;
        return prevBlock;
      }

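      // Cache miss: read through to the underlying reader and remember the
      // result, so an immediate repeat of the same request becomes a hit.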
      missCount += 1;
      prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false, true);
      prevOffset = offset;
      prevOnDiskSize = onDiskSize;
      prevPread = pread;

      return prevBlock;
    }
  }

  private void readIndex(boolean useTags) throws IOException {
    long fileSize = fs.getFileStatus(path).getLen();
    LOG.info("Size of {}: {} compression={}", path, fileSize, compr);

    FSDataInputStream istream = fs.open(path);
    HFileContext meta = new HFileContextBuilder()
                        .withHBaseCheckSum(true)
                        .withIncludesMvcc(includesMemstoreTS)
                        .withIncludesTags(useTags)
                        .withCompression(compr)
                        .build();
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, path).build();
    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(context, meta,
        ByteBuffAllocator.HEAP, conf);

    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
    HFileBlockIndex.BlockIndexReader indexReader =
        new HFileBlockIndex.CellBasedKeyBlockIndexReader(
            CellComparatorImpl.COMPARATOR, numLevels);

    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
        fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

    long prevOffset = -1;
    int i = 0;
    int expectedHitCount = 0;
    int expectedMissCount = 0;
    LOG.info("Total number of keys: " + keys.size());
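    // Walk every key we wrote. The keys were generated 16 per data block, so
    // consecutive lookups usually resolve to the same block: those must be
    // served from BlockReaderWrapper's one-entry cache (a "hit"), while
    // crossing into a new block must cost exactly one real read (a "miss").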
    for (byte[] key : keys) {
      assertNotNull(key);
      assertNotNull(indexReader);
      KeyValue.KeyOnlyKeyValue keyOnlyKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      HFileBlock b =
          indexReader.seekToDataBlock(keyOnlyKey, null, true,
            true, false, null, brw);
      if (PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, keyOnlyKey, firstKeyInFile, 0,
        firstKeyInFile.length) < 0) {
        assertNull(b);
        ++i;
        continue;
      }

      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);

      assertNotNull("seekToDataBlock failed for " + keyStr, b);

      if (prevOffset == b.getOffset()) {
        assertEquals(++expectedHitCount, brw.hitCount);
      } else {
        LOG.info("First key in a new block: " + keyStr + ", block offset: "
            + b.getOffset());
        assertTrue(b.getOffset() > prevOffset);
        assertEquals(++expectedMissCount, brw.missCount);
        prevOffset = b.getOffset();
      }
      ++i;
    }

    istream.close();
  }

  private void writeWholeIndex(boolean useTags) throws IOException {
    assertEquals(0, keys.size());
    HFileContext meta = new HFileContextBuilder()
                        .withHBaseCheckSum(true)
                        .withIncludesMvcc(includesMemstoreTS)
                        .withIncludesTags(useTags)
                        .withCompression(compr)
                        .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
                        .build();
    HFileBlock.Writer hbw = new HFileBlock.Writer(TEST_UTIL.getConfiguration(), null,
        meta);
    FSDataOutputStream outputStream = fs.create(path);
    HFileBlockIndex.BlockIndexWriter biw =
        new HFileBlockIndex.BlockIndexWriter(hbw, null, null);

    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
      hbw.startWriting(BlockType.DATA).write(Bytes.toBytes(String.valueOf(rand.nextInt(1000))));
      long blockOffset = outputStream.getPos();
      hbw.writeHeaderAndData(outputStream);

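      // Generate 16 keys per data block. Only the key at j == 8 becomes the
      // block's index entry (despite the variable name "firstKey"), so the
      // eight keys before it resolve to the previous block in readIndex.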
      byte[] firstKey = null;
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
      for (int j = 0; j < 16; ++j) {
        byte[] k =
            new KeyValue(RandomKeyValueUtil.randomOrderedKey(rand, i * 16 + j), family, qualifier,
                EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey();
        keys.add(k);
        if (j == 8) {
          firstKey = k;
        }
      }
      assertNotNull(firstKey);
      if (firstKeyInFile == null) {
        firstKeyInFile = firstKey;
      }
      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());

      writeInlineBlocks(hbw, outputStream, biw, false);
    }
    writeInlineBlocks(hbw, outputStream, biw, true);
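    // Close out the index: writeIndexBlocks emits any intermediate levels
    // plus the root-level block and returns the root index offset, which the
    // reader needs to bootstrap.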
    rootIndexOffset = biw.writeIndexBlocks(outputStream);
    outputStream.close();

    numLevels = biw.getNumLevels();
    numRootEntries = biw.getNumRootEntries();

    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
        numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
  }

  private void writeInlineBlocks(HFileBlock.Writer hbw,
      FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
      boolean isClosing) throws IOException {
    while (biw.shouldWriteBlock(isClosing)) {
      long offset = outputStream.getPos();
      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
      hbw.writeHeaderAndData(outputStream);
      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
          hbw.getUncompressedSizeWithoutHeader());
      LOG.info("Wrote an inline index block at " + offset + ", size " +
          hbw.getOnDiskSizeWithHeader());
    }
  }

  private static final long getDummyFileOffset(int i) {
    return i * 185 + 379;
  }

  private static final int getDummyOnDiskSize(int i) {
    return i * i * 37 + i * 19 + 13;
  }

  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.

    // We only add odd-index keys into the array that we will binary-search.
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;
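    // What follows hand-builds a non-root index block in its serialized
    // layout: an int entry count, a "secondary index" of numSearchedKeys + 1
    // relative entry offsets (ints), then the entries themselves, each a long
    // file offset plus an int on-disk size followed by the key bytes.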

    // Only odd-index elements of this array are used to keep the secondary
    // index entries of the corresponding keys.
    int[] secondaryIndexEntries = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i * 2);
      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"),
          Bytes.toBytes("val"));
      keys.add(cell.getKey());
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70) {
        padding.append(' ');
      }
      msgPrefix += padding;
      if (i % 2 == 1) {
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
            ", offset " + curAllEntriesSize);
        curAllEntriesSize += cell.getKey().length
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

    // Make sure the keys are increasing.
    for (int i = 0; i < keys.size() - 1; ++i) {
      assertTrue(CellComparatorImpl.COMPARATOR.compare(
          new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length),
          new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0);
    }

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2),
        secondaryIndexOffset);

    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(),
          secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
          dummyOnDiskSize + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
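    // binarySearchNonRootIndex returns the index of the last entry whose key
    // is less than or equal to the search key, or -1 if the search key sorts
    // before every entry; the loop below exercises both the hit and the miss
    // cases.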
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length +
                                        searchKey.length / 2];

      // To make things a bit more interesting, store the key we are looking
      // for at a non-zero offset in a new array.
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
            searchKey.length);

      KeyValue.KeyOnlyKeyValue cell = new KeyValue.KeyOnlyKeyValue(
          arrayHoldingKey, searchKey.length / 2, searchKey.length);
      int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell,
          new MultiByteBuff(nonRootIndex), CellComparatorImpl.COMPARATOR);
      String lookupFailureMsg = "Failed to look up key #" + i + " ("
          + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 == 1) {
        // This key is in the array we search as the element (i - 1) / 2. Make
        // sure we find it.
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
        // This key is not in the array but between two elements on the array,
        // in the beginning, or in the end. The result should be the previous
        // key in the searched array, or -1 for i = 0.
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);

      // Now test that we can get the offset and the on-disk size using a
      // higher-level API function.
      boolean locateBlockResult =
          (BlockIndexReader.locateNonRootIndexEntry(new MultiByteBuff(nonRootIndex), cell,
          CellComparatorImpl.COMPARATOR) != -1);

      if (i == 0) {
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem),
            nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem),
            nonRootIndex.getInt());
      }
    }
  }

  @Test
  public void testBlockIndexChunk() throws IOException {
    BlockIndexChunk c = new BlockIndexChunk();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int N = 1000;
    int[] numSubEntriesAt = new int[N];
    int numSubEntries = 0;
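    // Each top-level entry i covers a random number (1..5) of lower-level
    // "sub-entries"; numSubEntriesAt[i] records the cumulative sub-entry
    // count after entry i so the getEntryBySubEntry lookups can be verified
    // below.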
    for (int i = 0; i < N; ++i) {
      baos.reset();
      DataOutputStream dos = new DataOutputStream(baos);
      c.writeNonRoot(dos);
      assertEquals(c.getNonRootSize(), dos.size());

      baos.reset();
      dos = new DataOutputStream(baos);
      c.writeRoot(dos);
      assertEquals(c.getRootSize(), dos.size());

      byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i);
      numSubEntries += rand.nextInt(5) + 1;
      numSubEntriesAt[i] = numSubEntries;
      keys.add(k);
      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
    }

    // Test the ability to look up the entry that contains a particular
    // deeper-level index block's entry ("sub-entry"), assuming a global
    // 0-based ordering of sub-entries. This is needed for mid-key calculation.
    for (int i = 0; i < N; ++i) {
      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1]; j < numSubEntriesAt[i]; ++j) {
        assertEquals(i, c.getEntryBySubEntry(j));
      }
    }
  }

  /** Checks if the HeapSize calculator is within reason */
  @Test
  public void testHeapSizeForBlockIndex() throws IOException {
    Class<HFileBlockIndex.BlockIndexReader> cl =
        HFileBlockIndex.BlockIndexReader.class;
    long expected = ClassSize.estimateBase(cl, false);

    HFileBlockIndex.BlockIndexReader bi =
        new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    long actual = bi.heapSize();

    // Since the arrays in BlockIndex(byte [][] blockKeys, long [] blockOffsets,
    // int [] blockDataSizes) are all null, they do not show up in the heap size
    // calculation, so we need to remove those array costs from the expected
    // value. The block keys are already absent in this case.
    expected -= ClassSize.align(2 * ClassSize.ARRAY);

    if (expected != actual) {
      expected = ClassSize.estimateBase(cl, true);
      assertEquals(expected, actual);
    }
  }

  /**
   * Checks that mid-key calculation does not fail when the mid-key falls on a
   * leaf-level index block boundary.
   */
  @Test
  public void testMidKeyOnLeafIndexBlockBoundary() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_midkey");
    int maxChunkSize = 512;
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    // Turn on hfile.block.index.cacheonwrite so index blocks are cached as
    // they are written.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true);
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();
    // Evict all blocks that were cached-on-write by the previous invocation.
    blockCache.evictBlocksByHfileName(hfilePath.getName());
    // Write the HFile
    HFileContext meta =
        new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE).withCompression(Algorithm.NONE)
            .withDataBlockEncoding(DataBlockEncoding.NONE).build();
    HFile.Writer writer =
        HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath).withFileContext(meta)
            .create();
    Random rand = new Random(19231737);
    byte[] family = Bytes.toBytes("f");
    byte[] qualifier = Bytes.toBytes("q");
    int kvNumberToBeWritten = 16;
    // The generated hfile will contain 2 leaf-index blocks and 16 data blocks;
    // the mid-key falls exactly on the boundary of the first leaf-index block.
    for (int i = 0; i < kvNumberToBeWritten; ++i) {
      byte[] row = RandomKeyValueUtil.randomOrderedFixedLengthKey(rand, i, 30);

      // Key will be interpreted by KeyValue.KEY_COMPARATOR
      KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
          RandomKeyValueUtil.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE));
      writer.append(kv);
    }
    writer.close();

    // Turn hfile.block.index.cacheonwrite back off.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);

    // Read the HFile
    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

    boolean hasArrayIndexOutOfBoundsException = false;
    try {
      // Get the mid-key.
      reader.midKey();
    } catch (ArrayIndexOutOfBoundsException e) {
      hasArrayIndexOutOfBoundsException = true;
    } finally {
      reader.close();
    }

    // Check that no ArrayIndexOutOfBoundsException occurred.
    assertFalse(hasArrayIndexOutOfBoundsException);
  }

  /**
   * Tests the block index through the HFile writer/reader APIs. Allows testing
   * the index block size setting via configuration, intermediate-level index
   * blocks, and caching index blocks on write.
   */
  @Test
  public void testHFileWriterAndReader() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
        "hfile_for_block_index");
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();

    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
      LOG.info("Index block size: " + indexBlockSize + ", compression: "
          + compr);
      // Evict all blocks that were cached-on-write by the previous invocation.
      blockCache.evictBlocksByHfileName(hfilePath.getName());

      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
      Set<String> keyStrSet = new HashSet<>();
      byte[][] keys = new byte[NUM_KV][];
      byte[][] values = new byte[NUM_KV][];

      // Write the HFile
      {
        HFileContext meta = new HFileContextBuilder()
                            .withBlockSize(SMALL_BLOCK_SIZE)
                            .withCompression(compr)
                            .build();
        HFile.Writer writer =
            HFile.getWriterFactory(conf, cacheConf)
                .withPath(fs, hfilePath)
                .withFileContext(meta)
                .create();
        Random rand = new Random(19231737);
        byte[] family = Bytes.toBytes("f");
        byte[] qualifier = Bytes.toBytes("q");
        for (int i = 0; i < NUM_KV; ++i) {
          byte[] row = RandomKeyValueUtil.randomOrderedKey(rand, i);

          // Key will be interpreted by KeyValue.KEY_COMPARATOR
          KeyValue kv =
              new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
                  RandomKeyValueUtil.randomValue(rand));
          byte[] k = kv.getKey();
          writer.append(kv);
          keys[i] = k;
          values[i] = CellUtil.cloneValue(kv);
          keyStrSet.add(Bytes.toStringBinary(k));
          if (i > 0) {
            assertTrue((PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, kv, keys[i - 1],
                0, keys[i - 1].length)) > 0);
          }
        }

        writer.close();
      }

      // Read the HFile
      HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
      assertEquals(expectedNumLevels,
          reader.getTrailer().getNumDataIndexLevels());

      assertTrue(Bytes.equals(keys[0], ((KeyValue)reader.getFirstKey().get()).getKey()));
      assertTrue(Bytes.equals(keys[NUM_KV - 1], ((KeyValue)reader.getLastKey().get()).getKey()));
      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

      for (boolean pread : new boolean[] { false, true }) {
        HFileScanner scanner = reader.getScanner(conf, true, pread);
        for (int i = 0; i < NUM_KV; ++i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
              ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
        assertTrue(scanner.seekTo());
        for (int i = NUM_KV - 1; i >= 0; --i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
              ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
      }

      // Manually compute the mid-key and validate it.
      HFileBlock.FSReader fsReader = reader.getUncachedBlockReader();

      HFileBlock.BlockIterator iter = fsReader.blockRange(0,
          reader.getTrailer().getLoadOnOpenDataOffset());
      HFileBlock block;
      List<byte[]> blockKeys = new ArrayList<>();
      while ((block = iter.nextBlock()) != null) {
        if (block.getBlockType() != BlockType.LEAF_INDEX) {
          continue;
        }
        ByteBuff b = block.getBufferReadOnly();
        int n = b.getIntAfterPosition(0);
        // One int for the number of items, and n + 1 for the secondary index.
        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);
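        // Each entry then holds a long file offset and an int on-disk size
        // (SECONDARY_INDEX_ENTRY_OVERHEAD bytes in total) before the key
        // bytes, which is why keyOffset skips that overhead below.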

        // Get all the keys from the leaf index block.
        for (int i = 0; i < n; ++i) {
          int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 1));
          int nextKeyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 2));
          int keyLen = nextKeyRelOffset - keyRelOffset;
          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset +
              HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset
              + keyLen);
          String blockKeyStr = Bytes.toStringBinary(blockKey);
          blockKeys.add(blockKey);

          // If the first key of the block is not among the keys written, we
          // are not parsing the non-root index block format correctly.
          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
              keyStrSet.contains(blockKeyStr));
        }
      }

      // Validate the mid-key.
      assertEquals(
          Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
          Bytes.toStringBinary(((KeyValue) reader.midKey().get()).getKey()));

      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
          reader.getTrailer().getUncompressedDataIndexSize());

      reader.close();
    }
  }

  private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i)
      throws IOException {
    assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0,
        scanner.seekTo(KeyValueUtil.createKeyValueFromKey(keys[i])));
  }

  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr,
      ByteBuffer buf) {
    assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
        + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0,
        arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkKeyValue(String msgPrefix, byte[] expectedKey,
      byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) {
    if (!msgPrefix.isEmpty()) {
      msgPrefix += ". ";
    }

    assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
    assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue,
        valueRead);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeys() throws IOException {
    testIntermediateLevelIndicesWithLargeKeys(16);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException {
    // Because of the large row keys, we would end up with a 50-level block
    // index without the minimum-entries sanity check.
    testIntermediateLevelIndicesWithLargeKeys(2);
  }

  private void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException {
    Path hfPath = new Path(TEST_UTIL.getDataTestDir(),
      "testIntermediateLevelIndicesWithLargeKeys.hfile");
    int maxChunkSize = 1024;
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries);
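    // Each row key below is larger than maxChunkSize, so every index chunk
    // would "fill up" after a single entry; MIN_INDEX_NUM_ENTRIES_KEY forces
    // at least minNumEntries entries into each index block, which bounds the
    // depth of the resulting multi-level index.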
    HFileContext context = new HFileContextBuilder().withBlockSize(16).build();
    HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf)
            .withFileContext(context)
            .withPath(fs, hfPath).create();
    List<byte[]> keys = new ArrayList<>();

    // This should result in leaf-level indices and a root level index
    for (int i = 0; i < 100; i++) {
      byte[] rowkey = new byte[maxChunkSize + 1];
      byte[] b = Bytes.toBytes(i);
      System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length);
      keys.add(rowkey);
      hfw.append(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
        .setRow(rowkey).setFamily(HConstants.EMPTY_BYTE_ARRAY)
        .setQualifier(HConstants.EMPTY_BYTE_ARRAY)
        .setTimestamp(HConstants.LATEST_TIMESTAMP)
        .setType(KeyValue.Type.Maximum.getCode())
        .setValue(HConstants.EMPTY_BYTE_ARRAY)
        .build());
    }
    hfw.close();

    HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, true, conf);
    // Scanner doesn't do Cells yet.  Fix.
    HFileScanner scanner = reader.getScanner(conf, true, true);
    for (int i = 0; i < keys.size(); ++i) {
      scanner.seekTo(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
        .setRow(keys.get(i)).setFamily(HConstants.EMPTY_BYTE_ARRAY)
        .setQualifier(HConstants.EMPTY_BYTE_ARRAY)
        .setTimestamp(HConstants.LATEST_TIMESTAMP)
        .setType(KeyValue.Type.Maximum.getCode())
        .setValue(HConstants.EMPTY_BYTE_ARRAY)
        .build());
    }
    reader.close();
  }
}