/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.MultiByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

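/**
 * Tests for {@link HFileBlockIndex}: writing and reading a multi-level block index,
 * binary search within non-root index blocks, {@link BlockIndexChunk} bookkeeping,
 * heap-size estimation, mid-key computation, and index behavior through the
 * HFile writer/reader APIs. The test is parameterized by compression algorithm.
 */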
@RunWith(Parameterized.class)
@Category({IOTests.class, MediumTests.class})
public class TestHFileBlockIndex {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestHFileBlockIndex.class);

  @Parameters
  public static Collection<Object[]> compressionAlgorithms() {
    return HBaseCommonTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
  }

  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }

  private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlockIndex.class);

  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  private static final int SMALL_BLOCK_SIZE = 4096;
  private static final int NUM_KV = 10000;

  private static FileSystem fs;
  private Path path;
  private Random rand;
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  private static final List<byte[]> keys = new ArrayList<>();
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES =
      { 19187, 21813, 23086 };

  private static final boolean includesMemstoreTS = true;

  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }

  @Before
  public void setUp() throws IOException {
    keys.clear();
    rand = new Random(2389757);
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

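  /**
   * Writes a block index to disk and reads it back, once without tags and once
   * with tags, resetting the shared test state in between.
   */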
  @Test
  public void testBlockIndex() throws IOException {
    testBlockIndexInternals(false);
    clear();
    testBlockIndexInternals(true);
  }

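  /**
   * Resets the shared test state between the no-tags and with-tags passes of
   * {@link #testBlockIndex()}.
   */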
  private void clear() throws IOException {
    keys.clear();
    rand = new Random(2389757);
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();

    // This test requires at least HFile format version 2; version 3 is needed
    // for the with-tags pass.
    conf.setInt(HFile.FORMAT_VERSION_KEY, 3);

    fs = HFileSystem.get(conf);
  }

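  /**
   * Writes a whole multi-level block index for synthetic data blocks to a file
   * and then reads it back, seeking to every generated key.
   */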
  private void testBlockIndexInternals(boolean useTags) throws IOException {
    path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
    writeWholeIndex(useTags);
    readIndex(useTags);
  }

  /**
   * A wrapper around a block reader which only caches the results of the last
   * operation. Not thread-safe.
   */
  private static class BlockReaderWrapper implements HFile.CachingBlockReader {

    private HFileBlock.FSReader realReader;
    private long prevOffset;
    private long prevOnDiskSize;
    private boolean prevPread;
    private HFileBlock prevBlock;

    public int hitCount = 0;
    public int missCount = 0;

    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
      this.realReader = realReader;
    }

    @Override
    public void returnBlock(HFileBlock block) {
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize,
        boolean cacheBlock, boolean pread, boolean isCompaction,
        boolean updateCacheMetrics, BlockType expectedBlockType,
        DataBlockEncoding expectedDataBlockEncoding)
        throws IOException {
      if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
          pread == prevPread) {
        hitCount += 1;
        return prevBlock;
      }

      missCount += 1;
      prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false);
      prevOffset = offset;
      prevOnDiskSize = onDiskSize;
      prevPread = pread;

      return prevBlock;
    }
  }

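  /**
   * Reads back the index written by {@link #writeWholeIndex(boolean)}: loads the
   * root index block, then seeks to every recorded key and verifies that repeated
   * seeks within the same block hit the single-entry cache of
   * {@link BlockReaderWrapper} while seeks into a new block miss it.
   */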
  private void readIndex(boolean useTags) throws IOException {
    long fileSize = fs.getFileStatus(path).getLen();
    LOG.info("Size of {}: {} compression={}", path, fileSize, compr.toString());

    FSDataInputStream istream = fs.open(path);
    HFileContext meta = new HFileContextBuilder()
                        .withHBaseCheckSum(true)
                        .withIncludesMvcc(includesMemstoreTS)
                        .withIncludesTags(useTags)
                        .withCompression(compr)
                        .build();
    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(istream, fs.getFileStatus(path)
        .getLen(), meta);

    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
    HFileBlockIndex.BlockIndexReader indexReader =
        new HFileBlockIndex.CellBasedKeyBlockIndexReader(
            CellComparatorImpl.COMPARATOR, numLevels, brw);

    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
        fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

    long prevOffset = -1;
    int i = 0;
    int expectedHitCount = 0;
    int expectedMissCount = 0;
    LOG.info("Total number of keys: " + keys.size());
    for (byte[] key : keys) {
      assertTrue(key != null);
      assertTrue(indexReader != null);
      KeyValue.KeyOnlyKeyValue keyOnlyKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      HFileBlock b =
          indexReader.seekToDataBlock(keyOnlyKey, null, true,
            true, false, null);
      if (PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, keyOnlyKey, firstKeyInFile, 0,
        firstKeyInFile.length) < 0) {
        // Keys that sort before the first key in the file have no containing block.
        assertTrue(b == null);
        ++i;
        continue;
      }

      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);

      assertTrue("seekToDataBlock failed for " + keyStr, b != null);

      if (prevOffset == b.getOffset()) {
        assertEquals(++expectedHitCount, brw.hitCount);
      } else {
        LOG.info("First key in a new block: " + keyStr + ", block offset: "
            + b.getOffset());
        assertTrue(b.getOffset() > prevOffset);
        assertEquals(++expectedMissCount, brw.missCount);
        prevOffset = b.getOffset();
      }
      ++i;
    }

    istream.close();
  }

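  /**
   * Writes {@code NUM_DATA_BLOCKS} small data blocks and a corresponding block
   * index. For each data block, 16 ordered keys are generated, and the key at
   * j == 8 (not the smallest) is recorded as the block's index entry, so the
   * first few generated keys sort before the first indexed key and exercise the
   * "key before first key in file" path in {@link #readIndex(boolean)}.
   */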
  private void writeWholeIndex(boolean useTags) throws IOException {
    assertEquals(0, keys.size());
    HFileContext meta = new HFileContextBuilder()
                        .withHBaseCheckSum(true)
                        .withIncludesMvcc(includesMemstoreTS)
                        .withIncludesTags(useTags)
                        .withCompression(compr)
                        .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
                        .build();
    HFileBlock.Writer hbw = new HFileBlock.Writer(null,
        meta);
    FSDataOutputStream outputStream = fs.create(path);
    HFileBlockIndex.BlockIndexWriter biw =
        new HFileBlockIndex.BlockIndexWriter(hbw, null, null);

    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
      hbw.startWriting(BlockType.DATA).write(String.valueOf(rand.nextInt(1000)).getBytes());
      long blockOffset = outputStream.getPos();
      hbw.writeHeaderAndData(outputStream);

      byte[] firstKey = null;
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
      for (int j = 0; j < 16; ++j) {
        byte[] k =
            new KeyValue(RandomKeyValueUtil.randomOrderedKey(rand, i * 16 + j), family, qualifier,
                EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey();
        keys.add(k);
        if (j == 8) {
          firstKey = k;
        }
      }
      assertTrue(firstKey != null);
      if (firstKeyInFile == null) {
        firstKeyInFile = firstKey;
      }
      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());

      writeInlineBlocks(hbw, outputStream, biw, false);
    }
    writeInlineBlocks(hbw, outputStream, biw, true);
    rootIndexOffset = biw.writeIndexBlocks(outputStream);
    outputStream.close();

    numLevels = biw.getNumLevels();
    numRootEntries = biw.getNumRootEntries();

    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
        numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
  }

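  /**
   * Flushes any inline (leaf-level) index blocks the index writer is ready to
   * emit and records their on-disk location via {@code blockWritten}.
   */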
  private void writeInlineBlocks(HFileBlock.Writer hbw,
      FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
      boolean isClosing) throws IOException {
    while (biw.shouldWriteBlock(isClosing)) {
      long offset = outputStream.getPos();
      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
      hbw.writeHeaderAndData(outputStream);
      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
          hbw.getUncompressedSizeWithoutHeader());
      LOG.info("Wrote an inline index block at " + offset + ", size " +
          hbw.getOnDiskSizeWithHeader());
    }
  }

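  // Deterministic fake file offsets and on-disk sizes used as index entry
  // payloads so that lookups can be verified against a known formula.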
  private static final long getDummyFileOffset(int i) {
    return i * 185 + 379;
  }

  private static final int getDummyOnDiskSize(int i) {
    return i * i * 37 + i * 19 + 13;
  }

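  /**
   * Builds a non-root index block by hand (an entry count, a secondary index of
   * relative entry offsets, and the entries themselves, each holding a file
   * offset, an on-disk size, and a key) and verifies binary search over it.
   * Only the odd-numbered keys are put into the block, so searches for
   * even-numbered keys must land on the preceding entry, or fail for the very
   * first key.
   */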
  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.

    // We only add odd-index keys into the array that we will binary-search.
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;

    // Only odd-index elements of this array are used to keep the secondary
    // index entries of the corresponding keys.
    int[] secondaryIndexEntries = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i * 2);
      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"),
          Bytes.toBytes("val"));
      keys.add(cell.getKey());
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70)
        padding.append(' ');
      msgPrefix += padding;
      if (i % 2 == 1) {
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
            ", offset " + curAllEntriesSize);
        curAllEntriesSize += cell.getKey().length
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

    // Make sure the keys are increasing.
    for (int i = 0; i < keys.size() - 1; ++i)
      assertTrue(CellComparatorImpl.COMPARATOR.compare(
          new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length),
          new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0);

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2),
        secondaryIndexOffset);

    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(),
          secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
          dummyOnDiskSize + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length +
                                        searchKey.length / 2];

      // To make things a bit more interesting, store the key we are looking
      // for at a non-zero offset in a new array.
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
            searchKey.length);

      KeyValue.KeyOnlyKeyValue cell = new KeyValue.KeyOnlyKeyValue(
          arrayHoldingKey, searchKey.length / 2, searchKey.length);
      int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell,
          new MultiByteBuff(nonRootIndex), CellComparatorImpl.COMPARATOR);
      String lookupFailureMsg = "Failed to look up key #" + i + " ("
          + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 == 1) {
        // This key is in the searched array as element (i - 1) / 2. Make
        // sure we find it.
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
        // This key is not in the searched array; it falls between two of its
        // elements, before the beginning, or past the end. The result should be
        // the previous key in the searched array, or -1 for i = 0.
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);

      // Now test that we can get the offset and the on-disk size using a
      // higher-level API function.
      boolean locateBlockResult =
          (BlockIndexReader.locateNonRootIndexEntry(new MultiByteBuff(nonRootIndex), cell,
          CellComparatorImpl.COMPARATOR) != -1);

      if (i == 0) {
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem),
            nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem),
            nonRootIndex.getInt());
      }
    }

  }

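  /**
   * Exercises {@link BlockIndexChunk}: after each added entry, the serialized
   * non-root and root forms must match the sizes the chunk reports, and every
   * sub-entry index must map back to the entry that contains it (the lookup used
   * for mid-key calculation).
   */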
  @Test
  public void testBlockIndexChunk() throws IOException {
    BlockIndexChunk c = new BlockIndexChunk();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int N = 1000;
    int[] numSubEntriesAt = new int[N];
    int numSubEntries = 0;
    for (int i = 0; i < N; ++i) {
      baos.reset();
      DataOutputStream dos = new DataOutputStream(baos);
      c.writeNonRoot(dos);
      assertEquals(c.getNonRootSize(), dos.size());

      baos.reset();
      dos = new DataOutputStream(baos);
      c.writeRoot(dos);
      assertEquals(c.getRootSize(), dos.size());

      byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i);
      numSubEntries += rand.nextInt(5) + 1;
      // Remember the cumulative sub-entry count for this entry so the lookup
      // loop below has non-empty ranges to verify.
      numSubEntriesAt[i] = numSubEntries;
      keys.add(k);
      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
    }

    // Test the ability to look up the entry that contains a particular
    // deeper-level index block's entry ("sub-entry"), assuming a global
    // 0-based ordering of sub-entries. This is needed for mid-key calculation.
    for (int i = 0; i < N; ++i) {
      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1];
           j < numSubEntriesAt[i];
           ++j) {
        assertEquals(i, c.getEntryBySubEntry(j));
      }
    }
  }

  /** Checks if the HeapSize calculator is within reason */
  @Test
  public void testHeapSizeForBlockIndex() throws IOException {
    Class<HFileBlockIndex.BlockIndexReader> cl =
        HFileBlockIndex.BlockIndexReader.class;
    long expected = ClassSize.estimateBase(cl, false);

    HFileBlockIndex.BlockIndexReader bi =
        new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    long actual = bi.heapSize();

    // Since the arrays in BlockIndex(byte [][] blockKeys, long [] blockOffsets,
    // int [] blockDataSizes) are all null, they do not show up in the heap size
    // calculation, so we need to subtract those array costs from the expected
    // value. The block keys are already excluded in this case.
    expected -= ClassSize.align(2 * ClassSize.ARRAY);

    if (expected != actual) {
      expected = ClassSize.estimateBase(cl, true);
      assertEquals(expected, actual);
    }
  }

  /**
   * Checks that computing the mid-key does not fail when the mid-key falls on
   * a leaf-level index block boundary.
   * @throws IOException
   */
  @Test
  public void testMidKeyOnLeafIndexBlockBoundary() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_midkey");
    int maxChunkSize = 512;
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    // Enable hfile.block.index.cacheonwrite.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true);
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();
    // Evict all blocks that were cached-on-write by the previous invocation.
    blockCache.evictBlocksByHfileName(hfilePath.getName());
    // Write the HFile
    HFileContext meta =
        new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE).withCompression(Algorithm.NONE)
            .withDataBlockEncoding(DataBlockEncoding.NONE).build();
    HFile.Writer writer =
        HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath).withFileContext(meta)
            .create();
    Random rand = new Random(19231737);
    byte[] family = Bytes.toBytes("f");
    byte[] qualifier = Bytes.toBytes("q");
    int kvNumberToBeWritten = 16;
    // The generated hfile will contain 2 leaf-index blocks and 16 data blocks;
    // the mid-key falls exactly on the boundary of the first leaf-index block.
    for (int i = 0; i < kvNumberToBeWritten; ++i) {
      byte[] row = RandomKeyValueUtil.randomOrderedFixedLengthKey(rand, i, 30);

      // Key will be interpreted by KeyValue.KEY_COMPARATOR
      KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
          RandomKeyValueUtil.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE));
      writer.append(kv);
    }
    writer.close();

    // Disable hfile.block.index.cacheonwrite.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);

    // Read the HFile
    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

    boolean hasArrayIndexOutOfBoundsException = false;
    try {
      // Get the mid-key.
      reader.midKey();
    } catch (ArrayIndexOutOfBoundsException e) {
      hasArrayIndexOutOfBoundsException = true;
    } finally {
      reader.close();
    }

    // Check that no ArrayIndexOutOfBoundsException occurred.
    assertFalse(hasArrayIndexOutOfBoundsException);
  }

  /**
   * Tests the block index through the HFile writer/reader APIs. Allows testing
   * the index block size setting via configuration, intermediate-level index
   * blocks, and caching index blocks on write.
   *
   * @throws IOException
   */
  @Test
  public void testHFileWriterAndReader() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
        "hfile_for_block_index");
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();

    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
      LOG.info("Index block size: " + indexBlockSize + ", compression: "
          + compr);
      // Evict all blocks that were cached-on-write by the previous invocation.
      blockCache.evictBlocksByHfileName(hfilePath.getName());

      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
      Set<String> keyStrSet = new HashSet<>();
      byte[][] keys = new byte[NUM_KV][];
      byte[][] values = new byte[NUM_KV][];

      // Write the HFile
      {
        HFileContext meta = new HFileContextBuilder()
                            .withBlockSize(SMALL_BLOCK_SIZE)
                            .withCompression(compr)
                            .build();
        HFile.Writer writer =
            HFile.getWriterFactory(conf, cacheConf)
                .withPath(fs, hfilePath)
                .withFileContext(meta)
                .create();
        Random rand = new Random(19231737);
        byte[] family = Bytes.toBytes("f");
        byte[] qualifier = Bytes.toBytes("q");
        for (int i = 0; i < NUM_KV; ++i) {
          byte[] row = RandomKeyValueUtil.randomOrderedKey(rand, i);

          // Key will be interpreted by KeyValue.KEY_COMPARATOR
          KeyValue kv =
              new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
                  RandomKeyValueUtil.randomValue(rand));
          byte[] k = kv.getKey();
          writer.append(kv);
          keys[i] = k;
          values[i] = CellUtil.cloneValue(kv);
          keyStrSet.add(Bytes.toStringBinary(k));
          if (i > 0) {
            assertTrue((PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, kv, keys[i - 1],
                0, keys[i - 1].length)) > 0);
          }
        }

        writer.close();
      }

      // Read the HFile
      HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
      assertEquals(expectedNumLevels,
          reader.getTrailer().getNumDataIndexLevels());

      assertTrue(Bytes.equals(keys[0], ((KeyValue)reader.getFirstKey().get()).getKey()));
      assertTrue(Bytes.equals(keys[NUM_KV - 1], ((KeyValue)reader.getLastKey().get()).getKey()));
      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

      for (boolean pread : new boolean[] { false, true }) {
        HFileScanner scanner = reader.getScanner(true, pread);
        for (int i = 0; i < NUM_KV; ++i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
              ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
        assertTrue(scanner.seekTo());
        for (int i = NUM_KV - 1; i >= 0; --i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
              ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
      }

      // Manually compute the mid-key and validate it.
      HFile.Reader reader2 = reader;
      HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();

      HFileBlock.BlockIterator iter = fsReader.blockRange(0,
          reader.getTrailer().getLoadOnOpenDataOffset());
      HFileBlock block;
      List<byte[]> blockKeys = new ArrayList<>();
      while ((block = iter.nextBlock()) != null) {
        if (block.getBlockType() != BlockType.LEAF_INDEX)
          return;
        ByteBuff b = block.getBufferReadOnly();
        int n = b.getIntAfterPosition(0);
        // One int for the number of items, and n + 1 for the secondary index.
        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);

        // Get all the keys from the leaf index block.
        for (int i = 0; i < n; ++i) {
          int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 1));
          int nextKeyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 2));
          int keyLen = nextKeyRelOffset - keyRelOffset;
          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset +
              HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset
              + keyLen);
          String blockKeyStr = Bytes.toString(blockKey);
          blockKeys.add(blockKey);

          // If the first key of the block is not among the keys written, we
          // are not parsing the non-root index block format correctly.
          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
              keyStrSet.contains(blockKeyStr));
        }
      }

      // Validate the mid-key.
      assertEquals(
          Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
          reader.midKey());

      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
          reader.getTrailer().getUncompressedDataIndexSize());

      reader.close();
      reader2.close();
    }
  }

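  /** Seeks the scanner to key #{@code i} and asserts that the key was found exactly. */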
  private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i)
      throws IOException {
    assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0,
        scanner.seekTo(KeyValueUtil.createKeyValueFromKey(keys[i])));
  }

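  /** Asserts that the given byte array equals the contents of the given buffer. */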
  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr,
      ByteBuffer buf) {
    assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
        + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0,
        arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkKeyValue(String msgPrefix, byte[] expectedKey,
      byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) {
    if (!msgPrefix.isEmpty())
      msgPrefix += ". ";

    assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
    assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue,
        valueRead);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeys() throws IOException {
    testIntermediateLevelIndicesWithLargeKeys(16);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException {
    // Because of the large row keys, we would end up with a 50-level block index
    // if the minimum number of entries per index level were not enforced.
    testIntermediateLevelIndicesWithLargeKeys(2);
  }

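  /**
   * Writes an HFile whose row keys are each larger than the maximum index chunk
   * size, forcing intermediate-level index blocks, then reads the file back and
   * seeks to every key. {@code minNumEntries} is applied via
   * {@code HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY} to bound the number of
   * index levels.
   */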
  public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException {
    Path hfPath = new Path(TEST_UTIL.getDataTestDir(),
      "testIntermediateLevelIndicesWithLargeKeys.hfile");
    int maxChunkSize = 1024;
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries);
    HFileContext context = new HFileContextBuilder().withBlockSize(16).build();
    HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf)
            .withFileContext(context)
            .withPath(fs, hfPath).create();
    List<byte[]> keys = new ArrayList<>();

    // This should result in leaf-level indices and a root level index
    for (int i = 0; i < 100; i++) {
      byte[] rowkey = new byte[maxChunkSize + 1];
      byte[] b = Bytes.toBytes(i);
      System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length);
      keys.add(rowkey);
      hfw.append(CellUtil.createCell(rowkey));
    }
    hfw.close();

    HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, true, conf);
    // Scanner doesn't do Cells yet.  Fix.
    HFileScanner scanner = reader.getScanner(true, true);
    for (int i = 0; i < keys.size(); ++i) {
      scanner.seekTo(CellUtil.createCell(keys.get(i)));
    }
    reader.close();
  }
}