/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCellBuilderFactory;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseCommonTestingUtil;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.MultiByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

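/**
 * Tests {@link HFileBlockIndex}: writing and reading multi-level block indexes through the
 * low-level block writer/reader APIs as well as the HFile writer/reader APIs, binary search
 * within non-root index blocks, block index chunks, heap size estimation, and mid-key
 * computation.
 */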
@RunWith(Parameterized.class)
@Category({ IOTests.class, MediumTests.class })
public class TestHFileBlockIndex {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHFileBlockIndex.class);

  @Parameters
  public static Collection<Object[]> compressionAlgorithms() {
    return HBaseCommonTestingUtil.COMPRESSION_ALGORITHMS_PARAMETERIZED;
  }

  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }

  private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlockIndex.class);
  private static final Random RNG = new Random(); // This test depends on Random#setSeed
  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();

  private static final int SMALL_BLOCK_SIZE = 4096;
  private static final int NUM_KV = 10000;

  private static FileSystem fs;
  private Path path;
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  private static final List<byte[]> keys = new ArrayList<>();
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES = { 19187, 21813, 23086 };

  private static final boolean includesMemstoreTS = true;

  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }

  @Before
  public void setUp() throws IOException {
    keys.clear();
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();
    RNG.setSeed(2389757);

    // This test requires at least HFile format version 2; use the highest supported version.
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

  @Test
  public void testBlockIndex() throws IOException {
    testBlockIndexInternals(false);
    clear();
    testBlockIndexInternals(true);
  }

  private void clear() throws IOException {
    keys.clear();
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();
    RNG.setSeed(2389757);

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, 3);

    fs = HFileSystem.get(conf);
  }

  private void testBlockIndexInternals(boolean useTags) throws IOException {
    path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
    writeWholeIndex(useTags);
    readIndex(useTags);
  }

  /**
   * A wrapper around a block reader which only caches the results of the last operation. Not
   * thread-safe.
   */
  private static class BlockReaderWrapper implements HFile.CachingBlockReader {

    private HFileBlock.FSReader realReader;
    private long prevOffset;
    private long prevOnDiskSize;
    private boolean prevPread;
    private HFileBlock prevBlock;

    public int hitCount = 0;
    public int missCount = 0;

    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
      this.realReader = realReader;
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize, boolean cacheBlock, boolean pread,
      boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding) throws IOException {
      if (offset == prevOffset && onDiskSize == prevOnDiskSize && pread == prevPread) {
        hitCount += 1;
        return prevBlock;
      }

      missCount += 1;
      prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false, true);
      prevOffset = offset;
      prevOnDiskSize = onDiskSize;
      prevPread = pread;

      return prevBlock;
    }
  }

  private void readIndex(boolean useTags) throws IOException {
    long fileSize = fs.getFileStatus(path).getLen();
    LOG.info("Size of {}: {} compression={}", path, fileSize, compr.toString());

    FSDataInputStream istream = fs.open(path);
    HFileContext meta =
      new HFileContextBuilder().withHBaseCheckSum(true).withIncludesMvcc(includesMemstoreTS)
        .withIncludesTags(useTags).withCompression(compr).build();
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, path).build();
    HFileBlock.FSReader blockReader =
      new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, conf);

    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
    HFileBlockIndex.BlockIndexReader indexReader =
      new HFileBlockIndex.CellBasedKeyBlockIndexReader(CellComparatorImpl.COMPARATOR, numLevels);

    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset, fileSize)
      .nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

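    // Seek to every key we wrote. Keys sorting before the first key in the file must not resolve
    // to any block. A repeated seek into the same block must be served by the wrapper's
    // one-entry cache (a hit), while crossing into a new block must go to the real reader
    // (a miss); the expected hit and miss counts are tracked accordingly.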
    long prevOffset = -1;
    int i = 0;
    int expectedHitCount = 0;
    int expectedMissCount = 0;
    LOG.info("Total number of keys: " + keys.size());
    for (byte[] key : keys) {
      assertTrue(key != null);
      assertTrue(indexReader != null);
      KeyValue.KeyOnlyKeyValue keyOnlyKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      HFileBlock b = indexReader.seekToDataBlock(keyOnlyKey, null, true, true, false, null, brw);
      if (
        PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, keyOnlyKey, firstKeyInFile, 0,
          firstKeyInFile.length) < 0
      ) {
        assertTrue(b == null);
        ++i;
        continue;
      }

      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);

      assertTrue("seekToDataBlock failed for " + keyStr, b != null);

      if (prevOffset == b.getOffset()) {
        assertEquals(++expectedHitCount, brw.hitCount);
      } else {
        LOG.info("First key in a new block: " + keyStr + ", block offset: " + b.getOffset());
        assertTrue(b.getOffset() > prevOffset);
        assertEquals(++expectedMissCount, brw.missCount);
        prevOffset = b.getOffset();
      }
      ++i;
    }

    istream.close();
  }

  private void writeWholeIndex(boolean useTags) throws IOException {
    assertEquals(0, keys.size());
    HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true)
      .withIncludesMvcc(includesMemstoreTS).withIncludesTags(useTags).withCompression(compr)
      .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM).build();
    HFileBlock.Writer hbw = new HFileBlock.Writer(TEST_UTIL.getConfiguration(), null, meta);
    FSDataOutputStream outputStream = fs.create(path);
    HFileBlockIndex.BlockIndexWriter biw = new HFileBlockIndex.BlockIndexWriter(hbw, null, null);
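    // For each data block: write a small block of throwaway data, record its offset, generate 16
    // ordered keys covering the block, add a single index entry for it, and give the index
    // writer a chance to emit any inline (leaf-level) index blocks that are due.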
    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
      hbw.startWriting(BlockType.DATA).write(Bytes.toBytes(String.valueOf(RNG.nextInt(1000))));
      long blockOffset = outputStream.getPos();
      hbw.writeHeaderAndData(outputStream);

      byte[] firstKey = null;
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
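      // The key at j == 8, i.e. the middle of the block's key range, becomes the block's index
      // key below, so a seek to one of the earlier keys of a block is expected to resolve to the
      // preceding block.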
      for (int j = 0; j < 16; ++j) {
        byte[] k = new KeyValue(RandomKeyValueUtil.randomOrderedKey(RNG, i * 16 + j), family,
          qualifier, EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey();
        keys.add(k);
        if (j == 8) {
          firstKey = k;
        }
      }
      assertTrue(firstKey != null);
      if (firstKeyInFile == null) {
        firstKeyInFile = firstKey;
      }
      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());

      writeInlineBlocks(hbw, outputStream, biw, false);
    }
    writeInlineBlocks(hbw, outputStream, biw, true);
    rootIndexOffset = biw.writeIndexBlocks(outputStream);
    outputStream.close();

    numLevels = biw.getNumLevels();
    numRootEntries = biw.getNumRootEntries();

    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" + numRootEntries
      + ", rootIndexOffset=" + rootIndexOffset);
  }

  private void writeInlineBlocks(HFileBlock.Writer hbw, FSDataOutputStream outputStream,
    HFileBlockIndex.BlockIndexWriter biw, boolean isClosing) throws IOException {
    while (biw.shouldWriteBlock(isClosing)) {
      long offset = outputStream.getPos();
      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
      hbw.writeHeaderAndData(outputStream);
      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
        hbw.getUncompressedSizeWithoutHeader());
      LOG.info(
        "Wrote an inline index block at " + offset + ", size " + hbw.getOnDiskSizeWithHeader());
    }
  }

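  // Deterministic dummy file offsets and on-disk sizes, so that assertions can recompute the
  // expected values for any entry index.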
  private static long getDummyFileOffset(int i) {
    return i * 185 + 379;
  }

  private static int getDummyOnDiskSize(int i) {
    return i * i * 37 + i * 19 + 13;
  }

  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.

    // We only add odd-index keys into the array that we will binary-search.
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

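    // Simulate the on-disk layout of a non-root index block: the entry count, a secondary index
    // of relative entry offsets (one per entry, plus one for the total size), and then the
    // entries themselves, each consisting of a file offset, an on-disk size, and a key.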
    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;

    // Only odd-index elements of this array are used to keep the secondary
    // index entries of the corresponding keys.
    int[] secondaryIndexEntries = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i * 2);
      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("val"));
      keys.add(cell.getKey());
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70) {
        padding.append(' ');
      }
      msgPrefix += padding;
      if (i % 2 == 1) {
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(
          msgPrefix + "secondary index entry #" + ((i - 1) / 2) + ", offset " + curAllEntriesSize);
        curAllEntriesSize += cell.getKey().length + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

    // Make sure the keys are increasing.
    for (int i = 0; i < keys.size() - 1; ++i) {
      assertTrue(CellComparatorImpl.COMPARATOR.compare(
        new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length),
        new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0);
    }

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2), secondaryIndexOffset);

    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(), secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" + dummyOnDiskSize
        + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length + searchKey.length / 2];

      // To make things a bit more interesting, store the key we are looking
      // for at a non-zero offset in a new array.
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2, searchKey.length);

      KeyValue.KeyOnlyKeyValue cell =
        new KeyValue.KeyOnlyKeyValue(arrayHoldingKey, searchKey.length / 2, searchKey.length);
      int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell,
        new MultiByteBuff(nonRootIndex), CellComparatorImpl.COMPARATOR);
      String lookupFailureMsg =
        "Failed to look up key #" + i + " (" + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 == 1) {
        // This key is in the array we search as the element (i - 1) / 2. Make
        // sure we find it.
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
        // This key is not in the array but between two elements of the array,
        // at the beginning, or at the end. The result should be the previous
        // key in the searched array, or -1 for i = 0.
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);

      // Now test that we can get the offset and the on-disk size using a
      // higher-level API function.
      boolean locateBlockResult =
        (BlockIndexReader.locateNonRootIndexEntry(new MultiByteBuff(nonRootIndex), cell,
          CellComparatorImpl.COMPARATOR) != -1);

      if (i == 0) {
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem), nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem), nonRootIndex.getInt());
      }
    }
  }

  @Test
  public void testBlockIndexChunk() throws IOException {
    BlockIndexChunk c = new BlockIndexChunk();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int N = 1000;
    int[] numSubEntriesAt = new int[N];
    int numSubEntries = 0;
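    // Each entry added to the chunk covers between one and five sub-entries; record the
    // cumulative sub-entry count at every entry so the sub-entry lookup below can be verified.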
    for (int i = 0; i < N; ++i) {
      baos.reset();
      DataOutputStream dos = new DataOutputStream(baos);
      c.writeNonRoot(dos);
      assertEquals(c.getNonRootSize(), dos.size());

      baos.reset();
      dos = new DataOutputStream(baos);
      c.writeRoot(dos);
      assertEquals(c.getRootSize(), dos.size());

      byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i);
      numSubEntries += RNG.nextInt(5) + 1;
      numSubEntriesAt[i] = numSubEntries;
      keys.add(k);
      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
    }

    // Test the ability to look up the entry that contains a particular
    // deeper-level index block's entry ("sub-entry"), assuming a global
    // 0-based ordering of sub-entries. This is needed for mid-key calculation.
    for (int i = 0; i < N; ++i) {
      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1]; j < numSubEntriesAt[i]; ++j) {
        assertEquals(i, c.getEntryBySubEntry(j));
      }
    }
  }

  /** Checks if the HeapSize calculator is within reason */
  @Test
  public void testHeapSizeForBlockIndex() throws IOException {
    Class<HFileBlockIndex.BlockIndexReader> cl = HFileBlockIndex.BlockIndexReader.class;
    long expected = ClassSize.estimateBase(cl, false);

    HFileBlockIndex.BlockIndexReader bi = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    long actual = bi.heapSize();

    // Since the arrays (byte[][] blockKeys, long[] blockOffsets, int[] blockDataSizes) in the
    // block index are all null, they do not show up in the HeapSize calculation, so those array
    // costs need to be removed from the expected size. The block keys are already absent in
    // this case, which is why only two array costs are subtracted.
    expected -= ClassSize.align(2 * ClassSize.ARRAY);

    if (expected != actual) {
      expected = ClassSize.estimateBase(cl, true);
      assertEquals(expected, actual);
    }
  }

  /**
   * Checks that the mid-key is computed correctly when it falls on a leaf-level index block
   * boundary.
   */
  @Test
  public void testMidKeyOnLeafIndexBlockBoundary() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_midkey");
    int maxChunkSize = 512;
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    // Enable hfile.block.index.cacheonwrite.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true);
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();
    // Evict all blocks that were cached-on-write by the previous invocation.
    blockCache.evictBlocksByHfileName(hfilePath.getName());
    // Write the HFile
    HFileContext meta = new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE)
      .withCompression(Algorithm.NONE).withDataBlockEncoding(DataBlockEncoding.NONE).build();
    HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath)
      .withFileContext(meta).create();
    Random rand = new Random(19231737);
    byte[] family = Bytes.toBytes("f");
    byte[] qualifier = Bytes.toBytes("q");
    int kvNumberToBeWritten = 16;
    // The generated hfile will contain 2 leaf-index blocks and 16 data blocks; the mid-key falls
    // exactly on the boundary of the first leaf-index block.
    for (int i = 0; i < kvNumberToBeWritten; ++i) {
      byte[] row = RandomKeyValueUtil.randomOrderedFixedLengthKey(rand, i, 30);

      // Key will be interpreted by KeyValue.KEY_COMPARATOR
      KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
        RandomKeyValueUtil.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE));
      writer.append(kv);
    }
    writer.close();

    // Disable hfile.block.index.cacheonwrite.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);

    // Read the HFile
    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

    boolean hasArrayIndexOutOfBoundsException = false;
    try {
      // Get the mid-key.
      reader.midKey();
    } catch (ArrayIndexOutOfBoundsException e) {
      hasArrayIndexOutOfBoundsException = true;
    } finally {
      reader.close();
    }

    // Check that no ArrayIndexOutOfBoundsException occurred.
    assertFalse(hasArrayIndexOutOfBoundsException);
  }

  /**
   * Tests the block index through the HFile writer/reader APIs. Allows testing setting the index
   * block size through configuration, intermediate-level index blocks, and caching index blocks
   * on write.
   */
  @Test
  public void testHFileWriterAndReader() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_block_index");
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();

    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
      LOG.info("Index block size: " + indexBlockSize + ", compression: " + compr);
      // Evict all blocks that were cached-on-write by the previous invocation.
      blockCache.evictBlocksByHfileName(hfilePath.getName());

      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
      Set<String> keyStrSet = new HashSet<>();
      byte[][] keys = new byte[NUM_KV][];
      byte[][] values = new byte[NUM_KV][];

      // Write the HFile
      {
        HFileContext meta =
          new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE).withCompression(compr).build();
        HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath)
          .withFileContext(meta).create();
        Random rand = new Random(19231737);
        byte[] family = Bytes.toBytes("f");
        byte[] qualifier = Bytes.toBytes("q");
        for (int i = 0; i < NUM_KV; ++i) {
          byte[] row = RandomKeyValueUtil.randomOrderedKey(rand, i);

          // Key will be interpreted by KeyValue.KEY_COMPARATOR
          KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
            RandomKeyValueUtil.randomValue(rand));
          byte[] k = kv.getKey();
          writer.append(kv);
          keys[i] = k;
          values[i] = CellUtil.cloneValue(kv);
          keyStrSet.add(Bytes.toStringBinary(k));
          if (i > 0) {
            assertTrue((PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, kv, keys[i - 1], 0,
              keys[i - 1].length)) > 0);
          }
        }

        writer.close();
      }

      // Read the HFile
      HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
      assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());

      assertTrue(Bytes.equals(keys[0], ((KeyValue) reader.getFirstKey().get()).getKey()));
      assertTrue(Bytes.equals(keys[NUM_KV - 1], ((KeyValue) reader.getLastKey().get()).getKey()));
      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

      for (boolean pread : new boolean[] { false, true }) {
        HFileScanner scanner = reader.getScanner(conf, true, pread);
        for (int i = 0; i < NUM_KV; ++i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
            ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
        assertTrue(scanner.seekTo());
        for (int i = NUM_KV - 1; i >= 0; --i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
            ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
      }

      // Manually compute the mid-key and validate it.
      HFile.Reader reader2 = reader;
      HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();

      HFileBlock.BlockIterator iter =
        fsReader.blockRange(0, reader.getTrailer().getLoadOnOpenDataOffset());
      HFileBlock block;
      List<byte[]> blockKeys = new ArrayList<>();
      while ((block = iter.nextBlock()) != null) {
        if (block.getBlockType() != BlockType.LEAF_INDEX) return;
        ByteBuff b = block.getBufferReadOnly();
        int n = b.getIntAfterPosition(0);
        // One int for the number of items, and n + 1 for the secondary index.
        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);

        // Get all the keys from the leaf index block.
        for (int i = 0; i < n; ++i) {
          int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 1));
          int nextKeyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 2));
          int keyLen = nextKeyRelOffset - keyRelOffset;
          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset + keyLen);
          String blockKeyStr = Bytes.toString(blockKey);
          blockKeys.add(blockKey);

          // If the first key of the block is not among the keys written, we
          // are not parsing the non-root index block format correctly.
          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
            keyStrSet.contains(blockKeyStr));
        }
      }

      // Validate the mid-key.
      assertEquals(Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
        reader.midKey());

      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
        reader.getTrailer().getUncompressedDataIndexSize());

      reader.close();
      reader2.close();
    }
  }

  private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i) throws IOException {
    assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0,
      scanner.seekTo(KeyValueUtil.createKeyValueFromKey(keys[i])));
  }

  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr, ByteBuffer buf) {
    assertEquals(
      msgPrefix + ": expected " + Bytes.toStringBinary(arr) + ", actual "
        + Bytes.toStringBinary(buf),
      0, Bytes.compareTo(arr, 0, arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkKeyValue(String msgPrefix, byte[] expectedKey, byte[] expectedValue,
    ByteBuffer keyRead, ByteBuffer valueRead) {
    if (!msgPrefix.isEmpty()) {
      msgPrefix += ". ";
    }

    assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
    assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue, valueRead);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeys() throws IOException {
    testIntermediateLevelIndicesWithLargeKeys(16);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException {
    // Because of the large row keys, we would end up with a ~50-level block index if the
    // min-entries sanity check did not kick in.
    testIntermediateLevelIndicesWithLargeKeys(2);
  }

  public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException {
    Path hfPath =
      new Path(TEST_UTIL.getDataTestDir(), "testIntermediateLevelIndicesWithLargeKeys.hfile");
    int maxChunkSize = 1024;
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries);
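    // Each row key below is one byte larger than the 1 KB chunk size, so a chunk exceeds its
    // size limit after a single entry; the min-entries floor forces chunks to hold at least
    // minNumEntries entries anyway, which bounds the depth of the index.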
    HFileContext context = new HFileContextBuilder().withBlockSize(16).build();
    HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf).withFileContext(context)
      .withPath(fs, hfPath).create();
    List<byte[]> keys = new ArrayList<>();

    // This should result in leaf-level indices and a root level index
    for (int i = 0; i < 100; i++) {
      byte[] rowkey = new byte[maxChunkSize + 1];
      byte[] b = Bytes.toBytes(i);
      System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length);
      keys.add(rowkey);
      hfw.append(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY).setRow(rowkey)
        .setFamily(HConstants.EMPTY_BYTE_ARRAY).setQualifier(HConstants.EMPTY_BYTE_ARRAY)
        .setTimestamp(HConstants.LATEST_TIMESTAMP).setType(KeyValue.Type.Maximum.getCode())
        .setValue(HConstants.EMPTY_BYTE_ARRAY).build());
    }
    hfw.close();

    HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, true, conf);
    // Scanner doesn't do Cells yet. Fix.
    HFileScanner scanner = reader.getScanner(conf, true, true);
    for (int i = 0; i < keys.size(); ++i) {
      scanner.seekTo(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
        .setRow(keys.get(i)).setFamily(HConstants.EMPTY_BYTE_ARRAY)
        .setQualifier(HConstants.EMPTY_BYTE_ARRAY).setTimestamp(HConstants.LATEST_TIMESTAMP)
        .setType(KeyValue.Type.Maximum.getCode()).setValue(HConstants.EMPTY_BYTE_ARRAY).build());
    }
    reader.close();
  }
}