/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.BloomFilterUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Category({ IOTests.class, MediumTests.class })
public class TestSeekBeforeWithInlineBlocks {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestSeekBeforeWithInlineBlocks.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestSeekBeforeWithInlineBlocks.class);

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  private static final int NUM_KV = 10000;

  private static final int DATA_BLOCK_SIZE = 4096;
  private static final int BLOOM_BLOCK_SIZE = 1024;
  private static final int[] INDEX_CHUNK_SIZES = { 65536, 4096, 1024 };
  private static final int[] EXPECTED_NUM_LEVELS = { 1, 2, 3 };

  private static final Random RAND = new Random(192537);
  private static final byte[] FAM = Bytes.toBytes("family");

  private FileSystem fs;
  private Configuration conf;

  /**
   * Scanner.seekBefore() used to fail when seeking to the previous HFile data block because it
   * needs the size of that block, which it derives from the current and previous data block
   * offsets. That calculation breaks once leaf-level index blocks appear in the scannable section
   * of the HFile, i.e. starting with HFile v2, and the same problem affects inline Bloom blocks.
   * This test runs seekBefore() against flat (single-level) and multi-level (2- and 3-level)
   * HFiles to confirm the bug stays fixed.
   */
  @Test
  public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
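    // Sketch of the failure mode described in the javadoc above (an illustration, not a
    // reference to a specific API): seekBefore() estimates the previous data block's size as
    //   prevBlockSize = currentDataBlockOffset - prevDataBlockOffset
    // which over-counts whenever an inline leaf-index or Bloom block sits between the two data
    // blocks. The index and Bloom settings below deliberately produce that interleaved layout.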
    conf = TEST_UTIL.getConfiguration();
    conf.setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);

    // Try out different HFile versions to ensure reverse scan works on each version
    for (int hfileVersion = HFile.MIN_FORMAT_VERSION_WITH_TAGS; hfileVersion
        <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {

      conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
      fs = HFileSystem.get(conf);

      // Try out different bloom types because inline Bloom blocks break seekBefore()
      for (BloomType bloomType : BloomType.values()) {

        // Test out HFile block indices of various sizes/levels
        for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; testI++) {
          int indexBlockSize = INDEX_CHUNK_SIZES[testI];
          int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];

          LOG.info("Testing HFileVersion: {}, BloomType: {}, Index Levels: {}", hfileVersion,
            bloomType, expectedNumLevels);

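          // Small index chunks and Bloom blocks force inline leaf-index and Bloom blocks to be
          // interleaved with the data blocks, the layout seekBefore() used to trip on.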
          conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
          conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);
          conf.setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);

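          // Keep every KeyValue we write so the seekBefore() results can be checked cell-by-cell.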
          Cell[] cells = new Cell[NUM_KV];

          Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), String.format(
            "testMultiIndexLevelRandomHFileWithBlooms-%s-%s-%s", hfileVersion, bloomType, testI));

          // Disable caching to prevent it from hiding any bugs in block seeks/reads
          conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
          CacheConfig cacheConf = new CacheConfig(conf);

          // Write the HFile
          {
            HFileContext meta = new HFileContextBuilder().withBlockSize(DATA_BLOCK_SIZE).build();

            StoreFileWriter storeFileWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
              .withFilePath(hfilePath).withFileContext(meta).withBloomType(bloomType).build();

            for (int i = 0; i < NUM_KV; i++) {
              byte[] row = RandomKeyValueUtil.randomOrderedKey(RAND, i);
              byte[] qual = RandomKeyValueUtil.randomRowOrQualifier(RAND);
              byte[] value = RandomKeyValueUtil.randomValue(RAND);
              KeyValue kv = new KeyValue(row, FAM, qual, value);

              storeFileWriter.append(kv);
              cells[i] = kv;
            }

            storeFileWriter.close();
          }

          // Read the HFile
          HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

          // Sanity check the HFile index level
          assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());

          // Check that we can seekBefore in either direction and with both pread
          // enabled and disabled
          for (boolean pread : new boolean[] { false, true }) {
            HFileScanner scanner = reader.getScanner(conf, true, pread);
            checkNoSeekBefore(cells, scanner, 0);
            for (int i = 1; i < NUM_KV; i++) {
              checkSeekBefore(cells, scanner, i);
              checkCell(cells[i - 1], scanner.getCell());
            }
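            // Seek back to the first cell, then exercise seekBefore() in descending key order.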
            assertTrue(scanner.seekTo());
            for (int i = NUM_KV - 1; i >= 1; i--) {
              checkSeekBefore(cells, scanner, i);
              checkCell(cells[i - 1], scanner.getCell());
            }
            checkNoSeekBefore(cells, scanner, 0);
            scanner.close();
          }

          reader.close();
        }
      }
    }
  }

  private void checkSeekBefore(Cell[] cells, HFileScanner scanner, int i) throws IOException {
    assertTrue(
      "Failed to seek to the key before #" + i + " (" + CellUtil.getCellKeyAsString(cells[i]) + ")",
      scanner.seekBefore(cells[i]));
  }

  private void checkNoSeekBefore(Cell[] cells, HFileScanner scanner, int i) throws IOException {
    assertFalse("Incorrectly succeeded in seeking to before first key ("
      + CellUtil.getCellKeyAsString(cells[i]) + ")", scanner.seekBefore(cells[i]));
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkCell(Cell expected, Cell actual) {
    assertTrue(String.format("Expected key %s, but was %s", CellUtil.getCellKeyAsString(expected),
      CellUtil.getCellKeyAsString(actual)), CellUtil.equals(expected, actual));
  }
}