001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022
023import java.io.IOException;
024import java.util.Random;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileSystem;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.Cell;
029import org.apache.hadoop.hbase.CellUtil;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HBaseTestingUtility;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.KeyValue;
034import org.apache.hadoop.hbase.fs.HFileSystem;
035import org.apache.hadoop.hbase.regionserver.BloomType;
036import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
037import org.apache.hadoop.hbase.testclassification.IOTests;
038import org.apache.hadoop.hbase.testclassification.MediumTests;
039import org.apache.hadoop.hbase.util.BloomFilterFactory;
040import org.apache.hadoop.hbase.util.BloomFilterUtil;
041import org.apache.hadoop.hbase.util.Bytes;
042import org.junit.ClassRule;
043import org.junit.Test;
044import org.junit.experimental.categories.Category;
045import org.slf4j.Logger;
046import org.slf4j.LoggerFactory;
047
048@Category({IOTests.class, MediumTests.class})
049public class TestSeekBeforeWithInlineBlocks {
050
051  @ClassRule
052  public static final HBaseClassTestRule CLASS_RULE =
053      HBaseClassTestRule.forClass(TestSeekBeforeWithInlineBlocks.class);
054
055  private static final Logger LOG = LoggerFactory.getLogger(TestSeekBeforeWithInlineBlocks.class);
056
057  private static final HBaseTestingUtility TEST_UTIL =
058      new HBaseTestingUtility();
059
060  private static final int NUM_KV = 10000;
061
062  private static final int DATA_BLOCK_SIZE = 4096;
063  private static final int BLOOM_BLOCK_SIZE = 1024;
064  private static final int[] INDEX_CHUNK_SIZES = { 65536, 4096, 1024 };
065  private static final int[] EXPECTED_NUM_LEVELS = { 1, 2, 3 };
066
067  private static final Random RAND = new Random(192537);
068  private static final byte[] FAM = Bytes.toBytes("family");
069
070  private FileSystem fs;
071  private Configuration conf;
072
073  /**
074   * Scanner.seekBefore() could fail because when seeking to a previous HFile data block, it needs
075   * to know the size of that data block, which it calculates using current data block offset and
076   * the previous data block offset.  This fails to work when there are leaf-level index blocks in
077   * the scannable section of the HFile, i.e. starting in HFileV2.  This test will try seekBefore()
078   * on a flat (single-level) and multi-level (2,3) HFile and confirm this bug is now fixed.  This
079   * bug also happens for inline Bloom blocks for the same reasons.
080   */
081  @Test
082  public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
083    conf = TEST_UTIL.getConfiguration();
084    TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
085
086    // Try out different HFile versions to ensure reverse scan works on each version
087    for (int hfileVersion = HFile.MIN_FORMAT_VERSION_WITH_TAGS;
088            hfileVersion <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {
089
090      conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
091      fs = HFileSystem.get(conf);
092
093      // Try out different bloom types because inline Bloom blocks break seekBefore()
094      for (BloomType bloomType : BloomType.values()) {
095
096        // Test out HFile block indices of various sizes/levels
097        for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; testI++) {
098          int indexBlockSize = INDEX_CHUNK_SIZES[testI];
099          int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
100
101          LOG.info(String.format("Testing HFileVersion: %s, BloomType: %s, Index Levels: %s",
102            hfileVersion, bloomType, expectedNumLevels));
103
104          conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
105          conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);
106          conf.setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
107
108          Cell[] cells = new Cell[NUM_KV];
109
110          Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
111            String.format("testMultiIndexLevelRandomHFileWithBlooms-%s-%s-%s",
112              hfileVersion, bloomType, testI));
113
114          // Disable caching to prevent it from hiding any bugs in block seeks/reads
115          conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
116          CacheConfig cacheConf = new CacheConfig(conf);
117
118          // Write the HFile
119          {
120            HFileContext meta = new HFileContextBuilder()
121                                .withBlockSize(DATA_BLOCK_SIZE)
122                                .build();
123
124            StoreFileWriter storeFileWriter =
125                new StoreFileWriter.Builder(conf, cacheConf, fs)
126              .withFilePath(hfilePath)
127              .withFileContext(meta)
128              .withBloomType(bloomType)
129              .build();
130
131            for (int i = 0; i < NUM_KV; i++) {
132              byte[] row = RandomKeyValueUtil.randomOrderedKey(RAND, i);
133              byte[] qual = RandomKeyValueUtil.randomRowOrQualifier(RAND);
134              byte[] value = RandomKeyValueUtil.randomValue(RAND);
135              KeyValue kv = new KeyValue(row, FAM, qual, value);
136
137              storeFileWriter.append(kv);
138              cells[i] = kv;
139            }
140
141            storeFileWriter.close();
142          }
143
144          // Read the HFile
145          HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
146
147          // Sanity check the HFile index level
148          assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());
149
150          // Check that we can seekBefore in either direction and with both pread
151          // enabled and disabled
152          for (boolean pread : new boolean[] { false, true }) {
153            HFileScanner scanner = reader.getScanner(true, pread);
154            checkNoSeekBefore(cells, scanner, 0);
155            for (int i = 1; i < NUM_KV; i++) {
156              checkSeekBefore(cells, scanner, i);
157              checkCell(cells[i-1], scanner.getCell());
158            }
159            assertTrue(scanner.seekTo());
160            for (int i = NUM_KV - 1; i >= 1; i--) {
161              checkSeekBefore(cells, scanner, i);
162              checkCell(cells[i-1], scanner.getCell());
163            }
164            checkNoSeekBefore(cells, scanner, 0);
165            scanner.close();
166          }
167
168          reader.close();
169        }
170      }
171    }
172  }
173
174  private void checkSeekBefore(Cell[] cells, HFileScanner scanner, int i)
175      throws IOException {
176    assertEquals("Failed to seek to the key before #" + i + " ("
177        + CellUtil.getCellKeyAsString(cells[i]) + ")", true,
178        scanner.seekBefore(cells[i]));
179  }
180
181  private void checkNoSeekBefore(Cell[] cells, HFileScanner scanner, int i)
182      throws IOException {
183    assertEquals("Incorrectly succeeded in seeking to before first key ("
184        + CellUtil.getCellKeyAsString(cells[i]) + ")", false,
185        scanner.seekBefore(cells[i]));
186  }
187
188  /** Check a key/value pair after it was read by the reader */
189  private void checkCell(Cell expected, Cell actual) {
190    assertTrue(String.format("Expected key %s, but was %s",
191      CellUtil.getCellKeyAsString(expected), CellUtil.getCellKeyAsString(actual)),
192      CellUtil.equals(expected, actual));
193  }
194}
195