001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022
023import java.io.IOException;
024import java.util.Random;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileSystem;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.Cell;
029import org.apache.hadoop.hbase.CellUtil;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HBaseTestingUtility;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.KeyValue;
034import org.apache.hadoop.hbase.fs.HFileSystem;
035import org.apache.hadoop.hbase.regionserver.BloomType;
036import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
037import org.apache.hadoop.hbase.testclassification.IOTests;
038import org.apache.hadoop.hbase.testclassification.MediumTests;
039import org.apache.hadoop.hbase.util.BloomFilterFactory;
040import org.apache.hadoop.hbase.util.Bytes;
041import org.junit.ClassRule;
042import org.junit.Test;
043import org.junit.experimental.categories.Category;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047@Category({IOTests.class, MediumTests.class})
048public class TestSeekBeforeWithInlineBlocks {
049
050  @ClassRule
051  public static final HBaseClassTestRule CLASS_RULE =
052      HBaseClassTestRule.forClass(TestSeekBeforeWithInlineBlocks.class);
053
054  private static final Logger LOG = LoggerFactory.getLogger(TestSeekBeforeWithInlineBlocks.class);
055
056  private static final HBaseTestingUtility TEST_UTIL =
057      new HBaseTestingUtility();
058
059  private static final int NUM_KV = 10000;
060
061  private static final int DATA_BLOCK_SIZE = 4096;
062  private static final int BLOOM_BLOCK_SIZE = 1024;
063  private static final int[] INDEX_CHUNK_SIZES = { 65536, 4096, 1024 };
064  private static final int[] EXPECTED_NUM_LEVELS = { 1, 2, 3 };
065
066  private static final Random RAND = new Random(192537);
067  private static final byte[] FAM = Bytes.toBytes("family");
068
069  private FileSystem fs;
070  private Configuration conf;
071
072  /**
073   * Scanner.seekBefore() could fail because when seeking to a previous HFile data block, it needs
074   * to know the size of that data block, which it calculates using current data block offset and
075   * the previous data block offset.  This fails to work when there are leaf-level index blocks in
076   * the scannable section of the HFile, i.e. starting in HFileV2.  This test will try seekBefore()
077   * on a flat (single-level) and multi-level (2,3) HFile and confirm this bug is now fixed.  This
078   * bug also happens for inline Bloom blocks for the same reasons.
079   */
080  @Test
081  public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
082    conf = TEST_UTIL.getConfiguration();
083
084    // Try out different HFile versions to ensure reverse scan works on each version
085    for (int hfileVersion = HFile.MIN_FORMAT_VERSION_WITH_TAGS;
086            hfileVersion <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {
087
088      conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
089      fs = HFileSystem.get(conf);
090
091      // Try out different bloom types because inline Bloom blocks break seekBefore()
092      for (BloomType bloomType : BloomType.values()) {
093
094        // Test out HFile block indices of various sizes/levels
095        for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; testI++) {
096          int indexBlockSize = INDEX_CHUNK_SIZES[testI];
097          int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
098
099          LOG.info(String.format("Testing HFileVersion: %s, BloomType: %s, Index Levels: %s",
100            hfileVersion, bloomType, expectedNumLevels));
101
102          conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
103          conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);
104
105          Cell[] cells = new Cell[NUM_KV];
106
107          Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
108            String.format("testMultiIndexLevelRandomHFileWithBlooms-%s-%s-%s",
109              hfileVersion, bloomType, testI));
110
111          // Disable caching to prevent it from hiding any bugs in block seeks/reads
112          conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
113          CacheConfig cacheConf = new CacheConfig(conf);
114
115          // Write the HFile
116          {
117            HFileContext meta = new HFileContextBuilder()
118                                .withBlockSize(DATA_BLOCK_SIZE)
119                                .build();
120
121            StoreFileWriter storeFileWriter =
122                new StoreFileWriter.Builder(conf, cacheConf, fs)
123              .withFilePath(hfilePath)
124              .withFileContext(meta)
125              .withBloomType(bloomType)
126              .build();
127
128            for (int i = 0; i < NUM_KV; i++) {
129              byte[] row = RandomKeyValueUtil.randomOrderedKey(RAND, i);
130              byte[] qual = RandomKeyValueUtil.randomRowOrQualifier(RAND);
131              byte[] value = RandomKeyValueUtil.randomValue(RAND);
132              KeyValue kv = new KeyValue(row, FAM, qual, value);
133
134              storeFileWriter.append(kv);
135              cells[i] = kv;
136            }
137
138            storeFileWriter.close();
139          }
140
141          // Read the HFile
142          HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
143
144          // Sanity check the HFile index level
145          assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());
146
147          // Check that we can seekBefore in either direction and with both pread
148          // enabled and disabled
149          for (boolean pread : new boolean[] { false, true }) {
150            HFileScanner scanner = reader.getScanner(true, pread);
151            checkNoSeekBefore(cells, scanner, 0);
152            for (int i = 1; i < NUM_KV; i++) {
153              checkSeekBefore(cells, scanner, i);
154              checkCell(cells[i-1], scanner.getCell());
155            }
156            assertTrue(scanner.seekTo());
157            for (int i = NUM_KV - 1; i >= 1; i--) {
158              checkSeekBefore(cells, scanner, i);
159              checkCell(cells[i-1], scanner.getCell());
160            }
161            checkNoSeekBefore(cells, scanner, 0);
162            scanner.close();
163          }
164
165          reader.close();
166        }
167      }
168    }
169  }
170
171  private void checkSeekBefore(Cell[] cells, HFileScanner scanner, int i)
172      throws IOException {
173    assertEquals("Failed to seek to the key before #" + i + " ("
174        + CellUtil.getCellKeyAsString(cells[i]) + ")", true,
175        scanner.seekBefore(cells[i]));
176  }
177
178  private void checkNoSeekBefore(Cell[] cells, HFileScanner scanner, int i)
179      throws IOException {
180    assertEquals("Incorrectly succeeded in seeking to before first key ("
181        + CellUtil.getCellKeyAsString(cells[i]) + ")", false,
182        scanner.seekBefore(cells[i]));
183  }
184
185  /** Check a key/value pair after it was read by the reader */
186  private void checkCell(Cell expected, Cell actual) {
187    assertTrue(String.format("Expected key %s, but was %s",
188      CellUtil.getCellKeyAsString(expected), CellUtil.getCellKeyAsString(actual)),
189      CellUtil.equals(expected, actual));
190  }
191}
192