/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.BloomFilterUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Category({ IOTests.class, MediumTests.class })
public class TestSeekBeforeWithInlineBlocks {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestSeekBeforeWithInlineBlocks.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestSeekBeforeWithInlineBlocks.class);

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  private static final int NUM_KV = 10000;

  private static final int DATA_BLOCK_SIZE = 4096;
  private static final int BLOOM_BLOCK_SIZE = 1024;
  private static final int[] INDEX_CHUNK_SIZES = { 65536, 4096, 1024 };
  private static final int[] EXPECTED_NUM_LEVELS = { 1, 2, 3 };

  private static final Random RAND = new Random(192537);
  private static final byte[] FAM = Bytes.toBytes("family");

  private FileSystem fs;
  private Configuration conf;

  /**
   * Scanner.seekBefore() could fail because, when seeking to a previous HFile data block, it
   * needs to know the size of that data block, which it calculates from the current and previous
   * data block offsets. This stops working when there are leaf-level index blocks in the
   * scannable section of the HFile, i.e. starting in HFileV2. This test tries seekBefore() on
   * flat (single-level) and multi-level (2- and 3-level) HFiles and confirms the bug is now
   * fixed.
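   * For example, if the previous data block starts at offset 0 and the current one at offset
   * 8192, the previous block's size is computed as 8192 bytes; when an inline leaf index block
   * sits between the two, that figure wrongly includes the inline block's bytes, so the read of
   * the previous data block goes past its actual end.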
   * This bug also happens for inline Bloom blocks, for the same reasons.
   */
  @Test
  public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);

    // Try out different HFile versions to ensure reverse scan works on each version
    for (int hfileVersion = HFile.MIN_FORMAT_VERSION_WITH_TAGS; hfileVersion
        <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {

      conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
      fs = HFileSystem.get(conf);

      // Try out different Bloom types because inline Bloom blocks break seekBefore()
      for (BloomType bloomType : BloomType.values()) {

        // Test out HFile block indices of various sizes/levels
        for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; testI++) {
          int indexBlockSize = INDEX_CHUNK_SIZES[testI];
          int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];

          LOG.info(String.format("Testing HFileVersion: %s, BloomType: %s, Index Levels: %s",
            hfileVersion, bloomType, expectedNumLevels));

          conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
          conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);
          conf.setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);

          Cell[] cells = new Cell[NUM_KV];

          Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), String.format(
            "testMultiIndexLevelRandomHFileWithBlooms-%s-%s-%s", hfileVersion, bloomType, testI));

          // Disable caching to prevent it from hiding any bugs in block seeks/reads
          conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
          CacheConfig cacheConf = new CacheConfig(conf);

          // Write the HFile
          {
            HFileContext meta = new HFileContextBuilder().withBlockSize(DATA_BLOCK_SIZE).build();

            StoreFileWriter storeFileWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
              .withFilePath(hfilePath).withFileContext(meta).withBloomType(bloomType).build();

            // HFiles require keys in sorted order; randomOrderedKey(RAND, i) sorts by i
            for (int i = 0; i < NUM_KV; i++) {
              byte[] row = RandomKeyValueUtil.randomOrderedKey(RAND, i);
              byte[] qual = RandomKeyValueUtil.randomRowOrQualifier(RAND);
              byte[] value = RandomKeyValueUtil.randomValue(RAND);
              KeyValue kv = new KeyValue(row, FAM, qual, value);

              storeFileWriter.append(kv);
              cells[i] = kv;
            }

            storeFileWriter.close();
          }

          // Read the HFile
          HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

          // Sanity check the HFile index level
          assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());

          // Check that we can seekBefore in either direction and with both pread
          // enabled and disabled
          for (boolean pread : new boolean[] { false, true }) {
            HFileScanner scanner = reader.getScanner(conf, true, pread);
            checkNoSeekBefore(cells, scanner, 0);
            for (int i = 1; i < NUM_KV; i++) {
              checkSeekBefore(cells, scanner, i);
              checkCell(cells[i - 1], scanner.getCell());
            }
            assertTrue(scanner.seekTo());
            for (int i = NUM_KV - 1; i >= 1; i--) {
              checkSeekBefore(cells, scanner, i);
              checkCell(cells[i - 1], scanner.getCell());
            }
            checkNoSeekBefore(cells, scanner, 0);
            scanner.close();
          }

          reader.close();
        }
      }
    }
  }

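  /** Assert that seekBefore(cells[i]) succeeds, i.e. the scanner finds a cell before key #i */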
  private void checkSeekBefore(Cell[] cells, HFileScanner scanner, int i) throws IOException {
    assertEquals(
      "Failed to seek to the key before #" + i + " ("
        + CellUtil.getCellKeyAsString(cells[i]) + ")",
      true, scanner.seekBefore(cells[i]));
  }

  /** Assert that seekBefore(cells[i]) fails, as it must when cells[i] is the first key */
  private void checkNoSeekBefore(Cell[] cells, HFileScanner scanner, int i) throws IOException {
    assertEquals("Incorrectly succeeded in seeking to before first key ("
      + CellUtil.getCellKeyAsString(cells[i]) + ")", false, scanner.seekBefore(cells[i]));
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkCell(Cell expected, Cell actual) {
    assertTrue(String.format("Expected key %s, but was %s", CellUtil.getCellKeyAsString(expected),
      CellUtil.getCellKeyAsString(actual)), CellUtil.equals(expected, actual));
  }
}