/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.BloomFilterUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Verifies that {@code HFileScanner.seekBefore()} works correctly when the scannable
 * section of an HFile contains inline (leaf-level index and Bloom) blocks.
 */
@Category({IOTests.class, MediumTests.class})
public class TestSeekBeforeWithInlineBlocks {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestSeekBeforeWithInlineBlocks.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestSeekBeforeWithInlineBlocks.class);

  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  /** Number of key/value pairs written into each test HFile. */
  private static final int NUM_KV = 10000;

  private static final int DATA_BLOCK_SIZE = 4096;
  private static final int BLOOM_BLOCK_SIZE = 1024;
  // Index chunk sizes paired element-wise with the index depth each one should produce.
  private static final int[] INDEX_CHUNK_SIZES = { 65536, 4096, 1024 };
  private static final int[] EXPECTED_NUM_LEVELS = { 1, 2, 3 };

  // Fixed seed so the generated keys are reproducible across runs.
  private static final Random RAND = new Random(192537);
  private static final byte[] FAM = Bytes.toBytes("family");

  private FileSystem fs;
  private Configuration conf;

  /**
   * Scanner.seekBefore() could fail because when seeking to a previous HFile data block, it needs
   * to know the size of that data block, which it calculates using current data block offset and
   * the previous data block offset. This fails to work when there are leaf-level index blocks in
   * the scannable section of the HFile, i.e. starting in HFileV2. This test will try seekBefore()
   * on a flat (single-level) and multi-level (2,3) HFile and confirm this bug is now fixed. This
   * bug also happens for inline Bloom blocks for the same reasons.
   */
  @Test
  public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);

    // Try out different HFile versions to ensure reverse scan works on each version
    for (int hfileVersion = HFile.MIN_FORMAT_VERSION_WITH_TAGS;
        hfileVersion <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {

      conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
      fs = HFileSystem.get(conf);

      // Try out different bloom types because inline Bloom blocks break seekBefore()
      for (BloomType bloomType : BloomType.values()) {

        // Exercise block indices of increasing depth (1, 2, and 3 levels).
        for (int chunkIdx = 0; chunkIdx < INDEX_CHUNK_SIZES.length; chunkIdx++) {
          int indexBlockSize = INDEX_CHUNK_SIZES[chunkIdx];
          int expectedNumLevels = EXPECTED_NUM_LEVELS[chunkIdx];

          LOG.info(String.format("Testing HFileVersion: %s, BloomType: %s, Index Levels: %s",
            hfileVersion, bloomType, expectedNumLevels));

          conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
          conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);
          conf.setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);

          Cell[] cells = new Cell[NUM_KV];

          Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
            String.format("testMultiIndexLevelRandomHFileWithBlooms-%s-%s-%s",
              hfileVersion, bloomType, chunkIdx));

          // Disable caching to prevent it from hiding any bugs in block seeks/reads
          conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
          CacheConfig cacheConf = new CacheConfig(conf);

          // Write the HFile
          {
            HFileContext meta = new HFileContextBuilder()
              .withBlockSize(DATA_BLOCK_SIZE)
              .build();

            StoreFileWriter storeFileWriter =
              new StoreFileWriter.Builder(conf, cacheConf, fs)
                .withFilePath(hfilePath)
                .withFileContext(meta)
                .withBloomType(bloomType)
                .build();

            // Append NUM_KV randomly-generated (but ordered-key) cells,
            // remembering each one so reads can be verified against it.
            for (int i = 0; i < NUM_KV; i++) {
              byte[] row = RandomKeyValueUtil.randomOrderedKey(RAND, i);
              byte[] qual = RandomKeyValueUtil.randomRowOrQualifier(RAND);
              byte[] value = RandomKeyValueUtil.randomValue(RAND);
              KeyValue kv = new KeyValue(row, FAM, qual, value);

              storeFileWriter.append(kv);
              cells[i] = kv;
            }

            storeFileWriter.close();
          }

          // Read the HFile
          HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

          // Sanity check the HFile index level
          assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());

          // Check that we can seekBefore in either direction and with both pread
          // enabled and disabled
          for (boolean pread : new boolean[] { false, true }) {
            HFileScanner scanner = reader.getScanner(true, pread);
            // Nothing precedes the first key, so seekBefore must report failure there.
            checkNoSeekBefore(cells, scanner, 0);
            // Forward pass: seekBefore(cells[i]) must land on cells[i-1].
            for (int i = 1; i < NUM_KV; i++) {
              checkSeekBefore(cells, scanner, i);
              checkCell(cells[i - 1], scanner.getCell());
            }
            assertTrue(scanner.seekTo());
            // Backward pass: same checks, walking from the end of the file.
            for (int i = NUM_KV - 1; i >= 1; i--) {
              checkSeekBefore(cells, scanner, i);
              checkCell(cells[i - 1], scanner.getCell());
            }
            checkNoSeekBefore(cells, scanner, 0);
            scanner.close();
          }

          reader.close();
        }
      }
    }
  }

  /** Asserts that seekBefore(cells[i]) succeeds, i.e. a predecessor key exists. */
  private void checkSeekBefore(Cell[] cells, HFileScanner scanner, int i)
      throws IOException {
    boolean seeked = scanner.seekBefore(cells[i]);
    assertEquals("Failed to seek to the key before #" + i + " ("
      + CellUtil.getCellKeyAsString(cells[i]) + ")", true, seeked);
  }

  /** Asserts that seekBefore(cells[i]) fails, i.e. there is no predecessor key. */
  private void checkNoSeekBefore(Cell[] cells, HFileScanner scanner, int i)
      throws IOException {
    boolean seeked = scanner.seekBefore(cells[i]);
    assertEquals("Incorrectly succeeded in seeking to before first key ("
      + CellUtil.getCellKeyAsString(cells[i]) + ")", false, seeked);
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkCell(Cell expected, Cell actual) {
    String mismatch = String.format("Expected key %s, but was %s",
      CellUtil.getCellKeyAsString(expected), CellUtil.getCellKeyAsString(actual));
    assertTrue(mismatch, CellUtil.equals(expected, actual));
  }
}