/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileInfo;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Tests scanning, reseeking and seek-before on {@link HalfStoreFileReader}s opened over the
 * bottom and top halves of an HFile that is split at the row of its mid key.
 */
@Category({ IOTests.class, SmallTests.class })
public class TestHalfStoreFileReader {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHalfStoreFileReader.class);

  private static HBaseTestingUtility TEST_UTIL;

  @BeforeClass
  public static void setupBeforeClass() throws Exception {
    TEST_UTIL = new HBaseTestingUtility();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.cleanupTestDir();
  }

  /**
   * Test the scanner and reseek of a half hfile scanner. The scanner API demands that seekTo and
   * reseekTo() only return {@code < 0} if the key lies before the start of the file (with no
   * position on the scanner), return 0 on a perfect match (rare), and return {@code > 0} on an
   * imperfect match. The latter case being the most common, we should generally be returning 1,
   * and if we do, there may or may not be a 'next' in the scanner/file. A bug in the half file
   * scanner was returning -1 at the end of the bottom half, and that was causing the
   * infrastructure above to go null causing NPEs and other problems. This test reproduces that
   * failure, and also tests both the bottom and top of the file while we are at it.
   * @throws IOException if writing or reading the test HFile fails
   */
  @Test
  public void testHalfScanAndReseek() throws IOException {
    String root_dir = TEST_UTIL.getDataTestDir().toString();
    Path p = new Path(root_dir, "test");

    Configuration conf = TEST_UTIL.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    writeTestFile(conf, fs, cacheConf, p);

    HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf);
    try {
      Cell midKV = r.midKey().get();
      byte[] midkey = CellUtil.cloneRow(midKV);

      Reference bottom = new Reference(midkey, Reference.Range.bottom);
      doTestOfScanAndReseek(p, fs, bottom, cacheConf);

      Reference top = new Reference(midkey, Reference.Range.top);
      doTestOfScanAndReseek(p, fs, top, cacheConf);
    } finally {
      // Close the reader even when an assertion above fails.
      r.close();
    }
  }

  /**
   * Opens a half reader over {@code p} for the given reference, walks every cell of that half and,
   * for each one, reseeks to the last possible key on the same row/family/qualifier, asserting
   * that reseekTo always returns a positive value. Repeats the reseek once more after the scanner
   * is exhausted.
   * @param p path of the HFile to read
   * @param fs file system holding {@code p}
   * @param reference which half (bottom or top) of the file to open, and the split row
   * @param cacheConf cache configuration handed to the half reader
   * @throws IOException if the half reader cannot be opened, read or closed
   */
  private void doTestOfScanAndReseek(Path p, FileSystem fs, Reference reference,
    CacheConfig cacheConf) throws IOException {
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, p).build();
    HFileInfo fileInfo = new HFileInfo(context, TEST_UTIL.getConfiguration());
    final HalfStoreFileReader halfreader = new HalfStoreFileReader(context, fileInfo, cacheConf,
      reference, new AtomicInteger(0), TEST_UTIL.getConfiguration());
    try {
      fileInfo.initMetaAndIndex(halfreader.getHFileReader());
      halfreader.loadFileInfo();
      final HFileScanner scanner = halfreader.getScanner(false, false);

      // Without a positioned scanner getCell() below has nothing to return, so fail clearly here
      // rather than with a NullPointerException in getLastOnCol.
      assertTrue("seekTo() found no cell in the half file", scanner.seekTo());
      Cell curr;
      do {
        curr = scanner.getCell();
        KeyValue reseekKv = getLastOnCol(curr);
        int ret = scanner.reseekTo(reseekKv);
        assertTrue("reseek to returned: " + ret, ret > 0);
      } while (scanner.next());

      // The scanner is exhausted now; reseeking past the last cell must still be positive.
      int ret = scanner.reseekTo(getLastOnCol(curr));
      assertTrue("last reseek returned: " + ret, ret > 0);
    } finally {
      halfreader.close(true);
    }
  }

  /**
   * Tests seekBefore on scanners over the bottom and top halves of an HFile, for keys that fall
   * in either half, at the split key, and at the very start of the file.
   * @throws IOException if writing or reading the test HFile fails
   */
  @Test
  public void testHalfScanner() throws IOException {
    String root_dir = TEST_UTIL.getDataTestDir().toString();
    Path p = new Path(root_dir, "test");
    Configuration conf = TEST_UTIL.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    List<KeyValue> items = writeTestFile(conf, fs, cacheConf, p);

    HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf);
    try {
      Cell midKV = r.midKey().get();
      byte[] midkey = CellUtil.cloneRow(midKV);

      Reference bottom = new Reference(midkey, Reference.Range.bottom);
      Reference top = new Reference(midkey, Reference.Range.top);

      // Find the last written item that sorts strictly before the mid key.
      KeyValue beforeMidKey = null;
      for (KeyValue item : items) {
        if (CellComparatorImpl.COMPARATOR.compare(item, midKV) >= 0) {
          break;
        }
        beforeMidKey = item;
      }
      System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey));
      System.out.println("beforeMidKey: " + beforeMidKey);

      // Seek on the splitKey, should be in top, not in bottom
      Cell foundKeyValue = doTestOfSeekBefore(p, fs, bottom, midKV, cacheConf);
      assertEquals(beforeMidKey, foundKeyValue);

      // Seek to the last thing should be the penultimate on the top, the one before the midkey
      // on the bottom.
      foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(items.size() - 1), cacheConf);
      assertEquals(items.get(items.size() - 2), foundKeyValue);

      foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(items.size() - 1), cacheConf);
      assertEquals(beforeMidKey, foundKeyValue);

      // Try and seek before something that is in the bottom.
      foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(0), cacheConf);
      assertNull(foundKeyValue);

      // Try and seek before the first thing.
      foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(0), cacheConf);
      assertNull(foundKeyValue);

      // Try and seek before the second thing in the top and bottom.
      foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(1), cacheConf);
      assertNull(foundKeyValue);

      foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(1), cacheConf);
      assertEquals(items.get(0), foundKeyValue);

      // Try to seek before the splitKey in the top file
      foundKeyValue = doTestOfSeekBefore(p, fs, top, midKV, cacheConf);
      assertNull(foundKeyValue);
    } finally {
      // The reader is only needed for the mid key, but it must not be leaked.
      r.close();
    }
  }

  /**
   * Opens a half reader over {@code p} for the given reference, calls seekBefore with
   * {@code seekBefore} and returns whatever cell the scanner reports afterwards.
   * @param p path of the HFile to read
   * @param fs file system holding {@code p}
   * @param bottom which half (bottom or top) of the file to open, and the split row
   * @param seekBefore the key to seek before
   * @param cacheConfig cache configuration handed to the half reader
   * @return a copy of the scanner's current cell after the seek, or {@code null} if the scanner
   *         reports no cell
   * @throws IOException if the half reader cannot be opened, read or closed
   */
  private Cell doTestOfSeekBefore(Path p, FileSystem fs, Reference bottom, Cell seekBefore,
    CacheConfig cacheConfig) throws IOException {
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, p).build();
    HFileInfo fileInfo = new HFileInfo(context, TEST_UTIL.getConfiguration());
    final HalfStoreFileReader halfreader = new HalfStoreFileReader(context, fileInfo, cacheConfig,
      bottom, new AtomicInteger(0), TEST_UTIL.getConfiguration());
    try {
      fileInfo.initMetaAndIndex(halfreader.getHFileReader());
      halfreader.loadFileInfo();
      final HFileScanner scanner = halfreader.getScanner(false, false);
      scanner.seekBefore(seekBefore);
      Cell found = scanner.getCell();
      // Copy before the reader is closed so the caller never holds a cell that still points at
      // memory owned by the (now closed) reader.
      return found == null ? null : KeyValueUtil.copyToNewKeyValue(found);
    } finally {
      halfreader.close(true);
    }
  }

  /**
   * Builds the key used for reseeking: a "last on row" KeyValue created from the row, family and
   * qualifier of {@code curr}.
   * @param curr the cell whose row, family and qualifier are used
   * @return the KeyValue produced by {@link KeyValueUtil#createLastOnRow}
   */
  private KeyValue getLastOnCol(Cell curr) {
    return KeyValueUtil.createLastOnRow(curr.getRowArray(), curr.getRowOffset(),
      curr.getRowLength(), curr.getFamilyArray(), curr.getFamilyOffset(), curr.getFamilyLength(),
      curr.getQualifierArray(), curr.getQualifierOffset(), curr.getQualifierLength());
  }

  /**
   * Writes the keys from {@link #genSomeKeys()} to a new HFile at {@code p} using a 1024-byte
   * block size, closing the writer even if an append fails.
   * @param conf configuration used to obtain the writer factory
   * @param fs file system to write to
   * @param cacheConf cache configuration handed to the writer factory
   * @param p destination path of the HFile
   * @return the KeyValues that were written, in write order
   * @throws IOException if creating, appending to or closing the writer fails
   */
  private List<KeyValue> writeTestFile(Configuration conf, FileSystem fs, CacheConfig cacheConf,
    Path p) throws IOException {
    HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build();
    HFile.Writer w =
      HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create();
    List<KeyValue> items = genSomeKeys();
    try {
      for (KeyValue kv : items) {
        w.append(kv);
      }
    } finally {
      w.close();
    }
    return items;
  }

  /** Number of rows written to the test HFile. */
  static final int SIZE = 1000;

  /** Shorthand for {@link Bytes#toBytes(String)}. */
  static byte[] _b(String s) {
    return Bytes.toBytes(s);
  }

  /**
   * Generates {@link #SIZE} KeyValues with rows {@code row_0000}, {@code row_0001}, ... in
   * ascending order, all sharing the same family, qualifier, timestamp and value.
   * @return the generated KeyValues, in row order
   */
  List<KeyValue> genSomeKeys() {
    List<KeyValue> ret = new ArrayList<>(SIZE);
    for (int i = 0; i < SIZE; i++) {
      KeyValue kv =
        new KeyValue(_b(String.format("row_%04d", i)), _b("family"), _b("qualifier"), 1000, // timestamp
          _b("value"));
      ret.add(kv);
    }
    return ret;
  }
}