/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileInfo;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Tests for {@link HalfStoreFileReader}: scanning, reseeking and seek-before on the bottom and
 * top halves of an HFile that is split at its mid key.
 */
@Category({ IOTests.class, SmallTests.class })
public class TestHalfStoreFileReader {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHalfStoreFileReader.class);

  private static HBaseTestingUtility TEST_UTIL;

  /** Creates the testing utility shared by every test in this class. */
  @BeforeClass
  public static void setupBeforeClass() throws Exception {
    TEST_UTIL = new HBaseTestingUtility();
  }

  /** Removes the test data directory once all tests have run. */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.cleanupTestDir();
  }

  /**
   * Test the scanner and reseek of a half hfile scanner. The scanner API demands that seekTo and
   * reseekTo() only return &lt; 0 if the key lies before the start of the file (with no position
   * on the scanner). Returning 0 if perfect match (rare), and return &gt; 0 if we got an imperfect
   * match. The latter case being the most common, we should generally be returning 1, and if we
   * do, there may or may not be a 'next' in the scanner/file. A bug in the half file scanner was
   * returning -1 at the end of the bottom half, and that was causing the infrastructure above to
   * go null causing NPEs and other problems. This test reproduces that failure, and also tests
   * both the bottom and top of the file while we are at it.
   */
  @Test
  public void testHalfScanAndReseek() throws IOException {
    String root_dir = TEST_UTIL.getDataTestDir().toString();
    Path p = new Path(root_dir, "test");

    Configuration conf = TEST_UTIL.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);

    // write some things.
    writeTestFile(fs, p, conf, cacheConf);

    // try-with-resources: the reader is released even when an assertion below fails.
    try (HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf)) {
      Cell midKV = r.midKey().get();
      byte[] midkey = CellUtil.cloneRow(midKV);

      Reference bottom = new Reference(midkey, Reference.Range.bottom);
      doTestOfScanAndReseek(p, fs, bottom, cacheConf);

      Reference top = new Reference(midkey, Reference.Range.top);
      doTestOfScanAndReseek(p, fs, top, cacheConf);
    }
  }

  /**
   * Opens a {@link HalfStoreFileReader} over {@code p} for the given half, walks every cell and,
   * at each position, reseeks to the "last on row" key built by {@link #getLastOnCol(Cell)}.
   * Every reseek, including one final reseek after the scanner is exhausted, must return a value
   * greater than zero.
   * @param p         path of the HFile to read
   * @param fs        file system holding {@code p}
   * @param bottom    reference selecting the half to scan; despite the name, callers pass both
   *                  the bottom and the top reference
   * @param cacheConf cache configuration handed to the half reader
   * @throws IOException if opening, scanning or closing the reader fails
   */
  private void doTestOfScanAndReseek(Path p, FileSystem fs, Reference bottom, CacheConfig cacheConf)
    throws IOException {
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, p).build();
    HFileInfo fileInfo = new HFileInfo(context, TEST_UTIL.getConfiguration());
    final HalfStoreFileReader halfreader = new HalfStoreFileReader(context, fileInfo, cacheConf,
      bottom, new AtomicInteger(0), TEST_UTIL.getConfiguration());
    try {
      fileInfo.initMetaAndIndex(halfreader.getHFileReader());
      halfreader.loadFileInfo();
      final HFileScanner scanner = halfreader.getScanner(false, false);

      scanner.seekTo();
      Cell curr;
      do {
        curr = scanner.getCell();
        KeyValue reseekKv = getLastOnCol(curr);
        int ret = scanner.reseekTo(reseekKv);
        assertTrue("reseek to returned: " + ret, ret > 0);
      } while (scanner.next());

      // The scanner is now past its last cell; reseeking must still not report "before start".
      int ret = scanner.reseekTo(getLastOnCol(curr));
      assertTrue(ret > 0);
    } finally {
      // Close in finally so a failed assertion does not leak the reader.
      halfreader.close(true);
    }
  }

  // Tests the scanner on an HFile that is backed by HalfStoreFiles
  @Test
  public void testHalfScanner() throws IOException {
    String root_dir = TEST_UTIL.getDataTestDir().toString();
    Path p = new Path(root_dir, "test");
    Configuration conf = TEST_UTIL.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);

    // write some things.
    List<KeyValue> items = writeTestFile(fs, p, conf, cacheConf);

    // The reader was previously never closed here; try-with-resources releases it on all paths.
    try (HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf)) {
      Cell midKV = r.midKey().get();
      byte[] midkey = CellUtil.cloneRow(midKV);

      Reference bottom = new Reference(midkey, Reference.Range.bottom);
      Reference top = new Reference(midkey, Reference.Range.top);

      // Ugly code to get the item before the midkey
      KeyValue beforeMidKey = null;
      for (KeyValue item : items) {
        if (CellComparatorImpl.COMPARATOR.compare(item, midKV) >= 0) {
          break;
        }
        beforeMidKey = item;
      }
      System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey));
      System.out.println("beforeMidKey: " + beforeMidKey);

      // Seek on the splitKey, should be in top, not in bottom
      Cell foundKeyValue = doTestOfSeekBefore(p, fs, bottom, midKV, cacheConf);
      assertEquals(beforeMidKey, foundKeyValue);

      // Seek to the last thing should be the penultimate on the top, the one before the midkey on
      // the bottom.
      foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(items.size() - 1), cacheConf);
      assertEquals(items.get(items.size() - 2), foundKeyValue);

      foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(items.size() - 1), cacheConf);
      assertEquals(beforeMidKey, foundKeyValue);

      // Try and seek before something that is in the bottom.
      foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(0), cacheConf);
      assertNull(foundKeyValue);

      // Try and seek before the first thing.
      foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(0), cacheConf);
      assertNull(foundKeyValue);

      // Try and seek before the second thing in the top and bottom.
      foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(1), cacheConf);
      assertNull(foundKeyValue);

      foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(1), cacheConf);
      assertEquals(items.get(0), foundKeyValue);

      // Try to seek before the splitKey in the top file
      foundKeyValue = doTestOfSeekBefore(p, fs, top, midKV, cacheConf);
      assertNull(foundKeyValue);
    }
  }

  /**
   * Opens a {@link HalfStoreFileReader} over {@code p} for the given half, calls
   * {@code seekBefore(seekBefore)} on its scanner and returns the cell the scanner then reports.
   * @param p           path of the HFile to read
   * @param fs          file system holding {@code p}
   * @param bottom      reference selecting the half to read; despite the name, callers pass both
   *                    the bottom and the top reference
   * @param seekBefore  key to seek before
   * @param cacheConfig cache configuration handed to the half reader
   * @return a copy of the scanner's current cell after the seek, or {@code null} when the scanner
   *         reports no current cell
   * @throws IOException if opening, seeking or closing the reader fails
   */
  private Cell doTestOfSeekBefore(Path p, FileSystem fs, Reference bottom, Cell seekBefore,
    CacheConfig cacheConfig) throws IOException {
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, p).build();
    HFileInfo fileInfo = new HFileInfo(context, TEST_UTIL.getConfiguration());
    final HalfStoreFileReader halfreader = new HalfStoreFileReader(context, fileInfo, cacheConfig,
      bottom, new AtomicInteger(0), TEST_UTIL.getConfiguration());
    try {
      fileInfo.initMetaAndIndex(halfreader.getHFileReader());
      halfreader.loadFileInfo();
      final HFileScanner scanner = halfreader.getScanner(false, false);
      scanner.seekBefore(seekBefore);
      Cell found = scanner.getCell();
      // Copy before the reader is closed so the returned cell does not depend on reader-owned
      // buffers.
      return found == null ? null : KeyValueUtil.copyToNewKeyValue(found);
    } finally {
      // This reader used to be leaked on every call.
      halfreader.close(true);
    }
  }

  /**
   * Writes the cells from {@link #genSomeKeys()} to a new HFile at {@code p} with a 1024-byte
   * block size, closing the writer whether or not an append fails.
   * @return the cells that were written, in write order
   * @throws IOException if creating, appending to or closing the writer fails
   */
  private List<KeyValue> writeTestFile(FileSystem fs, Path p, Configuration conf,
    CacheConfig cacheConf) throws IOException {
    HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build();
    List<KeyValue> items = genSomeKeys();
    try (HFile.Writer w =
      HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create()) {
      for (KeyValue kv : items) {
        w.append(kv);
      }
    }
    return items;
  }

  /**
   * Builds the key returned by {@code KeyValueUtil.createLastOnRow} for the row, family and
   * qualifier of {@code curr}. Presumably this key sorts after every real cell of that column;
   * the reseek test relies on it never being an exact match.
   */
  private KeyValue getLastOnCol(Cell curr) {
    return KeyValueUtil.createLastOnRow(curr.getRowArray(), curr.getRowOffset(),
      curr.getRowLength(), curr.getFamilyArray(), curr.getFamilyOffset(), curr.getFamilyLength(),
      curr.getQualifierArray(), curr.getQualifierOffset(), curr.getQualifierLength());
  }

  /** Number of cells produced by {@link #genSomeKeys()}. */
  static final int SIZE = 1000;

  /** Shorthand for {@link Bytes#toBytes(String)}. */
  static byte[] _b(String s) {
    return Bytes.toBytes(s);
  }

  /**
   * Generates {@link #SIZE} cells with rows {@code row_0000}, {@code row_0001}, ... in ascending
   * order, all sharing the same family, qualifier, timestamp and value.
   */
  List<KeyValue> genSomeKeys() {
    List<KeyValue> ret = new ArrayList<>(SIZE);
    for (int i = 0; i < SIZE; i++) {
      KeyValue kv =
        new KeyValue(_b(String.format("row_%04d", i)), _b("family"), _b("qualifier"), 1000, // timestamp
          _b("value"));
      ret.add(kv);
    }
    return ret;
  }
}