001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertTrue; 023 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Collection; 027import java.util.Iterator; 028import java.util.List; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FSDataOutputStream; 031import org.apache.hadoop.fs.FileSystem; 032import org.apache.hadoop.fs.Path; 033import org.apache.hadoop.hbase.ArrayBackedTag; 034import org.apache.hadoop.hbase.ByteBufferKeyValue; 035import org.apache.hadoop.hbase.Cell; 036import org.apache.hadoop.hbase.HBaseClassTestRule; 037import org.apache.hadoop.hbase.HBaseTestingUtility; 038import org.apache.hadoop.hbase.HConstants; 039import org.apache.hadoop.hbase.KeyValue; 040import org.apache.hadoop.hbase.PrivateCellUtil; 041import org.apache.hadoop.hbase.Tag; 042import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 043import org.apache.hadoop.hbase.testclassification.IOTests; 044import org.apache.hadoop.hbase.testclassification.SmallTests; 045import org.apache.hadoop.hbase.util.Bytes; 046import org.junit.Before; 047import org.junit.ClassRule; 048import org.junit.Test; 049import org.junit.experimental.categories.Category; 050import org.junit.runner.RunWith; 051import org.junit.runners.Parameterized; 052import org.junit.runners.Parameterized.Parameters; 053 054/** 055 * Test {@link HFileScanner#seekTo(Cell)} and its variants. 056 */ 057@Category({IOTests.class, SmallTests.class}) 058@RunWith(Parameterized.class) 059public class TestSeekTo { 060 061 @ClassRule 062 public static final HBaseClassTestRule CLASS_RULE = 063 HBaseClassTestRule.forClass(TestSeekTo.class); 064 065 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 066 private final DataBlockEncoding encoding; 067 @Parameters 068 public static Collection<Object[]> parameters() { 069 List<Object[]> paramList = new ArrayList<>(); 070 for (DataBlockEncoding encoding : DataBlockEncoding.values()) { 071 paramList.add(new Object[] { encoding }); 072 } 073 return paramList; 074 } 075 static boolean switchKVs = false; 076 077 public TestSeekTo(DataBlockEncoding encoding) { 078 this.encoding = encoding; 079 } 080 081 @Before 082 public void setUp() { 083 //reset 084 switchKVs = false; 085 } 086 087 static KeyValue toKV(String row, TagUsage tagUsage) { 088 if (tagUsage == TagUsage.NO_TAG) { 089 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), 090 Bytes.toBytes("value")); 091 } else if (tagUsage == TagUsage.ONLY_TAG) { 092 Tag t = new ArrayBackedTag((byte) 1, "myTag1"); 093 Tag[] tags = new Tag[1]; 094 tags[0] = t; 095 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), 096 HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags); 097 } else { 098 if (!switchKVs) { 099 switchKVs = true; 100 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), 101 Bytes.toBytes("qualifier"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value")); 102 } else { 103 switchKVs = false; 104 Tag t = new ArrayBackedTag((byte) 1, "myTag1"); 105 Tag[] tags = new Tag[1]; 106 tags[0] = t; 107 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), 108 Bytes.toBytes("qualifier"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags); 109 } 110 } 111 } 112 static String toRowStr(Cell c) { 113 return Bytes.toString(c.getRowArray(), c.getRowOffset(), c.getRowLength()); 114 } 115 116 Path makeNewFile(TagUsage tagUsage) throws IOException { 117 Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile"); 118 FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile); 119 int blocksize = toKV("a", tagUsage).getLength() * 3; 120 HFileContext context = new HFileContextBuilder().withBlockSize(blocksize) 121 .withDataBlockEncoding(encoding) 122 .withIncludesTags(true).build(); 123 Configuration conf = TEST_UTIL.getConfiguration(); 124 HFile.Writer writer = HFile.getWriterFactoryNoCache(conf).withOutputStream(fout) 125 .withFileContext(context).create(); 126 // 4 bytes * 3 * 2 for each key/value + 127 // 3 for keys, 15 for values = 42 (woot) 128 writer.append(toKV("c", tagUsage)); 129 writer.append(toKV("e", tagUsage)); 130 writer.append(toKV("g", tagUsage)); 131 // block transition 132 writer.append(toKV("i", tagUsage)); 133 writer.append(toKV("k", tagUsage)); 134 writer.close(); 135 fout.close(); 136 return ncTFile; 137 } 138 139 @Test 140 public void testSeekBefore() throws Exception { 141 testSeekBeforeInternals(TagUsage.NO_TAG); 142 testSeekBeforeInternals(TagUsage.ONLY_TAG); 143 testSeekBeforeInternals(TagUsage.PARTIAL_TAG); 144 } 145 146 protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException { 147 Path p = makeNewFile(tagUsage); 148 FileSystem fs = TEST_UTIL.getTestFileSystem(); 149 Configuration conf = TEST_UTIL.getConfiguration(); 150 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 151 HFileScanner scanner = reader.getScanner(false, true); 152 assertFalse(scanner.seekBefore(toKV("a", tagUsage))); 153 154 assertFalse(scanner.seekBefore(toKV("c", tagUsage))); 155 156 assertTrue(scanner.seekBefore(toKV("d", tagUsage))); 157 assertEquals("c", toRowStr(scanner.getCell())); 158 159 assertTrue(scanner.seekBefore(toKV("e", tagUsage))); 160 assertEquals("c", toRowStr(scanner.getCell())); 161 162 assertTrue(scanner.seekBefore(toKV("f", tagUsage))); 163 assertEquals("e", toRowStr(scanner.getCell())); 164 165 assertTrue(scanner.seekBefore(toKV("g", tagUsage))); 166 assertEquals("e", toRowStr(scanner.getCell())); 167 assertTrue(scanner.seekBefore(toKV("h", tagUsage))); 168 assertEquals("g", toRowStr(scanner.getCell())); 169 assertTrue(scanner.seekBefore(toKV("i", tagUsage))); 170 assertEquals("g", toRowStr(scanner.getCell())); 171 assertTrue(scanner.seekBefore(toKV("j", tagUsage))); 172 assertEquals("i", toRowStr(scanner.getCell())); 173 Cell cell = scanner.getCell(); 174 if (tagUsage != TagUsage.NO_TAG && cell.getTagsLength() > 0) { 175 Iterator<Tag> tagsIterator = PrivateCellUtil.tagsIterator(cell); 176 while (tagsIterator.hasNext()) { 177 Tag next = tagsIterator.next(); 178 assertEquals("myTag1", Bytes.toString(Tag.cloneValue(next))); 179 } 180 } 181 assertTrue(scanner.seekBefore(toKV("k", tagUsage))); 182 assertEquals("i", toRowStr(scanner.getCell())); 183 assertTrue(scanner.seekBefore(toKV("l", tagUsage))); 184 assertEquals("k", toRowStr(scanner.getCell())); 185 186 reader.close(); 187 deleteTestDir(fs); 188 } 189 190 protected void deleteTestDir(FileSystem fs) throws IOException { 191 Path dataTestDir = TEST_UTIL.getDataTestDir(); 192 if(fs.exists(dataTestDir)) { 193 fs.delete(dataTestDir, true); 194 } 195 } 196 197 @Test 198 public void testSeekBeforeWithReSeekTo() throws Exception { 199 testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG); 200 testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG); 201 testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG); 202 } 203 204 protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException { 205 Path p = makeNewFile(tagUsage); 206 FileSystem fs = TEST_UTIL.getTestFileSystem(); 207 Configuration conf = TEST_UTIL.getConfiguration(); 208 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 209 HFileScanner scanner = reader.getScanner(false, true); 210 assertFalse(scanner.seekBefore(toKV("a", tagUsage))); 211 assertFalse(scanner.seekBefore(toKV("b", tagUsage))); 212 assertFalse(scanner.seekBefore(toKV("c", tagUsage))); 213 214 // seekBefore d, so the scanner points to c 215 assertTrue(scanner.seekBefore(toKV("d", tagUsage))); 216 assertFalse(scanner.getCell() instanceof ByteBufferKeyValue); 217 assertEquals("c", toRowStr(scanner.getCell())); 218 // reseekTo e and g 219 assertEquals(0, scanner.reseekTo(toKV("c", tagUsage))); 220 assertEquals("c", toRowStr(scanner.getCell())); 221 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 222 assertEquals("g", toRowStr(scanner.getCell())); 223 224 // seekBefore e, so the scanner points to c 225 assertTrue(scanner.seekBefore(toKV("e", tagUsage))); 226 assertEquals("c", toRowStr(scanner.getCell())); 227 // reseekTo e and g 228 assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); 229 assertEquals("e", toRowStr(scanner.getCell())); 230 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 231 assertEquals("g", toRowStr(scanner.getCell())); 232 233 // seekBefore f, so the scanner points to e 234 assertTrue(scanner.seekBefore(toKV("f", tagUsage))); 235 assertEquals("e", toRowStr(scanner.getCell())); 236 // reseekTo e and g 237 assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); 238 assertEquals("e", toRowStr(scanner.getCell())); 239 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 240 assertEquals("g", toRowStr(scanner.getCell())); 241 242 // seekBefore g, so the scanner points to e 243 assertTrue(scanner.seekBefore(toKV("g", tagUsage))); 244 assertEquals("e", toRowStr(scanner.getCell())); 245 // reseekTo e and g again 246 assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); 247 assertEquals("e", toRowStr(scanner.getCell())); 248 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 249 assertEquals("g", toRowStr(scanner.getCell())); 250 251 // seekBefore h, so the scanner points to g 252 assertTrue(scanner.seekBefore(toKV("h", tagUsage))); 253 assertEquals("g", toRowStr(scanner.getCell())); 254 // reseekTo g 255 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 256 assertEquals("g", toRowStr(scanner.getCell())); 257 258 // seekBefore i, so the scanner points to g 259 assertTrue(scanner.seekBefore(toKV("i", tagUsage))); 260 assertEquals("g", toRowStr(scanner.getCell())); 261 // reseekTo g 262 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 263 assertEquals("g", toRowStr(scanner.getCell())); 264 265 // seekBefore j, so the scanner points to i 266 assertTrue(scanner.seekBefore(toKV("j", tagUsage))); 267 assertEquals("i", toRowStr(scanner.getCell())); 268 // reseekTo i 269 assertEquals(0, scanner.reseekTo(toKV("i", tagUsage))); 270 assertEquals("i", toRowStr(scanner.getCell())); 271 272 // seekBefore k, so the scanner points to i 273 assertTrue(scanner.seekBefore(toKV("k", tagUsage))); 274 assertEquals("i", toRowStr(scanner.getCell())); 275 // reseekTo i and k 276 assertEquals(0, scanner.reseekTo(toKV("i", tagUsage))); 277 assertEquals("i", toRowStr(scanner.getCell())); 278 assertEquals(0, scanner.reseekTo(toKV("k", tagUsage))); 279 assertEquals("k", toRowStr(scanner.getCell())); 280 281 // seekBefore l, so the scanner points to k 282 assertTrue(scanner.seekBefore(toKV("l", tagUsage))); 283 assertEquals("k", toRowStr(scanner.getCell())); 284 // reseekTo k 285 assertEquals(0, scanner.reseekTo(toKV("k", tagUsage))); 286 assertEquals("k", toRowStr(scanner.getCell())); 287 deleteTestDir(fs); 288 } 289 290 @Test 291 public void testSeekTo() throws Exception { 292 testSeekToInternals(TagUsage.NO_TAG); 293 testSeekToInternals(TagUsage.ONLY_TAG); 294 testSeekToInternals(TagUsage.PARTIAL_TAG); 295 } 296 297 protected void testSeekToInternals(TagUsage tagUsage) throws IOException { 298 Path p = makeNewFile(tagUsage); 299 FileSystem fs = TEST_UTIL.getTestFileSystem(); 300 Configuration conf = TEST_UTIL.getConfiguration(); 301 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 302 assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount()); 303 HFileScanner scanner = reader.getScanner(false, true); 304 // lies before the start of the file. 305 assertEquals(-1, scanner.seekTo(toKV("a", tagUsage))); 306 307 assertEquals(1, scanner.seekTo(toKV("d", tagUsage))); 308 assertEquals("c", toRowStr(scanner.getCell())); 309 310 // Across a block boundary now. 311 // 'h' does not exist so we will get a '1' back for not found. 312 assertEquals(0, scanner.seekTo(toKV("i", tagUsage))); 313 assertEquals("i", toRowStr(scanner.getCell())); 314 315 assertEquals(1, scanner.seekTo(toKV("l", tagUsage))); 316 assertEquals("k", toRowStr(scanner.getCell())); 317 318 reader.close(); 319 deleteTestDir(fs); 320 } 321 322 @Test 323 public void testBlockContainingKey() throws Exception { 324 testBlockContainingKeyInternals(TagUsage.NO_TAG); 325 testBlockContainingKeyInternals(TagUsage.ONLY_TAG); 326 testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG); 327 } 328 329 protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException { 330 Path p = makeNewFile(tagUsage); 331 FileSystem fs = TEST_UTIL.getTestFileSystem(); 332 Configuration conf = TEST_UTIL.getConfiguration(); 333 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 334 HFileBlockIndex.BlockIndexReader blockIndexReader = 335 reader.getDataBlockIndexReader(); 336 System.out.println(blockIndexReader.toString()); 337 // falls before the start of the file. 338 assertEquals(-1, blockIndexReader.rootBlockContainingKey( 339 toKV("a", tagUsage))); 340 assertEquals(0, blockIndexReader.rootBlockContainingKey( 341 toKV("c", tagUsage))); 342 assertEquals(0, blockIndexReader.rootBlockContainingKey( 343 toKV("d", tagUsage))); 344 assertEquals(0, blockIndexReader.rootBlockContainingKey( 345 toKV("e", tagUsage))); 346 assertEquals(0, blockIndexReader.rootBlockContainingKey( 347 toKV("g", tagUsage))); 348 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("h", tagUsage))); 349 assertEquals(1, blockIndexReader.rootBlockContainingKey( 350 toKV("i", tagUsage))); 351 assertEquals(1, blockIndexReader.rootBlockContainingKey( 352 toKV("j", tagUsage))); 353 assertEquals(1, blockIndexReader.rootBlockContainingKey( 354 toKV("k", tagUsage))); 355 assertEquals(1, blockIndexReader.rootBlockContainingKey( 356 toKV("l", tagUsage))); 357 reader.close(); 358 deleteTestDir(fs); 359 } 360}