001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import static org.junit.jupiter.api.Assertions.assertEquals; 021import static org.junit.jupiter.api.Assertions.assertFalse; 022import static org.junit.jupiter.api.Assertions.assertTrue; 023 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Iterator; 027import java.util.List; 028import java.util.stream.Stream; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FSDataOutputStream; 031import org.apache.hadoop.fs.FileSystem; 032import org.apache.hadoop.fs.Path; 033import org.apache.hadoop.hbase.ArrayBackedTag; 034import org.apache.hadoop.hbase.ByteBufferKeyValue; 035import org.apache.hadoop.hbase.Cell; 036import org.apache.hadoop.hbase.ExtendedCell; 037import org.apache.hadoop.hbase.HBaseParameterizedTestTemplate; 038import org.apache.hadoop.hbase.HBaseTestingUtil; 039import org.apache.hadoop.hbase.HConstants; 040import org.apache.hadoop.hbase.KeyValue; 041import org.apache.hadoop.hbase.PrivateCellUtil; 042import org.apache.hadoop.hbase.Tag; 043import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 044import org.apache.hadoop.hbase.testclassification.IOTests; 045import org.apache.hadoop.hbase.testclassification.SmallTests; 046import org.apache.hadoop.hbase.util.Bytes; 047import org.junit.jupiter.api.BeforeEach; 048import org.junit.jupiter.api.TestTemplate; 049import org.junit.jupiter.params.provider.Arguments; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052 053/** 054 * Test {@link HFileScanner#seekTo(Cell)} and its variants. 055 */ 056@org.junit.jupiter.api.Tag(IOTests.TAG) 057@org.junit.jupiter.api.Tag(SmallTests.TAG) 058@HBaseParameterizedTestTemplate(name = "{index}: encoding={0}") 059public class TestSeekTo { 060 061 private static final Logger LOG = LoggerFactory.getLogger(TestSeekTo.class); 062 063 private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 064 private final DataBlockEncoding encoding; 065 066 public static Stream<Arguments> parameters() { 067 List<Arguments> paramList = new ArrayList<>(); 068 for (DataBlockEncoding encoding : DataBlockEncoding.values()) { 069 paramList.add(Arguments.of(encoding)); 070 } 071 return paramList.stream(); 072 } 073 074 static boolean switchKVs = false; 075 076 public TestSeekTo(DataBlockEncoding encoding) { 077 this.encoding = encoding; 078 } 079 080 @BeforeEach 081 public void setUp() { 082 // reset 083 switchKVs = false; 084 } 085 086 static KeyValue toKV(String row, TagUsage tagUsage) { 087 if (tagUsage == TagUsage.NO_TAG) { 088 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), 089 Bytes.toBytes("value")); 090 } else if (tagUsage == TagUsage.ONLY_TAG) { 091 Tag t = new ArrayBackedTag((byte) 1, "myTag1"); 092 Tag[] tags = new Tag[1]; 093 tags[0] = t; 094 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), 095 HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags); 096 } else { 097 if (!switchKVs) { 098 switchKVs = true; 099 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), 100 HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value")); 101 } else { 102 switchKVs = false; 103 Tag t = new ArrayBackedTag((byte) 1, "myTag1"); 104 Tag[] tags = new Tag[1]; 105 tags[0] = t; 106 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), 107 HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags); 108 } 109 } 110 } 111 112 static String toRowStr(Cell c) { 113 return Bytes.toString(c.getRowArray(), c.getRowOffset(), c.getRowLength()); 114 } 115 116 Path makeNewFile(TagUsage tagUsage) throws IOException { 117 Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile"); 118 FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile); 119 int blocksize = toKV("a", tagUsage).getLength() * 3; 120 HFileContext context = new HFileContextBuilder().withBlockSize(blocksize) 121 .withDataBlockEncoding(encoding).withIncludesTags(true).build(); 122 Configuration conf = TEST_UTIL.getConfiguration(); 123 HFile.Writer writer = 124 HFile.getWriterFactoryNoCache(conf).withOutputStream(fout).withFileContext(context).create(); 125 // 4 bytes * 3 * 2 for each key/value + 126 // 3 for keys, 15 for values = 42 (woot) 127 writer.append(toKV("c", tagUsage)); 128 writer.append(toKV("e", tagUsage)); 129 writer.append(toKV("g", tagUsage)); 130 // block transition 131 writer.append(toKV("i", tagUsage)); 132 writer.append(toKV("k", tagUsage)); 133 writer.close(); 134 fout.close(); 135 return ncTFile; 136 } 137 138 @TestTemplate 139 public void testSeekBefore() throws Exception { 140 testSeekBeforeInternals(TagUsage.NO_TAG); 141 testSeekBeforeInternals(TagUsage.ONLY_TAG); 142 testSeekBeforeInternals(TagUsage.PARTIAL_TAG); 143 } 144 145 protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException { 146 Path p = makeNewFile(tagUsage); 147 FileSystem fs = TEST_UTIL.getTestFileSystem(); 148 Configuration conf = TEST_UTIL.getConfiguration(); 149 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 150 HFileScanner scanner = reader.getScanner(conf, false, true); 151 assertFalse(scanner.seekBefore(toKV("a", tagUsage))); 152 153 assertFalse(scanner.seekBefore(toKV("c", tagUsage))); 154 155 assertTrue(scanner.seekBefore(toKV("d", tagUsage))); 156 assertEquals("c", toRowStr(scanner.getCell())); 157 158 assertTrue(scanner.seekBefore(toKV("e", tagUsage))); 159 assertEquals("c", toRowStr(scanner.getCell())); 160 161 assertTrue(scanner.seekBefore(toKV("f", tagUsage))); 162 assertEquals("e", toRowStr(scanner.getCell())); 163 164 assertTrue(scanner.seekBefore(toKV("g", tagUsage))); 165 assertEquals("e", toRowStr(scanner.getCell())); 166 assertTrue(scanner.seekBefore(toKV("h", tagUsage))); 167 assertEquals("g", toRowStr(scanner.getCell())); 168 assertTrue(scanner.seekBefore(toKV("i", tagUsage))); 169 assertEquals("g", toRowStr(scanner.getCell())); 170 assertTrue(scanner.seekBefore(toKV("j", tagUsage))); 171 assertEquals("i", toRowStr(scanner.getCell())); 172 ExtendedCell cell = scanner.getCell(); 173 if (tagUsage != TagUsage.NO_TAG && cell.getTagsLength() > 0) { 174 Iterator<Tag> tagsIterator = PrivateCellUtil.tagsIterator(cell); 175 while (tagsIterator.hasNext()) { 176 Tag next = tagsIterator.next(); 177 assertEquals("myTag1", Bytes.toString(Tag.cloneValue(next))); 178 } 179 } 180 assertTrue(scanner.seekBefore(toKV("k", tagUsage))); 181 assertEquals("i", toRowStr(scanner.getCell())); 182 assertTrue(scanner.seekBefore(toKV("l", tagUsage))); 183 assertEquals("k", toRowStr(scanner.getCell())); 184 185 reader.close(); 186 deleteTestDir(fs); 187 } 188 189 protected void deleteTestDir(FileSystem fs) throws IOException { 190 Path dataTestDir = TEST_UTIL.getDataTestDir(); 191 if (fs.exists(dataTestDir)) { 192 fs.delete(dataTestDir, true); 193 } 194 } 195 196 @TestTemplate 197 public void testSeekBeforeWithReSeekTo() throws Exception { 198 testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG); 199 testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG); 200 testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG); 201 } 202 203 protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException { 204 Path p = makeNewFile(tagUsage); 205 FileSystem fs = TEST_UTIL.getTestFileSystem(); 206 Configuration conf = TEST_UTIL.getConfiguration(); 207 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 208 HFileScanner scanner = reader.getScanner(conf, false, true); 209 assertFalse(scanner.seekBefore(toKV("a", tagUsage))); 210 assertFalse(scanner.seekBefore(toKV("b", tagUsage))); 211 assertFalse(scanner.seekBefore(toKV("c", tagUsage))); 212 213 // seekBefore d, so the scanner points to c 214 assertTrue(scanner.seekBefore(toKV("d", tagUsage))); 215 assertFalse(scanner.getCell() instanceof ByteBufferKeyValue); 216 assertEquals("c", toRowStr(scanner.getCell())); 217 // reseekTo e and g 218 assertEquals(0, scanner.reseekTo(toKV("c", tagUsage))); 219 assertEquals("c", toRowStr(scanner.getCell())); 220 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 221 assertEquals("g", toRowStr(scanner.getCell())); 222 223 // seekBefore e, so the scanner points to c 224 assertTrue(scanner.seekBefore(toKV("e", tagUsage))); 225 assertEquals("c", toRowStr(scanner.getCell())); 226 // reseekTo e and g 227 assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); 228 assertEquals("e", toRowStr(scanner.getCell())); 229 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 230 assertEquals("g", toRowStr(scanner.getCell())); 231 232 // seekBefore f, so the scanner points to e 233 assertTrue(scanner.seekBefore(toKV("f", tagUsage))); 234 assertEquals("e", toRowStr(scanner.getCell())); 235 // reseekTo e and g 236 assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); 237 assertEquals("e", toRowStr(scanner.getCell())); 238 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 239 assertEquals("g", toRowStr(scanner.getCell())); 240 241 // seekBefore g, so the scanner points to e 242 assertTrue(scanner.seekBefore(toKV("g", tagUsage))); 243 assertEquals("e", toRowStr(scanner.getCell())); 244 // reseekTo e and g again 245 assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); 246 assertEquals("e", toRowStr(scanner.getCell())); 247 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 248 assertEquals("g", toRowStr(scanner.getCell())); 249 250 // seekBefore h, so the scanner points to g 251 assertTrue(scanner.seekBefore(toKV("h", tagUsage))); 252 assertEquals("g", toRowStr(scanner.getCell())); 253 // reseekTo g 254 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 255 assertEquals("g", toRowStr(scanner.getCell())); 256 257 // seekBefore i, so the scanner points to g 258 assertTrue(scanner.seekBefore(toKV("i", tagUsage))); 259 assertEquals("g", toRowStr(scanner.getCell())); 260 // reseekTo g 261 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 262 assertEquals("g", toRowStr(scanner.getCell())); 263 264 // seekBefore j, so the scanner points to i 265 assertTrue(scanner.seekBefore(toKV("j", tagUsage))); 266 assertEquals("i", toRowStr(scanner.getCell())); 267 // reseekTo i 268 assertEquals(0, scanner.reseekTo(toKV("i", tagUsage))); 269 assertEquals("i", toRowStr(scanner.getCell())); 270 271 // seekBefore k, so the scanner points to i 272 assertTrue(scanner.seekBefore(toKV("k", tagUsage))); 273 assertEquals("i", toRowStr(scanner.getCell())); 274 // reseekTo i and k 275 assertEquals(0, scanner.reseekTo(toKV("i", tagUsage))); 276 assertEquals("i", toRowStr(scanner.getCell())); 277 assertEquals(0, scanner.reseekTo(toKV("k", tagUsage))); 278 assertEquals("k", toRowStr(scanner.getCell())); 279 280 // seekBefore l, so the scanner points to k 281 assertTrue(scanner.seekBefore(toKV("l", tagUsage))); 282 assertEquals("k", toRowStr(scanner.getCell())); 283 // reseekTo k 284 assertEquals(0, scanner.reseekTo(toKV("k", tagUsage))); 285 assertEquals("k", toRowStr(scanner.getCell())); 286 deleteTestDir(fs); 287 } 288 289 @TestTemplate 290 public void testSeekTo() throws Exception { 291 testSeekToInternals(TagUsage.NO_TAG); 292 testSeekToInternals(TagUsage.ONLY_TAG); 293 testSeekToInternals(TagUsage.PARTIAL_TAG); 294 } 295 296 protected void testSeekToInternals(TagUsage tagUsage) throws IOException { 297 Path p = makeNewFile(tagUsage); 298 FileSystem fs = TEST_UTIL.getTestFileSystem(); 299 Configuration conf = TEST_UTIL.getConfiguration(); 300 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 301 assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount()); 302 HFileScanner scanner = reader.getScanner(conf, false, true); 303 // lies before the start of the file. 304 assertEquals(-1, scanner.seekTo(toKV("a", tagUsage))); 305 306 assertEquals(1, scanner.seekTo(toKV("d", tagUsage))); 307 assertEquals("c", toRowStr(scanner.getCell())); 308 309 // Across a block boundary now. 310 // 'h' does not exist so we will get a '1' back for not found. 311 assertEquals(0, scanner.seekTo(toKV("i", tagUsage))); 312 assertEquals("i", toRowStr(scanner.getCell())); 313 314 assertEquals(1, scanner.seekTo(toKV("l", tagUsage))); 315 assertEquals("k", toRowStr(scanner.getCell())); 316 317 reader.close(); 318 deleteTestDir(fs); 319 } 320 321 @TestTemplate 322 public void testBlockContainingKey() throws Exception { 323 testBlockContainingKeyInternals(TagUsage.NO_TAG); 324 testBlockContainingKeyInternals(TagUsage.ONLY_TAG); 325 testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG); 326 } 327 328 protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException { 329 Path p = makeNewFile(tagUsage); 330 FileSystem fs = TEST_UTIL.getTestFileSystem(); 331 Configuration conf = TEST_UTIL.getConfiguration(); 332 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 333 HFileBlockIndex.BlockIndexReader blockIndexReader = reader.getDataBlockIndexReader(); 334 LOG.info(blockIndexReader.toString()); 335 // falls before the start of the file. 336 assertEquals(-1, blockIndexReader.rootBlockContainingKey(toKV("a", tagUsage))); 337 assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("c", tagUsage))); 338 assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("d", tagUsage))); 339 assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("e", tagUsage))); 340 assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("g", tagUsage))); 341 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("h", tagUsage))); 342 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("i", tagUsage))); 343 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("j", tagUsage))); 344 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("k", tagUsage))); 345 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("l", tagUsage))); 346 reader.close(); 347 deleteTestDir(fs); 348 } 349}