001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertFalse;
022import static org.junit.jupiter.api.Assertions.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.Iterator;
027import java.util.List;
028import java.util.stream.Stream;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FSDataOutputStream;
031import org.apache.hadoop.fs.FileSystem;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.hbase.ArrayBackedTag;
034import org.apache.hadoop.hbase.ByteBufferKeyValue;
035import org.apache.hadoop.hbase.Cell;
036import org.apache.hadoop.hbase.ExtendedCell;
037import org.apache.hadoop.hbase.HBaseParameterizedTestTemplate;
038import org.apache.hadoop.hbase.HBaseTestingUtil;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.KeyValue;
041import org.apache.hadoop.hbase.PrivateCellUtil;
042import org.apache.hadoop.hbase.Tag;
043import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
044import org.apache.hadoop.hbase.testclassification.IOTests;
045import org.apache.hadoop.hbase.testclassification.SmallTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.junit.jupiter.api.BeforeEach;
048import org.junit.jupiter.api.TestTemplate;
049import org.junit.jupiter.params.provider.Arguments;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053/**
054 * Test {@link HFileScanner#seekTo(Cell)} and its variants.
055 */
056@org.junit.jupiter.api.Tag(IOTests.TAG)
057@org.junit.jupiter.api.Tag(SmallTests.TAG)
058@HBaseParameterizedTestTemplate(name = "{index}: encoding={0}")
059public class TestSeekTo {
060
061  private static final Logger LOG = LoggerFactory.getLogger(TestSeekTo.class);
062
063  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
064  private final DataBlockEncoding encoding;
065
066  public static Stream<Arguments> parameters() {
067    List<Arguments> paramList = new ArrayList<>();
068    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
069      paramList.add(Arguments.of(encoding));
070    }
071    return paramList.stream();
072  }
073
074  static boolean switchKVs = false;
075
076  public TestSeekTo(DataBlockEncoding encoding) {
077    this.encoding = encoding;
078  }
079
080  @BeforeEach
081  public void setUp() {
082    // reset
083    switchKVs = false;
084  }
085
086  static KeyValue toKV(String row, TagUsage tagUsage) {
087    if (tagUsage == TagUsage.NO_TAG) {
088      return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
089        Bytes.toBytes("value"));
090    } else if (tagUsage == TagUsage.ONLY_TAG) {
091      Tag t = new ArrayBackedTag((byte) 1, "myTag1");
092      Tag[] tags = new Tag[1];
093      tags[0] = t;
094      return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
095        HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
096    } else {
097      if (!switchKVs) {
098        switchKVs = true;
099        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
100          HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"));
101      } else {
102        switchKVs = false;
103        Tag t = new ArrayBackedTag((byte) 1, "myTag1");
104        Tag[] tags = new Tag[1];
105        tags[0] = t;
106        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
107          HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
108      }
109    }
110  }
111
112  static String toRowStr(Cell c) {
113    return Bytes.toString(c.getRowArray(), c.getRowOffset(), c.getRowLength());
114  }
115
116  Path makeNewFile(TagUsage tagUsage) throws IOException {
117    Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile");
118    FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile);
119    int blocksize = toKV("a", tagUsage).getLength() * 3;
120    HFileContext context = new HFileContextBuilder().withBlockSize(blocksize)
121      .withDataBlockEncoding(encoding).withIncludesTags(true).build();
122    Configuration conf = TEST_UTIL.getConfiguration();
123    HFile.Writer writer =
124      HFile.getWriterFactoryNoCache(conf).withOutputStream(fout).withFileContext(context).create();
125    // 4 bytes * 3 * 2 for each key/value +
126    // 3 for keys, 15 for values = 42 (woot)
127    writer.append(toKV("c", tagUsage));
128    writer.append(toKV("e", tagUsage));
129    writer.append(toKV("g", tagUsage));
130    // block transition
131    writer.append(toKV("i", tagUsage));
132    writer.append(toKV("k", tagUsage));
133    writer.close();
134    fout.close();
135    return ncTFile;
136  }
137
138  @TestTemplate
139  public void testSeekBefore() throws Exception {
140    testSeekBeforeInternals(TagUsage.NO_TAG);
141    testSeekBeforeInternals(TagUsage.ONLY_TAG);
142    testSeekBeforeInternals(TagUsage.PARTIAL_TAG);
143  }
144
145  protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException {
146    Path p = makeNewFile(tagUsage);
147    FileSystem fs = TEST_UTIL.getTestFileSystem();
148    Configuration conf = TEST_UTIL.getConfiguration();
149    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
150    HFileScanner scanner = reader.getScanner(conf, false, true);
151    assertFalse(scanner.seekBefore(toKV("a", tagUsage)));
152
153    assertFalse(scanner.seekBefore(toKV("c", tagUsage)));
154
155    assertTrue(scanner.seekBefore(toKV("d", tagUsage)));
156    assertEquals("c", toRowStr(scanner.getCell()));
157
158    assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
159    assertEquals("c", toRowStr(scanner.getCell()));
160
161    assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
162    assertEquals("e", toRowStr(scanner.getCell()));
163
164    assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
165    assertEquals("e", toRowStr(scanner.getCell()));
166    assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
167    assertEquals("g", toRowStr(scanner.getCell()));
168    assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
169    assertEquals("g", toRowStr(scanner.getCell()));
170    assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
171    assertEquals("i", toRowStr(scanner.getCell()));
172    ExtendedCell cell = scanner.getCell();
173    if (tagUsage != TagUsage.NO_TAG && cell.getTagsLength() > 0) {
174      Iterator<Tag> tagsIterator = PrivateCellUtil.tagsIterator(cell);
175      while (tagsIterator.hasNext()) {
176        Tag next = tagsIterator.next();
177        assertEquals("myTag1", Bytes.toString(Tag.cloneValue(next)));
178      }
179    }
180    assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
181    assertEquals("i", toRowStr(scanner.getCell()));
182    assertTrue(scanner.seekBefore(toKV("l", tagUsage)));
183    assertEquals("k", toRowStr(scanner.getCell()));
184
185    reader.close();
186    deleteTestDir(fs);
187  }
188
189  protected void deleteTestDir(FileSystem fs) throws IOException {
190    Path dataTestDir = TEST_UTIL.getDataTestDir();
191    if (fs.exists(dataTestDir)) {
192      fs.delete(dataTestDir, true);
193    }
194  }
195
196  @TestTemplate
197  public void testSeekBeforeWithReSeekTo() throws Exception {
198    testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG);
199    testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG);
200    testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG);
201  }
202
203  protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException {
204    Path p = makeNewFile(tagUsage);
205    FileSystem fs = TEST_UTIL.getTestFileSystem();
206    Configuration conf = TEST_UTIL.getConfiguration();
207    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
208    HFileScanner scanner = reader.getScanner(conf, false, true);
209    assertFalse(scanner.seekBefore(toKV("a", tagUsage)));
210    assertFalse(scanner.seekBefore(toKV("b", tagUsage)));
211    assertFalse(scanner.seekBefore(toKV("c", tagUsage)));
212
213    // seekBefore d, so the scanner points to c
214    assertTrue(scanner.seekBefore(toKV("d", tagUsage)));
215    assertFalse(scanner.getCell() instanceof ByteBufferKeyValue);
216    assertEquals("c", toRowStr(scanner.getCell()));
217    // reseekTo e and g
218    assertEquals(0, scanner.reseekTo(toKV("c", tagUsage)));
219    assertEquals("c", toRowStr(scanner.getCell()));
220    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
221    assertEquals("g", toRowStr(scanner.getCell()));
222
223    // seekBefore e, so the scanner points to c
224    assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
225    assertEquals("c", toRowStr(scanner.getCell()));
226    // reseekTo e and g
227    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
228    assertEquals("e", toRowStr(scanner.getCell()));
229    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
230    assertEquals("g", toRowStr(scanner.getCell()));
231
232    // seekBefore f, so the scanner points to e
233    assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
234    assertEquals("e", toRowStr(scanner.getCell()));
235    // reseekTo e and g
236    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
237    assertEquals("e", toRowStr(scanner.getCell()));
238    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
239    assertEquals("g", toRowStr(scanner.getCell()));
240
241    // seekBefore g, so the scanner points to e
242    assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
243    assertEquals("e", toRowStr(scanner.getCell()));
244    // reseekTo e and g again
245    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
246    assertEquals("e", toRowStr(scanner.getCell()));
247    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
248    assertEquals("g", toRowStr(scanner.getCell()));
249
250    // seekBefore h, so the scanner points to g
251    assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
252    assertEquals("g", toRowStr(scanner.getCell()));
253    // reseekTo g
254    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
255    assertEquals("g", toRowStr(scanner.getCell()));
256
257    // seekBefore i, so the scanner points to g
258    assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
259    assertEquals("g", toRowStr(scanner.getCell()));
260    // reseekTo g
261    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
262    assertEquals("g", toRowStr(scanner.getCell()));
263
264    // seekBefore j, so the scanner points to i
265    assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
266    assertEquals("i", toRowStr(scanner.getCell()));
267    // reseekTo i
268    assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
269    assertEquals("i", toRowStr(scanner.getCell()));
270
271    // seekBefore k, so the scanner points to i
272    assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
273    assertEquals("i", toRowStr(scanner.getCell()));
274    // reseekTo i and k
275    assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
276    assertEquals("i", toRowStr(scanner.getCell()));
277    assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
278    assertEquals("k", toRowStr(scanner.getCell()));
279
280    // seekBefore l, so the scanner points to k
281    assertTrue(scanner.seekBefore(toKV("l", tagUsage)));
282    assertEquals("k", toRowStr(scanner.getCell()));
283    // reseekTo k
284    assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
285    assertEquals("k", toRowStr(scanner.getCell()));
286    deleteTestDir(fs);
287  }
288
289  @TestTemplate
290  public void testSeekTo() throws Exception {
291    testSeekToInternals(TagUsage.NO_TAG);
292    testSeekToInternals(TagUsage.ONLY_TAG);
293    testSeekToInternals(TagUsage.PARTIAL_TAG);
294  }
295
296  protected void testSeekToInternals(TagUsage tagUsage) throws IOException {
297    Path p = makeNewFile(tagUsage);
298    FileSystem fs = TEST_UTIL.getTestFileSystem();
299    Configuration conf = TEST_UTIL.getConfiguration();
300    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
301    assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount());
302    HFileScanner scanner = reader.getScanner(conf, false, true);
303    // lies before the start of the file.
304    assertEquals(-1, scanner.seekTo(toKV("a", tagUsage)));
305
306    assertEquals(1, scanner.seekTo(toKV("d", tagUsage)));
307    assertEquals("c", toRowStr(scanner.getCell()));
308
309    // Across a block boundary now.
310    // 'h' does not exist so we will get a '1' back for not found.
311    assertEquals(0, scanner.seekTo(toKV("i", tagUsage)));
312    assertEquals("i", toRowStr(scanner.getCell()));
313
314    assertEquals(1, scanner.seekTo(toKV("l", tagUsage)));
315    assertEquals("k", toRowStr(scanner.getCell()));
316
317    reader.close();
318    deleteTestDir(fs);
319  }
320
321  @TestTemplate
322  public void testBlockContainingKey() throws Exception {
323    testBlockContainingKeyInternals(TagUsage.NO_TAG);
324    testBlockContainingKeyInternals(TagUsage.ONLY_TAG);
325    testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG);
326  }
327
328  protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException {
329    Path p = makeNewFile(tagUsage);
330    FileSystem fs = TEST_UTIL.getTestFileSystem();
331    Configuration conf = TEST_UTIL.getConfiguration();
332    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
333    HFileBlockIndex.BlockIndexReader blockIndexReader = reader.getDataBlockIndexReader();
334    LOG.info(blockIndexReader.toString());
335    // falls before the start of the file.
336    assertEquals(-1, blockIndexReader.rootBlockContainingKey(toKV("a", tagUsage)));
337    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("c", tagUsage)));
338    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("d", tagUsage)));
339    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("e", tagUsage)));
340    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("g", tagUsage)));
341    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("h", tagUsage)));
342    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("i", tagUsage)));
343    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("j", tagUsage)));
344    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("k", tagUsage)));
345    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("l", tagUsage)));
346    reader.close();
347    deleteTestDir(fs);
348  }
349}