001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.Iterator;
028import java.util.List;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FSDataOutputStream;
031import org.apache.hadoop.fs.FileSystem;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.hbase.ArrayBackedTag;
034import org.apache.hadoop.hbase.ByteBufferKeyValue;
035import org.apache.hadoop.hbase.Cell;
036import org.apache.hadoop.hbase.HBaseClassTestRule;
037import org.apache.hadoop.hbase.HBaseTestingUtil;
038import org.apache.hadoop.hbase.HConstants;
039import org.apache.hadoop.hbase.KeyValue;
040import org.apache.hadoop.hbase.PrivateCellUtil;
041import org.apache.hadoop.hbase.Tag;
042import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
043import org.apache.hadoop.hbase.testclassification.IOTests;
044import org.apache.hadoop.hbase.testclassification.SmallTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.junit.Before;
047import org.junit.ClassRule;
048import org.junit.Test;
049import org.junit.experimental.categories.Category;
050import org.junit.runner.RunWith;
051import org.junit.runners.Parameterized;
052import org.junit.runners.Parameterized.Parameters;
053
054/**
055 * Test {@link HFileScanner#seekTo(Cell)} and its variants.
056 */
057@Category({ IOTests.class, SmallTests.class })
058@RunWith(Parameterized.class)
059public class TestSeekTo {
060
061  @ClassRule
062  public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestSeekTo.class);
063
064  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
065  private final DataBlockEncoding encoding;
066
067  @Parameters
068  public static Collection<Object[]> parameters() {
069    List<Object[]> paramList = new ArrayList<>();
070    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
071      paramList.add(new Object[] { encoding });
072    }
073    return paramList;
074  }
075
076  static boolean switchKVs = false;
077
078  public TestSeekTo(DataBlockEncoding encoding) {
079    this.encoding = encoding;
080  }
081
082  @Before
083  public void setUp() {
084    // reset
085    switchKVs = false;
086  }
087
088  static KeyValue toKV(String row, TagUsage tagUsage) {
089    if (tagUsage == TagUsage.NO_TAG) {
090      return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
091        Bytes.toBytes("value"));
092    } else if (tagUsage == TagUsage.ONLY_TAG) {
093      Tag t = new ArrayBackedTag((byte) 1, "myTag1");
094      Tag[] tags = new Tag[1];
095      tags[0] = t;
096      return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
097        HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
098    } else {
099      if (!switchKVs) {
100        switchKVs = true;
101        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
102          HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"));
103      } else {
104        switchKVs = false;
105        Tag t = new ArrayBackedTag((byte) 1, "myTag1");
106        Tag[] tags = new Tag[1];
107        tags[0] = t;
108        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
109          HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
110      }
111    }
112  }
113
114  static String toRowStr(Cell c) {
115    return Bytes.toString(c.getRowArray(), c.getRowOffset(), c.getRowLength());
116  }
117
118  Path makeNewFile(TagUsage tagUsage) throws IOException {
119    Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile");
120    FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile);
121    int blocksize = toKV("a", tagUsage).getLength() * 3;
122    HFileContext context = new HFileContextBuilder().withBlockSize(blocksize)
123      .withDataBlockEncoding(encoding).withIncludesTags(true).build();
124    Configuration conf = TEST_UTIL.getConfiguration();
125    HFile.Writer writer =
126      HFile.getWriterFactoryNoCache(conf).withOutputStream(fout).withFileContext(context).create();
127    // 4 bytes * 3 * 2 for each key/value +
128    // 3 for keys, 15 for values = 42 (woot)
129    writer.append(toKV("c", tagUsage));
130    writer.append(toKV("e", tagUsage));
131    writer.append(toKV("g", tagUsage));
132    // block transition
133    writer.append(toKV("i", tagUsage));
134    writer.append(toKV("k", tagUsage));
135    writer.close();
136    fout.close();
137    return ncTFile;
138  }
139
140  @Test
141  public void testSeekBefore() throws Exception {
142    testSeekBeforeInternals(TagUsage.NO_TAG);
143    testSeekBeforeInternals(TagUsage.ONLY_TAG);
144    testSeekBeforeInternals(TagUsage.PARTIAL_TAG);
145  }
146
147  protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException {
148    Path p = makeNewFile(tagUsage);
149    FileSystem fs = TEST_UTIL.getTestFileSystem();
150    Configuration conf = TEST_UTIL.getConfiguration();
151    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
152    HFileScanner scanner = reader.getScanner(conf, false, true);
153    assertFalse(scanner.seekBefore(toKV("a", tagUsage)));
154
155    assertFalse(scanner.seekBefore(toKV("c", tagUsage)));
156
157    assertTrue(scanner.seekBefore(toKV("d", tagUsage)));
158    assertEquals("c", toRowStr(scanner.getCell()));
159
160    assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
161    assertEquals("c", toRowStr(scanner.getCell()));
162
163    assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
164    assertEquals("e", toRowStr(scanner.getCell()));
165
166    assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
167    assertEquals("e", toRowStr(scanner.getCell()));
168    assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
169    assertEquals("g", toRowStr(scanner.getCell()));
170    assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
171    assertEquals("g", toRowStr(scanner.getCell()));
172    assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
173    assertEquals("i", toRowStr(scanner.getCell()));
174    Cell cell = scanner.getCell();
175    if (tagUsage != TagUsage.NO_TAG && cell.getTagsLength() > 0) {
176      Iterator<Tag> tagsIterator = PrivateCellUtil.tagsIterator(cell);
177      while (tagsIterator.hasNext()) {
178        Tag next = tagsIterator.next();
179        assertEquals("myTag1", Bytes.toString(Tag.cloneValue(next)));
180      }
181    }
182    assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
183    assertEquals("i", toRowStr(scanner.getCell()));
184    assertTrue(scanner.seekBefore(toKV("l", tagUsage)));
185    assertEquals("k", toRowStr(scanner.getCell()));
186
187    reader.close();
188    deleteTestDir(fs);
189  }
190
191  protected void deleteTestDir(FileSystem fs) throws IOException {
192    Path dataTestDir = TEST_UTIL.getDataTestDir();
193    if (fs.exists(dataTestDir)) {
194      fs.delete(dataTestDir, true);
195    }
196  }
197
198  @Test
199  public void testSeekBeforeWithReSeekTo() throws Exception {
200    testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG);
201    testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG);
202    testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG);
203  }
204
205  protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException {
206    Path p = makeNewFile(tagUsage);
207    FileSystem fs = TEST_UTIL.getTestFileSystem();
208    Configuration conf = TEST_UTIL.getConfiguration();
209    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
210    HFileScanner scanner = reader.getScanner(conf, false, true);
211    assertFalse(scanner.seekBefore(toKV("a", tagUsage)));
212    assertFalse(scanner.seekBefore(toKV("b", tagUsage)));
213    assertFalse(scanner.seekBefore(toKV("c", tagUsage)));
214
215    // seekBefore d, so the scanner points to c
216    assertTrue(scanner.seekBefore(toKV("d", tagUsage)));
217    assertFalse(scanner.getCell() instanceof ByteBufferKeyValue);
218    assertEquals("c", toRowStr(scanner.getCell()));
219    // reseekTo e and g
220    assertEquals(0, scanner.reseekTo(toKV("c", tagUsage)));
221    assertEquals("c", toRowStr(scanner.getCell()));
222    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
223    assertEquals("g", toRowStr(scanner.getCell()));
224
225    // seekBefore e, so the scanner points to c
226    assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
227    assertEquals("c", toRowStr(scanner.getCell()));
228    // reseekTo e and g
229    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
230    assertEquals("e", toRowStr(scanner.getCell()));
231    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
232    assertEquals("g", toRowStr(scanner.getCell()));
233
234    // seekBefore f, so the scanner points to e
235    assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
236    assertEquals("e", toRowStr(scanner.getCell()));
237    // reseekTo e and g
238    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
239    assertEquals("e", toRowStr(scanner.getCell()));
240    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
241    assertEquals("g", toRowStr(scanner.getCell()));
242
243    // seekBefore g, so the scanner points to e
244    assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
245    assertEquals("e", toRowStr(scanner.getCell()));
246    // reseekTo e and g again
247    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
248    assertEquals("e", toRowStr(scanner.getCell()));
249    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
250    assertEquals("g", toRowStr(scanner.getCell()));
251
252    // seekBefore h, so the scanner points to g
253    assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
254    assertEquals("g", toRowStr(scanner.getCell()));
255    // reseekTo g
256    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
257    assertEquals("g", toRowStr(scanner.getCell()));
258
259    // seekBefore i, so the scanner points to g
260    assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
261    assertEquals("g", toRowStr(scanner.getCell()));
262    // reseekTo g
263    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
264    assertEquals("g", toRowStr(scanner.getCell()));
265
266    // seekBefore j, so the scanner points to i
267    assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
268    assertEquals("i", toRowStr(scanner.getCell()));
269    // reseekTo i
270    assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
271    assertEquals("i", toRowStr(scanner.getCell()));
272
273    // seekBefore k, so the scanner points to i
274    assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
275    assertEquals("i", toRowStr(scanner.getCell()));
276    // reseekTo i and k
277    assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
278    assertEquals("i", toRowStr(scanner.getCell()));
279    assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
280    assertEquals("k", toRowStr(scanner.getCell()));
281
282    // seekBefore l, so the scanner points to k
283    assertTrue(scanner.seekBefore(toKV("l", tagUsage)));
284    assertEquals("k", toRowStr(scanner.getCell()));
285    // reseekTo k
286    assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
287    assertEquals("k", toRowStr(scanner.getCell()));
288    deleteTestDir(fs);
289  }
290
291  @Test
292  public void testSeekTo() throws Exception {
293    testSeekToInternals(TagUsage.NO_TAG);
294    testSeekToInternals(TagUsage.ONLY_TAG);
295    testSeekToInternals(TagUsage.PARTIAL_TAG);
296  }
297
298  protected void testSeekToInternals(TagUsage tagUsage) throws IOException {
299    Path p = makeNewFile(tagUsage);
300    FileSystem fs = TEST_UTIL.getTestFileSystem();
301    Configuration conf = TEST_UTIL.getConfiguration();
302    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
303    assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount());
304    HFileScanner scanner = reader.getScanner(conf, false, true);
305    // lies before the start of the file.
306    assertEquals(-1, scanner.seekTo(toKV("a", tagUsage)));
307
308    assertEquals(1, scanner.seekTo(toKV("d", tagUsage)));
309    assertEquals("c", toRowStr(scanner.getCell()));
310
311    // Across a block boundary now.
312    // 'h' does not exist so we will get a '1' back for not found.
313    assertEquals(0, scanner.seekTo(toKV("i", tagUsage)));
314    assertEquals("i", toRowStr(scanner.getCell()));
315
316    assertEquals(1, scanner.seekTo(toKV("l", tagUsage)));
317    assertEquals("k", toRowStr(scanner.getCell()));
318
319    reader.close();
320    deleteTestDir(fs);
321  }
322
323  @Test
324  public void testBlockContainingKey() throws Exception {
325    testBlockContainingKeyInternals(TagUsage.NO_TAG);
326    testBlockContainingKeyInternals(TagUsage.ONLY_TAG);
327    testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG);
328  }
329
330  protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException {
331    Path p = makeNewFile(tagUsage);
332    FileSystem fs = TEST_UTIL.getTestFileSystem();
333    Configuration conf = TEST_UTIL.getConfiguration();
334    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
335    HFileBlockIndex.BlockIndexReader blockIndexReader = reader.getDataBlockIndexReader();
336    System.out.println(blockIndexReader.toString());
337    // falls before the start of the file.
338    assertEquals(-1, blockIndexReader.rootBlockContainingKey(toKV("a", tagUsage)));
339    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("c", tagUsage)));
340    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("d", tagUsage)));
341    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("e", tagUsage)));
342    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("g", tagUsage)));
343    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("h", tagUsage)));
344    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("i", tagUsage)));
345    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("j", tagUsage)));
346    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("k", tagUsage)));
347    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("l", tagUsage)));
348    reader.close();
349    deleteTestDir(fs);
350  }
351}