001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.Iterator;
028import java.util.List;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FSDataOutputStream;
031import org.apache.hadoop.fs.FileSystem;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.hbase.ArrayBackedTag;
034import org.apache.hadoop.hbase.ByteBufferKeyValue;
035import org.apache.hadoop.hbase.Cell;
036import org.apache.hadoop.hbase.CellComparatorImpl;
037import org.apache.hadoop.hbase.HBaseClassTestRule;
038import org.apache.hadoop.hbase.HBaseTestingUtility;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.KeyValue;
041import org.apache.hadoop.hbase.PrivateCellUtil;
042import org.apache.hadoop.hbase.Tag;
043import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
044import org.apache.hadoop.hbase.testclassification.IOTests;
045import org.apache.hadoop.hbase.testclassification.SmallTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.junit.Before;
048import org.junit.ClassRule;
049import org.junit.Test;
050import org.junit.experimental.categories.Category;
051import org.junit.runner.RunWith;
052import org.junit.runners.Parameterized;
053import org.junit.runners.Parameterized.Parameters;
054
055/**
056 * Test {@link HFileScanner#seekTo(Cell)} and its variants.
057 */
058@Category({IOTests.class, SmallTests.class})
059@RunWith(Parameterized.class)
060public class TestSeekTo {
061
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE =
064      HBaseClassTestRule.forClass(TestSeekTo.class);
065
066  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
067  private final DataBlockEncoding encoding;
068  @Parameters
069  public static Collection<Object[]> parameters() {
070    List<Object[]> paramList = new ArrayList<>();
071    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
072      paramList.add(new Object[] { encoding });
073    }
074    return paramList;
075  }
076  static boolean switchKVs = false;
077
078  public TestSeekTo(DataBlockEncoding encoding) {
079    this.encoding = encoding;
080  }
081
082  @Before
083  public void setUp() {
084    //reset
085    switchKVs = false;
086  }
087
088  static KeyValue toKV(String row, TagUsage tagUsage) {
089    if (tagUsage == TagUsage.NO_TAG) {
090      return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
091          Bytes.toBytes("value"));
092    } else if (tagUsage == TagUsage.ONLY_TAG) {
093      Tag t = new ArrayBackedTag((byte) 1, "myTag1");
094      Tag[] tags = new Tag[1];
095      tags[0] = t;
096      return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
097          HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
098    } else {
099      if (!switchKVs) {
100        switchKVs = true;
101        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
102            Bytes.toBytes("qualifier"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"));
103      } else {
104        switchKVs = false;
105        Tag t = new ArrayBackedTag((byte) 1, "myTag1");
106        Tag[] tags = new Tag[1];
107        tags[0] = t;
108        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
109            Bytes.toBytes("qualifier"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
110      }
111    }
112  }
113  static String toRowStr(Cell c) {
114    return Bytes.toString(c.getRowArray(), c.getRowOffset(), c.getRowLength());
115  }
116
117  Path makeNewFile(TagUsage tagUsage) throws IOException {
118    Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile");
119    FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile);
120    int blocksize = toKV("a", tagUsage).getLength() * 3;
121    HFileContext context = new HFileContextBuilder().withBlockSize(blocksize)
122        .withDataBlockEncoding(encoding)
123        .withIncludesTags(true).build();
124    Configuration conf = TEST_UTIL.getConfiguration();
125    HFile.Writer writer = HFile.getWriterFactoryNoCache(conf).withOutputStream(fout)
126        .withFileContext(context)
127        .withComparator(CellComparatorImpl.COMPARATOR).create();
128    // 4 bytes * 3 * 2 for each key/value +
129    // 3 for keys, 15 for values = 42 (woot)
130    writer.append(toKV("c", tagUsage));
131    writer.append(toKV("e", tagUsage));
132    writer.append(toKV("g", tagUsage));
133    // block transition
134    writer.append(toKV("i", tagUsage));
135    writer.append(toKV("k", tagUsage));
136    writer.close();
137    fout.close();
138    return ncTFile;
139  }
140
141  @Test
142  public void testSeekBefore() throws Exception {
143    testSeekBeforeInternals(TagUsage.NO_TAG);
144    testSeekBeforeInternals(TagUsage.ONLY_TAG);
145    testSeekBeforeInternals(TagUsage.PARTIAL_TAG);
146  }
147
148  protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException {
149    Path p = makeNewFile(tagUsage);
150    FileSystem fs = TEST_UTIL.getTestFileSystem();
151    Configuration conf = TEST_UTIL.getConfiguration();
152    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
153    reader.loadFileInfo();
154    HFileScanner scanner = reader.getScanner(false, true);
155    assertFalse(scanner.seekBefore(toKV("a", tagUsage)));
156
157    assertFalse(scanner.seekBefore(toKV("c", tagUsage)));
158
159    assertTrue(scanner.seekBefore(toKV("d", tagUsage)));
160    assertEquals("c", toRowStr(scanner.getCell()));
161
162    assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
163    assertEquals("c", toRowStr(scanner.getCell()));
164
165    assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
166    assertEquals("e", toRowStr(scanner.getCell()));
167
168    assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
169    assertEquals("e", toRowStr(scanner.getCell()));
170    assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
171    assertEquals("g", toRowStr(scanner.getCell()));
172    assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
173    assertEquals("g", toRowStr(scanner.getCell()));
174    assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
175    assertEquals("i", toRowStr(scanner.getCell()));
176    Cell cell = scanner.getCell();
177    if (tagUsage != TagUsage.NO_TAG && cell.getTagsLength() > 0) {
178      Iterator<Tag> tagsIterator = PrivateCellUtil.tagsIterator(cell);
179      while (tagsIterator.hasNext()) {
180        Tag next = tagsIterator.next();
181        assertEquals("myTag1", Bytes.toString(Tag.cloneValue(next)));
182      }
183    }
184    assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
185    assertEquals("i", toRowStr(scanner.getCell()));
186    assertTrue(scanner.seekBefore(toKV("l", tagUsage)));
187    assertEquals("k", toRowStr(scanner.getCell()));
188
189    reader.close();
190    deleteTestDir(fs);
191  }
192
193  protected void deleteTestDir(FileSystem fs) throws IOException {
194    Path dataTestDir = TEST_UTIL.getDataTestDir();
195    if(fs.exists(dataTestDir)) {
196      fs.delete(dataTestDir, true);
197    }
198  }
199
200  @Test
201  public void testSeekBeforeWithReSeekTo() throws Exception {
202    testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG);
203    testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG);
204    testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG);
205  }
206
207  protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException {
208    Path p = makeNewFile(tagUsage);
209    FileSystem fs = TEST_UTIL.getTestFileSystem();
210    Configuration conf = TEST_UTIL.getConfiguration();
211    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
212    reader.loadFileInfo();
213    HFileScanner scanner = reader.getScanner(false, true);
214    assertFalse(scanner.seekBefore(toKV("a", tagUsage)));
215    assertFalse(scanner.seekBefore(toKV("b", tagUsage)));
216    assertFalse(scanner.seekBefore(toKV("c", tagUsage)));
217
218    // seekBefore d, so the scanner points to c
219    assertTrue(scanner.seekBefore(toKV("d", tagUsage)));
220    assertFalse(scanner.getCell() instanceof ByteBufferKeyValue);
221    assertEquals("c", toRowStr(scanner.getCell()));
222    // reseekTo e and g
223    assertEquals(0, scanner.reseekTo(toKV("c", tagUsage)));
224    assertEquals("c", toRowStr(scanner.getCell()));
225    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
226    assertEquals("g", toRowStr(scanner.getCell()));
227
228    // seekBefore e, so the scanner points to c
229    assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
230    assertEquals("c", toRowStr(scanner.getCell()));
231    // reseekTo e and g
232    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
233    assertEquals("e", toRowStr(scanner.getCell()));
234    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
235    assertEquals("g", toRowStr(scanner.getCell()));
236
237    // seekBefore f, so the scanner points to e
238    assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
239    assertEquals("e", toRowStr(scanner.getCell()));
240    // reseekTo e and g
241    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
242    assertEquals("e", toRowStr(scanner.getCell()));
243    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
244    assertEquals("g", toRowStr(scanner.getCell()));
245
246    // seekBefore g, so the scanner points to e
247    assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
248    assertEquals("e", toRowStr(scanner.getCell()));
249    // reseekTo e and g again
250    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
251    assertEquals("e", toRowStr(scanner.getCell()));
252    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
253    assertEquals("g", toRowStr(scanner.getCell()));
254
255    // seekBefore h, so the scanner points to g
256    assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
257    assertEquals("g", toRowStr(scanner.getCell()));
258    // reseekTo g
259    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
260    assertEquals("g", toRowStr(scanner.getCell()));
261
262    // seekBefore i, so the scanner points to g
263    assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
264    assertEquals("g", toRowStr(scanner.getCell()));
265    // reseekTo g
266    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
267    assertEquals("g", toRowStr(scanner.getCell()));
268
269    // seekBefore j, so the scanner points to i
270    assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
271    assertEquals("i", toRowStr(scanner.getCell()));
272    // reseekTo i
273    assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
274    assertEquals("i", toRowStr(scanner.getCell()));
275
276    // seekBefore k, so the scanner points to i
277    assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
278    assertEquals("i", toRowStr(scanner.getCell()));
279    // reseekTo i and k
280    assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
281    assertEquals("i", toRowStr(scanner.getCell()));
282    assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
283    assertEquals("k", toRowStr(scanner.getCell()));
284
285    // seekBefore l, so the scanner points to k
286    assertTrue(scanner.seekBefore(toKV("l", tagUsage)));
287    assertEquals("k", toRowStr(scanner.getCell()));
288    // reseekTo k
289    assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
290    assertEquals("k", toRowStr(scanner.getCell()));
291    deleteTestDir(fs);
292  }
293
294  @Test
295  public void testSeekTo() throws Exception {
296    testSeekToInternals(TagUsage.NO_TAG);
297    testSeekToInternals(TagUsage.ONLY_TAG);
298    testSeekToInternals(TagUsage.PARTIAL_TAG);
299  }
300
301  protected void testSeekToInternals(TagUsage tagUsage) throws IOException {
302    Path p = makeNewFile(tagUsage);
303    FileSystem fs = TEST_UTIL.getTestFileSystem();
304    Configuration conf = TEST_UTIL.getConfiguration();
305    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
306    reader.loadFileInfo();
307    assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount());
308    HFileScanner scanner = reader.getScanner(false, true);
309    // lies before the start of the file.
310    assertEquals(-1, scanner.seekTo(toKV("a", tagUsage)));
311
312    assertEquals(1, scanner.seekTo(toKV("d", tagUsage)));
313    assertEquals("c", toRowStr(scanner.getCell()));
314
315    // Across a block boundary now.
316    // 'h' does not exist so we will get a '1' back for not found.
317    assertEquals(0, scanner.seekTo(toKV("i", tagUsage)));
318    assertEquals("i", toRowStr(scanner.getCell()));
319
320    assertEquals(1, scanner.seekTo(toKV("l", tagUsage)));
321    assertEquals("k", toRowStr(scanner.getCell()));
322
323    reader.close();
324    deleteTestDir(fs);
325  }
326
327  @Test
328  public void testBlockContainingKey() throws Exception {
329    testBlockContainingKeyInternals(TagUsage.NO_TAG);
330    testBlockContainingKeyInternals(TagUsage.ONLY_TAG);
331    testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG);
332  }
333
334  protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException {
335    Path p = makeNewFile(tagUsage);
336    FileSystem fs = TEST_UTIL.getTestFileSystem();
337    Configuration conf = TEST_UTIL.getConfiguration();
338    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
339    reader.loadFileInfo();
340    HFileBlockIndex.BlockIndexReader blockIndexReader =
341      reader.getDataBlockIndexReader();
342    System.out.println(blockIndexReader.toString());
343    // falls before the start of the file.
344    assertEquals(-1, blockIndexReader.rootBlockContainingKey(
345        toKV("a", tagUsage)));
346    assertEquals(0, blockIndexReader.rootBlockContainingKey(
347        toKV("c", tagUsage)));
348    assertEquals(0, blockIndexReader.rootBlockContainingKey(
349        toKV("d", tagUsage)));
350    assertEquals(0, blockIndexReader.rootBlockContainingKey(
351        toKV("e", tagUsage)));
352    assertEquals(0, blockIndexReader.rootBlockContainingKey(
353        toKV("g", tagUsage)));
354    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("h", tagUsage)));
355    assertEquals(1, blockIndexReader.rootBlockContainingKey(
356        toKV("i", tagUsage)));
357    assertEquals(1, blockIndexReader.rootBlockContainingKey(
358        toKV("j", tagUsage)));
359    assertEquals(1, blockIndexReader.rootBlockContainingKey(
360        toKV("k", tagUsage)));
361    assertEquals(1, blockIndexReader.rootBlockContainingKey(
362        toKV("l", tagUsage)));
363    reader.close();
364    deleteTestDir(fs);
365  }
366}