001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.Iterator;
028import java.util.List;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FSDataOutputStream;
031import org.apache.hadoop.fs.FileSystem;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.hbase.ArrayBackedTag;
034import org.apache.hadoop.hbase.ByteBufferKeyValue;
035import org.apache.hadoop.hbase.Cell;
036import org.apache.hadoop.hbase.HBaseClassTestRule;
037import org.apache.hadoop.hbase.HBaseTestingUtility;
038import org.apache.hadoop.hbase.HConstants;
039import org.apache.hadoop.hbase.KeyValue;
040import org.apache.hadoop.hbase.PrivateCellUtil;
041import org.apache.hadoop.hbase.Tag;
042import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
043import org.apache.hadoop.hbase.testclassification.IOTests;
044import org.apache.hadoop.hbase.testclassification.SmallTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.junit.Before;
047import org.junit.ClassRule;
048import org.junit.Test;
049import org.junit.experimental.categories.Category;
050import org.junit.runner.RunWith;
051import org.junit.runners.Parameterized;
052import org.junit.runners.Parameterized.Parameters;
053
054/**
055 * Test {@link HFileScanner#seekTo(Cell)} and its variants.
056 */
057@Category({IOTests.class, SmallTests.class})
058@RunWith(Parameterized.class)
059public class TestSeekTo {
060
061  @ClassRule
062  public static final HBaseClassTestRule CLASS_RULE =
063      HBaseClassTestRule.forClass(TestSeekTo.class);
064
065  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
066  private final DataBlockEncoding encoding;
067  @Parameters
068  public static Collection<Object[]> parameters() {
069    List<Object[]> paramList = new ArrayList<>();
070    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
071      paramList.add(new Object[] { encoding });
072    }
073    return paramList;
074  }
075  static boolean switchKVs = false;
076
077  public TestSeekTo(DataBlockEncoding encoding) {
078    this.encoding = encoding;
079  }
080
081  @Before
082  public void setUp() {
083    //reset
084    switchKVs = false;
085  }
086
087  static KeyValue toKV(String row, TagUsage tagUsage) {
088    if (tagUsage == TagUsage.NO_TAG) {
089      return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
090          Bytes.toBytes("value"));
091    } else if (tagUsage == TagUsage.ONLY_TAG) {
092      Tag t = new ArrayBackedTag((byte) 1, "myTag1");
093      Tag[] tags = new Tag[1];
094      tags[0] = t;
095      return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
096          HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
097    } else {
098      if (!switchKVs) {
099        switchKVs = true;
100        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
101            Bytes.toBytes("qualifier"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"));
102      } else {
103        switchKVs = false;
104        Tag t = new ArrayBackedTag((byte) 1, "myTag1");
105        Tag[] tags = new Tag[1];
106        tags[0] = t;
107        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
108            Bytes.toBytes("qualifier"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
109      }
110    }
111  }
112  static String toRowStr(Cell c) {
113    return Bytes.toString(c.getRowArray(), c.getRowOffset(), c.getRowLength());
114  }
115
116  Path makeNewFile(TagUsage tagUsage) throws IOException {
117    Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile");
118    FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile);
119    int blocksize = toKV("a", tagUsage).getLength() * 3;
120    HFileContext context = new HFileContextBuilder().withBlockSize(blocksize)
121        .withDataBlockEncoding(encoding)
122        .withIncludesTags(true).build();
123    Configuration conf = TEST_UTIL.getConfiguration();
124    HFile.Writer writer = HFile.getWriterFactoryNoCache(conf).withOutputStream(fout)
125        .withFileContext(context).create();
126    // 4 bytes * 3 * 2 for each key/value +
127    // 3 for keys, 15 for values = 42 (woot)
128    writer.append(toKV("c", tagUsage));
129    writer.append(toKV("e", tagUsage));
130    writer.append(toKV("g", tagUsage));
131    // block transition
132    writer.append(toKV("i", tagUsage));
133    writer.append(toKV("k", tagUsage));
134    writer.close();
135    fout.close();
136    return ncTFile;
137  }
138
139  @Test
140  public void testSeekBefore() throws Exception {
141    testSeekBeforeInternals(TagUsage.NO_TAG);
142    testSeekBeforeInternals(TagUsage.ONLY_TAG);
143    testSeekBeforeInternals(TagUsage.PARTIAL_TAG);
144  }
145
146  protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException {
147    Path p = makeNewFile(tagUsage);
148    FileSystem fs = TEST_UTIL.getTestFileSystem();
149    Configuration conf = TEST_UTIL.getConfiguration();
150    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
151    HFileScanner scanner = reader.getScanner(false, true);
152    assertFalse(scanner.seekBefore(toKV("a", tagUsage)));
153
154    assertFalse(scanner.seekBefore(toKV("c", tagUsage)));
155
156    assertTrue(scanner.seekBefore(toKV("d", tagUsage)));
157    assertEquals("c", toRowStr(scanner.getCell()));
158
159    assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
160    assertEquals("c", toRowStr(scanner.getCell()));
161
162    assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
163    assertEquals("e", toRowStr(scanner.getCell()));
164
165    assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
166    assertEquals("e", toRowStr(scanner.getCell()));
167    assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
168    assertEquals("g", toRowStr(scanner.getCell()));
169    assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
170    assertEquals("g", toRowStr(scanner.getCell()));
171    assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
172    assertEquals("i", toRowStr(scanner.getCell()));
173    Cell cell = scanner.getCell();
174    if (tagUsage != TagUsage.NO_TAG && cell.getTagsLength() > 0) {
175      Iterator<Tag> tagsIterator = PrivateCellUtil.tagsIterator(cell);
176      while (tagsIterator.hasNext()) {
177        Tag next = tagsIterator.next();
178        assertEquals("myTag1", Bytes.toString(Tag.cloneValue(next)));
179      }
180    }
181    assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
182    assertEquals("i", toRowStr(scanner.getCell()));
183    assertTrue(scanner.seekBefore(toKV("l", tagUsage)));
184    assertEquals("k", toRowStr(scanner.getCell()));
185
186    reader.close();
187    deleteTestDir(fs);
188  }
189
190  protected void deleteTestDir(FileSystem fs) throws IOException {
191    Path dataTestDir = TEST_UTIL.getDataTestDir();
192    if(fs.exists(dataTestDir)) {
193      fs.delete(dataTestDir, true);
194    }
195  }
196
197  @Test
198  public void testSeekBeforeWithReSeekTo() throws Exception {
199    testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG);
200    testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG);
201    testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG);
202  }
203
204  protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException {
205    Path p = makeNewFile(tagUsage);
206    FileSystem fs = TEST_UTIL.getTestFileSystem();
207    Configuration conf = TEST_UTIL.getConfiguration();
208    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
209    HFileScanner scanner = reader.getScanner(false, true);
210    assertFalse(scanner.seekBefore(toKV("a", tagUsage)));
211    assertFalse(scanner.seekBefore(toKV("b", tagUsage)));
212    assertFalse(scanner.seekBefore(toKV("c", tagUsage)));
213
214    // seekBefore d, so the scanner points to c
215    assertTrue(scanner.seekBefore(toKV("d", tagUsage)));
216    assertFalse(scanner.getCell() instanceof ByteBufferKeyValue);
217    assertEquals("c", toRowStr(scanner.getCell()));
218    // reseekTo e and g
219    assertEquals(0, scanner.reseekTo(toKV("c", tagUsage)));
220    assertEquals("c", toRowStr(scanner.getCell()));
221    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
222    assertEquals("g", toRowStr(scanner.getCell()));
223
224    // seekBefore e, so the scanner points to c
225    assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
226    assertEquals("c", toRowStr(scanner.getCell()));
227    // reseekTo e and g
228    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
229    assertEquals("e", toRowStr(scanner.getCell()));
230    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
231    assertEquals("g", toRowStr(scanner.getCell()));
232
233    // seekBefore f, so the scanner points to e
234    assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
235    assertEquals("e", toRowStr(scanner.getCell()));
236    // reseekTo e and g
237    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
238    assertEquals("e", toRowStr(scanner.getCell()));
239    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
240    assertEquals("g", toRowStr(scanner.getCell()));
241
242    // seekBefore g, so the scanner points to e
243    assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
244    assertEquals("e", toRowStr(scanner.getCell()));
245    // reseekTo e and g again
246    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
247    assertEquals("e", toRowStr(scanner.getCell()));
248    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
249    assertEquals("g", toRowStr(scanner.getCell()));
250
251    // seekBefore h, so the scanner points to g
252    assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
253    assertEquals("g", toRowStr(scanner.getCell()));
254    // reseekTo g
255    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
256    assertEquals("g", toRowStr(scanner.getCell()));
257
258    // seekBefore i, so the scanner points to g
259    assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
260    assertEquals("g", toRowStr(scanner.getCell()));
261    // reseekTo g
262    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
263    assertEquals("g", toRowStr(scanner.getCell()));
264
265    // seekBefore j, so the scanner points to i
266    assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
267    assertEquals("i", toRowStr(scanner.getCell()));
268    // reseekTo i
269    assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
270    assertEquals("i", toRowStr(scanner.getCell()));
271
272    // seekBefore k, so the scanner points to i
273    assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
274    assertEquals("i", toRowStr(scanner.getCell()));
275    // reseekTo i and k
276    assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
277    assertEquals("i", toRowStr(scanner.getCell()));
278    assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
279    assertEquals("k", toRowStr(scanner.getCell()));
280
281    // seekBefore l, so the scanner points to k
282    assertTrue(scanner.seekBefore(toKV("l", tagUsage)));
283    assertEquals("k", toRowStr(scanner.getCell()));
284    // reseekTo k
285    assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
286    assertEquals("k", toRowStr(scanner.getCell()));
287    deleteTestDir(fs);
288  }
289
290  @Test
291  public void testSeekTo() throws Exception {
292    testSeekToInternals(TagUsage.NO_TAG);
293    testSeekToInternals(TagUsage.ONLY_TAG);
294    testSeekToInternals(TagUsage.PARTIAL_TAG);
295  }
296
297  protected void testSeekToInternals(TagUsage tagUsage) throws IOException {
298    Path p = makeNewFile(tagUsage);
299    FileSystem fs = TEST_UTIL.getTestFileSystem();
300    Configuration conf = TEST_UTIL.getConfiguration();
301    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
302    assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount());
303    HFileScanner scanner = reader.getScanner(false, true);
304    // lies before the start of the file.
305    assertEquals(-1, scanner.seekTo(toKV("a", tagUsage)));
306
307    assertEquals(1, scanner.seekTo(toKV("d", tagUsage)));
308    assertEquals("c", toRowStr(scanner.getCell()));
309
310    // Across a block boundary now.
311    // 'h' does not exist so we will get a '1' back for not found.
312    assertEquals(0, scanner.seekTo(toKV("i", tagUsage)));
313    assertEquals("i", toRowStr(scanner.getCell()));
314
315    assertEquals(1, scanner.seekTo(toKV("l", tagUsage)));
316    assertEquals("k", toRowStr(scanner.getCell()));
317
318    reader.close();
319    deleteTestDir(fs);
320  }
321
322  @Test
323  public void testBlockContainingKey() throws Exception {
324    testBlockContainingKeyInternals(TagUsage.NO_TAG);
325    testBlockContainingKeyInternals(TagUsage.ONLY_TAG);
326    testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG);
327  }
328
329  protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException {
330    Path p = makeNewFile(tagUsage);
331    FileSystem fs = TEST_UTIL.getTestFileSystem();
332    Configuration conf = TEST_UTIL.getConfiguration();
333    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
334    HFileBlockIndex.BlockIndexReader blockIndexReader =
335      reader.getDataBlockIndexReader();
336    System.out.println(blockIndexReader.toString());
337    // falls before the start of the file.
338    assertEquals(-1, blockIndexReader.rootBlockContainingKey(
339        toKV("a", tagUsage)));
340    assertEquals(0, blockIndexReader.rootBlockContainingKey(
341        toKV("c", tagUsage)));
342    assertEquals(0, blockIndexReader.rootBlockContainingKey(
343        toKV("d", tagUsage)));
344    assertEquals(0, blockIndexReader.rootBlockContainingKey(
345        toKV("e", tagUsage)));
346    assertEquals(0, blockIndexReader.rootBlockContainingKey(
347        toKV("g", tagUsage)));
348    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("h", tagUsage)));
349    assertEquals(1, blockIndexReader.rootBlockContainingKey(
350        toKV("i", tagUsage)));
351    assertEquals(1, blockIndexReader.rootBlockContainingKey(
352        toKV("j", tagUsage)));
353    assertEquals(1, blockIndexReader.rootBlockContainingKey(
354        toKV("k", tagUsage)));
355    assertEquals(1, blockIndexReader.rootBlockContainingKey(
356        toKV("l", tagUsage)));
357    reader.close();
358    deleteTestDir(fs);
359  }
360}