001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertNull;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.FileStatus;
029import org.apache.hadoop.fs.FileSystem;
030import org.apache.hadoop.fs.Path;
031import org.apache.hadoop.hbase.Cell;
032import org.apache.hadoop.hbase.CellComparatorImpl;
033import org.apache.hadoop.hbase.CellUtil;
034import org.apache.hadoop.hbase.ExtendedCell;
035import org.apache.hadoop.hbase.HBaseClassTestRule;
036import org.apache.hadoop.hbase.HBaseTestingUtil;
037import org.apache.hadoop.hbase.KeyValue;
038import org.apache.hadoop.hbase.KeyValueUtil;
039import org.apache.hadoop.hbase.io.hfile.CacheConfig;
040import org.apache.hadoop.hbase.io.hfile.HFile;
041import org.apache.hadoop.hbase.io.hfile.HFileContext;
042import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
043import org.apache.hadoop.hbase.io.hfile.HFileScanner;
044import org.apache.hadoop.hbase.io.hfile.ReaderContext;
045import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
046import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
047import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
048import org.apache.hadoop.hbase.testclassification.IOTests;
049import org.apache.hadoop.hbase.testclassification.SmallTests;
050import org.apache.hadoop.hbase.util.Bytes;
051import org.junit.AfterClass;
052import org.junit.BeforeClass;
053import org.junit.ClassRule;
054import org.junit.Test;
055import org.junit.experimental.categories.Category;
056
057@Category({ IOTests.class, SmallTests.class })
058public class TestHalfStoreFileReader {
059
060  @ClassRule
061  public static final HBaseClassTestRule CLASS_RULE =
062    HBaseClassTestRule.forClass(TestHalfStoreFileReader.class);
063
064  private static HBaseTestingUtil TEST_UTIL;
065
066  @BeforeClass
067  public static void setupBeforeClass() throws Exception {
068    TEST_UTIL = new HBaseTestingUtil();
069  }
070
071  @AfterClass
072  public static void tearDownAfterClass() throws Exception {
073    TEST_UTIL.cleanupTestDir();
074  }
075
076  /**
077   * Test the scanner and reseek of a half hfile scanner. The scanner API demands that seekTo and
078   * reseekTo() only return < 0 if the key lies before the start of the file (with no position on
079   * the scanner). Returning 0 if perfect match (rare), and return > 1 if we got an imperfect match.
080   * The latter case being the most common, we should generally be returning 1, and if we do, there
081   * may or may not be a 'next' in the scanner/file. A bug in the half file scanner was returning -1
082   * at the end of the bottom half, and that was causing the infrastructure above to go null causing
083   * NPEs and other problems. This test reproduces that failure, and also tests both the bottom and
084   * top of the file while we are at it.
085   */
086  @Test
087  public void testHalfScanAndReseek() throws IOException {
088    Configuration conf = TEST_UTIL.getConfiguration();
089    FileSystem fs = FileSystem.get(conf);
090    String root_dir = TEST_UTIL.getDataTestDir().toString();
091    Path parentPath = new Path(new Path(root_dir, "parent"), "CF");
092    fs.mkdirs(parentPath);
093    Path splitAPath = new Path(new Path(root_dir, "splita"), "CF");
094    Path splitBPath = new Path(new Path(root_dir, "splitb"), "CF");
095    Path filePath = StoreFileWriter.getUniqueFile(fs, parentPath);
096
097    CacheConfig cacheConf = new CacheConfig(conf);
098    HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build();
099    HFile.Writer w =
100      HFile.getWriterFactory(conf, cacheConf).withPath(fs, filePath).withFileContext(meta).create();
101
102    // write some things.
103    List<KeyValue> items = genSomeKeys();
104    for (KeyValue kv : items) {
105      w.append(kv);
106    }
107    w.close();
108
109    HFile.Reader r = HFile.createReader(fs, filePath, cacheConf, true, conf);
110    Cell midKV = r.midKey().get();
111    byte[] midkey = CellUtil.cloneRow(midKV);
112
113    Path splitFileA = new Path(splitAPath, filePath.getName() + ".parent");
114    Path splitFileB = new Path(splitBPath, filePath.getName() + ".parent");
115
116    Reference bottom = new Reference(midkey, Reference.Range.bottom);
117    bottom.write(fs, splitFileA);
118    doTestOfScanAndReseek(splitFileA, fs, bottom, cacheConf);
119
120    Reference top = new Reference(midkey, Reference.Range.top);
121    top.write(fs, splitFileB);
122    doTestOfScanAndReseek(splitFileB, fs, top, cacheConf);
123
124    r.close();
125  }
126
127  private void doTestOfScanAndReseek(Path p, FileSystem fs, Reference bottom, CacheConfig cacheConf)
128    throws IOException {
129    Path referencePath = StoreFileInfo.getReferredToFile(p);
130    FSDataInputStreamWrapper in = new FSDataInputStreamWrapper(fs, referencePath, false, 0);
131    FileStatus status = fs.getFileStatus(referencePath);
132    long length = status.getLen();
133    ReaderContextBuilder contextBuilder =
134      new ReaderContextBuilder().withInputStreamWrapper(in).withFileSize(length)
135        .withReaderType(ReaderContext.ReaderType.PREAD).withFileSystem(fs).withFilePath(p);
136    ReaderContext context = contextBuilder.build();
137    StoreFileInfo storeFileInfo = new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, p, true);
138    storeFileInfo.initHFileInfo(context);
139    final HalfStoreFileReader halfreader =
140      (HalfStoreFileReader) storeFileInfo.createReader(context, cacheConf);
141    storeFileInfo.getHFileInfo().initMetaAndIndex(halfreader.getHFileReader());
142    halfreader.loadFileInfo();
143    try (HFileScanner scanner = halfreader.getScanner(false, false, false)) {
144
145      scanner.seekTo();
146      Cell curr;
147      do {
148        curr = scanner.getCell();
149        KeyValue reseekKv = getLastOnCol(curr);
150        int ret = scanner.reseekTo(reseekKv);
151        assertTrue("reseek to returned: " + ret, ret > 0);
152        // System.out.println(curr + ": " + ret);
153      } while (scanner.next());
154
155      int ret = scanner.reseekTo(getLastOnCol(curr));
156      // System.out.println("Last reseek: " + ret);
157      assertTrue(ret > 0);
158    }
159
160    halfreader.close(true);
161  }
162
163  // Tests the scanner on an HFile that is backed by HalfStoreFiles
164  @Test
165  public void testHalfScanner() throws IOException {
166    String root_dir = TEST_UTIL.getDataTestDir().toString();
167    Path p = new Path(root_dir, "test");
168    Configuration conf = TEST_UTIL.getConfiguration();
169    FileSystem fs = FileSystem.get(conf);
170    CacheConfig cacheConf = new CacheConfig(conf);
171    HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build();
172    HFile.Writer w =
173      HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create();
174
175    // write some things.
176    List<KeyValue> items = genSomeKeys();
177    for (KeyValue kv : items) {
178      w.append(kv);
179    }
180    w.close();
181
182    HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf);
183    ExtendedCell midKV = r.midKey().get();
184    byte[] midkey = CellUtil.cloneRow(midKV);
185
186    Reference bottom = new Reference(midkey, Reference.Range.bottom);
187    Reference top = new Reference(midkey, Reference.Range.top);
188
189    // Ugly code to get the item before the midkey
190    KeyValue beforeMidKey = null;
191    for (KeyValue item : items) {
192      if (CellComparatorImpl.COMPARATOR.compare(item, midKV) >= 0) {
193        break;
194      }
195      beforeMidKey = item;
196    }
197    System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey));
198    System.out.println("beforeMidKey: " + beforeMidKey);
199
200    // Seek on the splitKey, should be in top, not in bottom
201    Cell foundKeyValue = doTestOfSeekBefore(p, fs, bottom, midKV, cacheConf);
202    assertEquals(beforeMidKey, foundKeyValue);
203
204    // Seek tot the last thing should be the penultimate on the top, the one before the midkey on
205    // the bottom.
206    foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(items.size() - 1), cacheConf);
207    assertEquals(items.get(items.size() - 2), foundKeyValue);
208
209    foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(items.size() - 1), cacheConf);
210    assertEquals(beforeMidKey, foundKeyValue);
211
212    // Try and seek before something that is in the bottom.
213    foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(0), cacheConf);
214    assertNull(foundKeyValue);
215
216    // Try and seek before the first thing.
217    foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(0), cacheConf);
218    assertNull(foundKeyValue);
219
220    // Try and seek before the second thing in the top and bottom.
221    foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(1), cacheConf);
222    assertNull(foundKeyValue);
223
224    foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(1), cacheConf);
225    assertEquals(items.get(0), foundKeyValue);
226
227    // Try to seek before the splitKey in the top file
228    foundKeyValue = doTestOfSeekBefore(p, fs, top, midKV, cacheConf);
229    assertNull(foundKeyValue);
230  }
231
232  private Cell doTestOfSeekBefore(Path p, FileSystem fs, Reference bottom, ExtendedCell seekBefore,
233    CacheConfig cacheConfig) throws IOException {
234    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, p).build();
235    StoreFileInfo storeFileInfo =
236      new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, fs.getFileStatus(p), bottom);
237    storeFileInfo.initHFileInfo(context);
238    final HalfStoreFileReader halfreader =
239      (HalfStoreFileReader) storeFileInfo.createReader(context, cacheConfig);
240    storeFileInfo.getHFileInfo().initMetaAndIndex(halfreader.getHFileReader());
241    halfreader.loadFileInfo();
242    try (HFileScanner scanner = halfreader.getScanner(false, false, false)) {
243      scanner.seekBefore(seekBefore);
244      if (scanner.getCell() != null) {
245        return KeyValueUtil.copyToNewKeyValue(scanner.getCell());
246      } else {
247        return null;
248      }
249    }
250  }
251
252  private KeyValue getLastOnCol(Cell curr) {
253    return KeyValueUtil.createLastOnRow(curr.getRowArray(), curr.getRowOffset(),
254      curr.getRowLength(), curr.getFamilyArray(), curr.getFamilyOffset(), curr.getFamilyLength(),
255      curr.getQualifierArray(), curr.getQualifierOffset(), curr.getQualifierLength());
256  }
257
258  static final int SIZE = 1000;
259
260  static byte[] _b(String s) {
261    return Bytes.toBytes(s);
262  }
263
264  List<KeyValue> genSomeKeys() {
265    List<KeyValue> ret = new ArrayList<>(SIZE);
266    for (int i = 0; i < SIZE; i++) {
267      KeyValue kv =
268        new KeyValue(_b(String.format("row_%04d", i)), _b("family"), _b("qualifier"), 1000, // timestamp
269          _b("value"));
270      ret.add(kv);
271    }
272    return ret;
273  }
274}