001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.BLOCK_COMPRESSED_SIZE_PREDICATOR;
021import static org.junit.Assert.assertArrayEquals;
022import static org.junit.Assert.assertEquals;
023import static org.junit.Assert.assertFalse;
024import static org.junit.Assert.assertNotNull;
025import static org.junit.Assert.assertNull;
026import static org.junit.Assert.assertTrue;
027import static org.junit.Assert.fail;
028import static org.mockito.ArgumentMatchers.any;
029import static org.mockito.Mockito.mock;
030import static org.mockito.Mockito.when;
031
032import java.io.IOException;
033import java.nio.ByteBuffer;
034import java.util.ArrayList;
035import java.util.Arrays;
036import java.util.Collections;
037import java.util.Comparator;
038import java.util.List;
039import java.util.Map;
040import java.util.OptionalLong;
041import java.util.TreeSet;
042import java.util.function.BiFunction;
043import org.apache.hadoop.conf.Configuration;
044import org.apache.hadoop.fs.FileSystem;
045import org.apache.hadoop.fs.Path;
046import org.apache.hadoop.hbase.Cell;
047import org.apache.hadoop.hbase.CellUtil;
048import org.apache.hadoop.hbase.ExtendedCell;
049import org.apache.hadoop.hbase.HBaseClassTestRule;
050import org.apache.hadoop.hbase.HBaseTestingUtil;
051import org.apache.hadoop.hbase.HConstants;
052import org.apache.hadoop.hbase.KeyValue;
053import org.apache.hadoop.hbase.KeyValueUtil;
054import org.apache.hadoop.hbase.PrivateCellUtil;
055import org.apache.hadoop.hbase.TableDescriptors;
056import org.apache.hadoop.hbase.TableName;
057import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
058import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
059import org.apache.hadoop.hbase.client.RegionInfo;
060import org.apache.hadoop.hbase.client.RegionInfoBuilder;
061import org.apache.hadoop.hbase.client.Scan;
062import org.apache.hadoop.hbase.client.TableDescriptor;
063import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
064import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
065import org.apache.hadoop.hbase.io.HFileLink;
066import org.apache.hadoop.hbase.io.compress.Compression;
067import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
068import org.apache.hadoop.hbase.io.hfile.BlockCache;
069import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
070import org.apache.hadoop.hbase.io.hfile.CacheConfig;
071import org.apache.hadoop.hbase.io.hfile.CacheStats;
072import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
073import org.apache.hadoop.hbase.io.hfile.HFile;
074import org.apache.hadoop.hbase.io.hfile.HFileBlock;
075import org.apache.hadoop.hbase.io.hfile.HFileContext;
076import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
077import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
078import org.apache.hadoop.hbase.io.hfile.HFileScanner;
079import org.apache.hadoop.hbase.io.hfile.PreviousBlockCompressionRatePredicator;
080import org.apache.hadoop.hbase.io.hfile.ReaderContext;
081import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
082import org.apache.hadoop.hbase.io.hfile.UncompressedBlockSizePredicator;
083import org.apache.hadoop.hbase.master.MasterServices;
084import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
085import org.apache.hadoop.hbase.testclassification.MediumTests;
086import org.apache.hadoop.hbase.testclassification.RegionServerTests;
087import org.apache.hadoop.hbase.util.BloomFilterFactory;
088import org.apache.hadoop.hbase.util.Bytes;
089import org.apache.hadoop.hbase.util.ChecksumType;
090import org.apache.hadoop.hbase.util.CommonFSUtils;
091import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
092import org.junit.AfterClass;
093import org.junit.Before;
094import org.junit.ClassRule;
095import org.junit.Rule;
096import org.junit.Test;
097import org.junit.experimental.categories.Category;
098import org.junit.rules.TestName;
099import org.mockito.Mockito;
100import org.slf4j.Logger;
101import org.slf4j.LoggerFactory;
102
103import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
104import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
105import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
106
107/**
108 * Test HStoreFile
109 */
110@Category({ RegionServerTests.class, MediumTests.class })
111public class TestHStoreFile {
112
  /** Class-level test rule bound to this test class. */
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHStoreFile.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestHStoreFile.class);
  // Shared testing utility; supplies the Configuration and the test data directories.
  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  // Cache configuration built from the shared test configuration.
  private CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
  // Directory in which the bloom filter tests create their store files.
  private static Path ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile");
  // Checksum type and bytes-per-checksum applied to the HFileContext in the bloom filter tests.
  private static final ChecksumType CKTYPE = ChecksumType.CRC32C;
  private static final int CKBYTES = 512;
  // Column family name under which store files are committed and split.
  private static String TEST_FAMILY = "cf";
  // Inclusive bounds of each character of the two-character rows written by writeStoreFile.
  private static final char FIRST_CHAR = 'a';
  private static final char LAST_CHAR = 'z';

  /** Exposes the name of the running test method; used for directory, family and qualifier. */
  @Rule
  public TestName name = new TestName();

  // Per-test state, assigned in setUp().
  private Configuration conf;
  private Path testDir;
  private FileSystem fs;
133
134  @Before
135  public void setUp() throws IOException {
136    conf = TEST_UTIL.getConfiguration();
137    testDir = TEST_UTIL.getDataTestDir(name.getMethodName());
138    fs = testDir.getFileSystem(conf);
139  }
140
141  @AfterClass
142  public static void tearDownAfterClass() {
143    TEST_UTIL.cleanupTestDir();
144  }
145
146  /**
147   * Write a file and then assert that we can read from top and bottom halves using two
148   * HalfMapFiles, as well as one HalfMapFile and one HFileLink file.
149   */
150  @Test
151  public void testBasicHalfAndHFileLinkMapFile() throws Exception {
152    final RegionInfo hri =
153      RegionInfoBuilder.newBuilder(TableName.valueOf("testBasicHalfAndHFileLinkMapFile")).build();
154    // The locations of HFileLink refers hfiles only should be consistent with the table dir
155    // create by CommonFSUtils directory, so we should make the region directory under
156    // the mode of CommonFSUtils.getTableDir here.
157    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
158      CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), hri.getTable()), hri);
159
160    HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
161    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
162      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
163    writeStoreFile(writer);
164
165    Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
166    HStoreFile sf = new HStoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE, true);
167    checkHalfHFile(regionFs, sf);
168  }
169
170  private void writeStoreFile(final StoreFileWriter writer) throws IOException {
171    writeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
172      Bytes.toBytes(name.getMethodName()));
173  }
174
  // Pick a split point (roughly halfway): the midpoint of FIRST_CHAR and LAST_CHAR followed by
  // FIRST_CHAR.
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR) / 2, FIRST_CHAR };
177
178  /*
179   * Writes HStoreKey and ImmutableBytes data to passed writer and then closes it.
180   */
181  public static void writeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier)
182    throws IOException {
183    long now = EnvironmentEdgeManager.currentTime();
184    try {
185      for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
186        for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
187          byte[] b = new byte[] { (byte) d, (byte) e };
188          writer.append(new KeyValue(b, fam, qualifier, now, b));
189        }
190      }
191    } finally {
192      writer.close();
193    }
194  }
195
196  public static void writeLargeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier,
197    int rounds) throws IOException {
198    long now = EnvironmentEdgeManager.currentTime();
199    try {
200      for (int i = 0; i < rounds; i++) {
201        for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
202          for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
203            byte[] b = new byte[] { (byte) d, (byte) e };
204            byte[] key = new byte[] { (byte) i };
205            writer.append(new KeyValue(key, fam, qualifier, now, b));
206          }
207        }
208      }
209    } finally {
210      writer.close();
211    }
212  }
213
214  /**
215   * Test that our mechanism of writing store files in one region to reference store files in other
216   * regions works.
217   */
218  @Test
219  public void testReference() throws IOException {
220    final RegionInfo hri =
221      RegionInfoBuilder.newBuilder(TableName.valueOf("testReferenceTb")).build();
222    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
223      new Path(testDir, hri.getTable().getNameAsString()), hri);
224
225    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
226    // Make a store file and write data to it.
227    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
228      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
229    writeStoreFile(writer);
230
231    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
232    HStoreFile hsf = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
233    hsf.initReader();
234    StoreFileReader reader = hsf.getReader();
235    // Split on a row, not in middle of row. Midkey returned by reader
236    // may be in middle of row. Create new one with empty column and
237    // timestamp.
238    byte[] midRow = CellUtil.cloneRow(reader.midKey().get());
239    byte[] finalRow = CellUtil.cloneRow(reader.getLastKey().get());
240    hsf.closeStoreFile(true);
241
242    // Make a reference
243    RegionInfo splitHri = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(midRow).build();
244    Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true);
245    HStoreFile refHsf = new HStoreFile(this.fs, refPath, conf, cacheConf, BloomType.NONE, true);
246    refHsf.initReader();
247    // Now confirm that I can read from the reference and that it only gets
248    // keys from top half of the file.
249    try (HFileScanner s = refHsf.getReader().getScanner(false, false, false)) {
250      Cell kv = null;
251      for (boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
252        ByteBuffer bb = ByteBuffer.wrap(((KeyValue) s.getKey()).getKey());
253        kv = KeyValueUtil.createKeyValueFromKey(bb);
254        if (first) {
255          assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), midRow, 0,
256            midRow.length));
257          first = false;
258        }
259      }
260      assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), finalRow, 0,
261        finalRow.length));
262    }
263  }
264
265  @Test
266  public void testStoreFileReference() throws Exception {
267    final RegionInfo hri =
268      RegionInfoBuilder.newBuilder(TableName.valueOf("testStoreFileReference")).build();
269    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
270      new Path(testDir, hri.getTable().getNameAsString()), hri);
271    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
272
273    // Make a store file and write data to it.
274    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
275      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
276    writeStoreFile(writer);
277    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
278    writer.close();
279
280    HStoreFile file = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
281    file.initReader();
282    StoreFileReader r = file.getReader();
283    assertNotNull(r);
284    StoreFileScanner scanner =
285      new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false, false);
286
287    // Verify after instantiating scanner refCount is increased
288    assertTrue("Verify file is being referenced", file.isReferencedInReads());
289    scanner.close();
290    // Verify after closing scanner refCount is decreased
291    assertFalse("Verify file is not being referenced", file.isReferencedInReads());
292  }
293
294  @Test
295  public void testEmptyStoreFileRestrictKeyRanges() throws Exception {
296    StoreFileReader reader = mock(StoreFileReader.class);
297    HStore store = mock(HStore.class);
298    byte[] cf = Bytes.toBytes("ty");
299    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf);
300    when(store.getColumnFamilyDescriptor()).thenReturn(cfd);
301    try (StoreFileScanner scanner =
302      new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true, false)) {
303      Scan scan = new Scan();
304      scan.setColumnFamilyTimeRange(cf, 0, 1);
305      assertFalse(scanner.shouldUseScanner(scan, store, 0));
306    }
307  }
308
309  @Test
310  public void testHFileLink() throws IOException {
311    final RegionInfo hri =
312      RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build();
313    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
314    Configuration testConf = new Configuration(this.conf);
315    CommonFSUtils.setRootDir(testConf, testDir);
316    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
317      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);
318    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
319
320    // Make a store file and write data to it.
321    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
322      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
323    writeStoreFile(writer);
324
325    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
326    Path dstPath = new Path(regionFs.getTableDir(), new Path("test-region", TEST_FAMILY));
327    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
328    Path linkFilePath =
329      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));
330
331    // Try to open store file from link
332    StoreFileInfo storeFileInfo = new StoreFileInfo(testConf, this.fs, linkFilePath, true);
333    HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
334    assertTrue(storeFileInfo.isLink());
335    hsf.initReader();
336
337    // Now confirm that I can read from the link
338    int count = 0;
339    try (StoreFileScanner scanner = hsf.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
340      scanner.seek(KeyValue.LOWESTKEY);
341      while (scanner.next() != null) {
342        count++;
343      }
344    }
345    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
346  }
347
348  /**
349   * This test creates an hfile and then the dir structures and files to verify that references to
350   * hfilelinks (created by snapshot clones) can be properly interpreted.
351   */
352  @Test
353  public void testReferenceToHFileLink() throws IOException {
354    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
355    Configuration testConf = new Configuration(this.conf);
356    CommonFSUtils.setRootDir(testConf, testDir);
357
358    // adding legal table name chars to verify regex handles it.
359    RegionInfo hri = RegionInfoBuilder.newBuilder(TableName.valueOf("_original-evil-name")).build();
360    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
361      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);
362
363    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
364    // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
365    StoreFileWriter writer = new StoreFileWriter.Builder(testConf, cacheConf, this.fs)
366      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
367    writeStoreFile(writer);
368    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
369
370    // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
371    RegionInfo hriClone = RegionInfoBuilder.newBuilder(TableName.valueOf("clone")).build();
372    HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
373      CommonFSUtils.getTableDir(testDir, hri.getTable()), hriClone);
374    Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY);
375    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
376    Path linkFilePath =
377      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));
378
379    // create splits of the link.
380    // <root>/clone/splitA/<cf>/<reftohfilelink>,
381    // <root>/clone/splitB/<cf>/<reftohfilelink>
382    RegionInfo splitHriA = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(SPLITKEY).build();
383    RegionInfo splitHriB =
384      RegionInfoBuilder.newBuilder(hri.getTable()).setStartKey(SPLITKEY).build();
385    HStoreFile f = new HStoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE, true);
386    f.initReader();
387    Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true); // top
388    Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false);// bottom
389    f.closeStoreFile(true);
390    // OK test the thing
391    CommonFSUtils.logFileSystemState(fs, testDir, LOG);
392
393    // There is a case where a file with the hfilelink pattern is actually a daughter
394    // reference to a hfile link. This code in StoreFile that handles this case.
395
396    // Try to open store file from link
397    HStoreFile hsfA = new HStoreFile(this.fs, pathA, testConf, cacheConf, BloomType.NONE, true);
398    hsfA.initReader();
399
400    // Now confirm that I can read from the ref to link
401    int count = 0;
402    try (StoreFileScanner scanner = hsfA.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
403      scanner.seek(KeyValue.LOWESTKEY);
404      while (scanner.next() != null) {
405        count++;
406      }
407      assertTrue(count > 0); // read some rows here
408    }
409
410    // Try to open store file from link
411    HStoreFile hsfB = new HStoreFile(this.fs, pathB, testConf, cacheConf, BloomType.NONE, true);
412    hsfB.initReader();
413
414    // Now confirm that I can read from the ref to link
415    try (StoreFileScanner scanner = hsfB.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
416      scanner.seek(KeyValue.LOWESTKEY);
417      while (scanner.next() != null) {
418        count++;
419      }
420    }
421
422    // read the rest of the rows
423    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
424  }
425
426  private void checkHalfHFile(final HRegionFileSystem regionFs, final HStoreFile f)
427    throws IOException {
428    f.initReader();
429    Cell midkey = f.getReader().midKey().get();
430    KeyValue midKV = (KeyValue) midkey;
431    // 1. test using the midRow as the splitKey, this test will generate two Reference files
432    // in the children
433    byte[] midRow = CellUtil.cloneRow(midKV);
434    // Create top split.
435    RegionInfo topHri =
436      RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable()).setEndKey(SPLITKEY).build();
437    Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true);
438    // Create bottom split.
439    RegionInfo bottomHri = RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable())
440      .setStartKey(SPLITKEY).build();
441    Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false);
442    // Make readers on top and bottom.
443    HStoreFile topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true);
444    topF.initReader();
445    StoreFileReader top = topF.getReader();
446    HStoreFile bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true);
447    bottomF.initReader();
448    StoreFileReader bottom = bottomF.getReader();
449    ByteBuffer previous = null;
450    LOG.info("Midkey: " + midKV.toString());
451    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midKV.getKey());
452    try {
453      // Now make two HalfMapFiles and assert they can read the full backing
454      // file, one from the top and the other from the bottom.
455      // Test bottom half first.
456      // Now test reading from the top.
457      boolean first = true;
458      ByteBuffer key = null;
459      try (HFileScanner topScanner = top.getScanner(false, false, false)) {
460        while (
461          (!topScanner.isSeeked() && topScanner.seekTo())
462            || (topScanner.isSeeked() && topScanner.next())
463        ) {
464          key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());
465
466          if (
467            (PrivateCellUtil.compare(topScanner.getReader().getComparator(), midKV, key.array(),
468              key.arrayOffset(), key.limit())) > 0
469          ) {
470            fail("key=" + Bytes.toStringBinary(key) + " < midkey=" + midkey);
471          }
472          if (first) {
473            first = false;
474            LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
475          }
476        }
477      }
478      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));
479
480      first = true;
481      try (HFileScanner bottomScanner = bottom.getScanner(false, false, false)) {
482        while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
483          previous = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
484          key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
485          if (first) {
486            first = false;
487            LOG.info("First in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
488          }
489          assertTrue(key.compareTo(bbMidkeyBytes) < 0);
490        }
491        if (previous != null) {
492          LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
493        }
494      }
495      // Remove references.
496      regionFs.cleanupDaughterRegion(topHri);
497      regionFs.cleanupDaughterRegion(bottomHri);
498
499      // 2. test using a midkey which will generate one Reference file and one HFileLink file.
500      // First, do a key that is < than first key. Ensure splits behave
501      // properly.
502      byte[] badmidkey = Bytes.toBytes("  .");
503      assertTrue(fs.exists(f.getPath()));
504      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
505      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
506
507      assertNull(bottomPath);
508
509      topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true);
510      topF.initReader();
511      top = topF.getReader();
512      // Now read from the top.
513      first = true;
514      try (HFileScanner topScanner = top.getScanner(false, false, false)) {
515        KeyValue.KeyOnlyKeyValue keyOnlyKV = new KeyValue.KeyOnlyKeyValue();
516        while ((!topScanner.isSeeked() && topScanner.seekTo()) || topScanner.next()) {
517          key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());
518          keyOnlyKV.setKey(key.array(), 0 + key.arrayOffset(), key.limit());
519          assertTrue(PrivateCellUtil.compare(topScanner.getReader().getComparator(), keyOnlyKV,
520            badmidkey, 0, badmidkey.length) >= 0);
521          if (first) {
522            first = false;
523            KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
524            LOG.info("First top when key < bottom: " + keyKV);
525            String tmp =
526              Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
527            for (int i = 0; i < tmp.length(); i++) {
528              assertTrue(tmp.charAt(i) == 'a');
529            }
530          }
531        }
532        KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
533        LOG.info("Last top when key < bottom: " + keyKV);
534        String tmp =
535          Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
536        for (int i = 0; i < tmp.length(); i++) {
537          assertTrue(tmp.charAt(i) == 'z');
538        }
539      }
540      // Remove references.
541      regionFs.cleanupDaughterRegion(topHri);
542      regionFs.cleanupDaughterRegion(bottomHri);
543
544      // Test when badkey is > than last key in file ('||' > 'zz').
545      badmidkey = Bytes.toBytes("|||");
546      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
547      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
548      assertNull(topPath);
549
550      bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true);
551      bottomF.initReader();
552      bottom = bottomF.getReader();
553      first = true;
554      try (HFileScanner bottomScanner = bottom.getScanner(false, false, false)) {
555        while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
556          key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
557          if (first) {
558            first = false;
559            KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
560            LOG.info("First bottom when key > top: " + keyKV);
561            String tmp =
562              Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
563            for (int i = 0; i < tmp.length(); i++) {
564              assertTrue(tmp.charAt(i) == 'a');
565            }
566          }
567        }
568        KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
569        LOG.info("Last bottom when key > top: " + keyKV);
570        String tmp =
571          Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
572        for (int i = 0; i < tmp.length(); i++) {
573          assertTrue(Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength())
574            .charAt(i) == 'z');
575        }
576      }
577    } finally {
578      if (top != null) {
579        top.close(true); // evict since we are about to delete the file
580      }
581      if (bottom != null) {
582        bottom.close(true); // evict since we are about to delete the file
583      }
584      fs.delete(f.getPath(), true);
585    }
586  }
587
588  private static StoreFileScanner getStoreFileScanner(StoreFileReader reader, boolean cacheBlocks,
589    boolean pread) {
590    return reader.getStoreFileScanner(cacheBlocks, pread, false, 0, 0, false);
591  }
592
  // Format for row keys in the bloom filter tests: a decimal number zero-padded to 10 digits.
  private static final String localFormatter = "%010d";
594
595  private void bloomWriteRead(StoreFileWriter writer, FileSystem fs) throws Exception {
596    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
597    Path f = writer.getPath();
598    long now = EnvironmentEdgeManager.currentTime();
599    for (int i = 0; i < 2000; i += 2) {
600      String row = String.format(localFormatter, i);
601      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
602        now, Bytes.toBytes("value"));
603      writer.append(kv);
604    }
605    writer.close();
606
607    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
608    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
609    storeFileInfo.initHFileInfo(context);
610    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
611    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
612    reader.loadFileInfo();
613    reader.loadBloomfilter();
614    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
615
616    // check false positives rate
617    int falsePos = 0;
618    int falseNeg = 0;
619    for (int i = 0; i < 2000; i++) {
620      String row = String.format(localFormatter, i);
621      TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
622      columns.add(Bytes.toBytes("family:col"));
623
624      Scan scan = new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
625      scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes("family:col"));
626      HStore store = mock(HStore.class);
627      when(store.getColumnFamilyDescriptor())
628        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
629      boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
630      if (i % 2 == 0) {
631        if (!exists) {
632          falseNeg++;
633        }
634      } else {
635        if (exists) {
636          falsePos++;
637        }
638      }
639    }
640    reader.close(true); // evict because we are about to delete the file
641    fs.delete(f, true);
642    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
643    int maxFalsePos = (int) (2 * 2000 * err);
644    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
645      + maxFalsePos + ")", falsePos <= maxFalsePos);
646  }
647
  // Block size passed to HFileContextBuilder.withBlockSize in the bloom filter tests.
  private static final int BLOCKSIZE_SMALL = 8192;
649
650  @Test
651  public void testBloomFilter() throws Exception {
652    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
653    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
654
655    // write the file
656    if (!fs.exists(ROOT_DIR)) {
657      fs.mkdirs(ROOT_DIR);
658    }
659    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);
660    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
661      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
662    // Make a store file and write data to it.
663    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
664      .withBloomType(BloomType.ROW).withMaxKeyCount(2000).withFileContext(meta).build();
665    bloomWriteRead(writer, fs);
666  }
667
668  @Test
669  public void testDeleteFamilyBloomFilter() throws Exception {
670    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
671    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
672    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
673
674    // write the file
675    if (!fs.exists(ROOT_DIR)) {
676      fs.mkdirs(ROOT_DIR);
677    }
678    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);
679
680    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
681      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
682    // Make a store file and write data to it.
683    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
684      .withMaxKeyCount(2000).withFileContext(meta).build();
685
686    // add delete family
687    long now = EnvironmentEdgeManager.currentTime();
688    for (int i = 0; i < 2000; i += 2) {
689      String row = String.format(localFormatter, i);
690      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
691        now, KeyValue.Type.DeleteFamily, Bytes.toBytes("value"));
692      writer.append(kv);
693    }
694    writer.close();
695
696    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
697    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
698    storeFileInfo.initHFileInfo(context);
699    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
700    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
701    reader.loadFileInfo();
702    reader.loadBloomfilter();
703
704    // check false positives rate
705    int falsePos = 0;
706    int falseNeg = 0;
707    for (int i = 0; i < 2000; i++) {
708      String row = String.format(localFormatter, i);
709      byte[] rowKey = Bytes.toBytes(row);
710      boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0, rowKey.length);
711      if (i % 2 == 0) {
712        if (!exists) {
713          falseNeg++;
714        }
715      } else {
716        if (exists) {
717          falsePos++;
718        }
719      }
720    }
721    assertEquals(1000, reader.getDeleteFamilyCnt());
722    reader.close(true); // evict because we are about to delete the file
723    fs.delete(f, true);
724    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
725    int maxFalsePos = (int) (2 * 2000 * err);
726    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
727      + maxFalsePos, falsePos <= maxFalsePos);
728  }
729
730  /**
731   * Test for HBASE-8012
732   */
733  @Test
734  public void testReseek() throws Exception {
735    // write the file
736    if (!fs.exists(ROOT_DIR)) {
737      fs.mkdirs(ROOT_DIR);
738    }
739    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);
740
741    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
742    // Make a store file and write data to it.
743    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
744      .withFileContext(meta).build();
745
746    writeStoreFile(writer);
747    writer.close();
748
749    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
750    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
751    storeFileInfo.initHFileInfo(context);
752    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
753    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
754
755    // Now do reseek with empty KV to position to the beginning of the file
756
757    KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
758    StoreFileScanner s = getStoreFileScanner(reader, false, false);
759    s.reseek(k);
760
761    assertNotNull("Intial reseek should position at the beginning of the file", s.peek());
762  }
763
764  @Test
765  public void testBloomTypes() throws Exception {
766    float err = (float) 0.01;
767    FileSystem fs = FileSystem.getLocal(conf);
768    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
769    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
770
771    int rowCount = 50;
772    int colCount = 10;
773    int versions = 2;
774
775    // run once using columns and once using rows
776    BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
777    int[] expKeys = { rowCount * colCount, rowCount };
778    // below line deserves commentary. it is expected bloom false positives
779    // column = rowCount*2*colCount inserts
780    // row-level = only rowCount*2 inserts, but failures will be magnified by
781    // 2nd for loop for every column (2*colCount)
782    float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };
783
784    if (!fs.exists(ROOT_DIR)) {
785      fs.mkdirs(ROOT_DIR);
786    }
787    for (int x : new int[] { 0, 1 }) {
788      // write the file
789      Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);
790
791      HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
792        .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
793      // Make a store file and write data to it.
794      StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
795        .withBloomType(bt[x]).withMaxKeyCount(expKeys[x]).withFileContext(meta).build();
796
797      long now = EnvironmentEdgeManager.currentTime();
798      for (int i = 0; i < rowCount * 2; i += 2) { // rows
799        for (int j = 0; j < colCount * 2; j += 2) { // column qualifiers
800          String row = String.format(localFormatter, i);
801          String col = String.format(localFormatter, j);
802          for (int k = 0; k < versions; ++k) { // versions
803            KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
804              Bytes.toBytes("col" + col), now - k, Bytes.toBytes(-1L));
805            writer.append(kv);
806          }
807        }
808      }
809      writer.close();
810
811      ReaderContext context =
812        new ReaderContextBuilder().withFilePath(f).withFileSize(fs.getFileStatus(f).getLen())
813          .withFileSystem(fs).withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f)).build();
814      StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
815      storeFileInfo.initHFileInfo(context);
816      StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
817      storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
818      reader.loadFileInfo();
819      reader.loadBloomfilter();
820      StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
821      assertEquals(expKeys[x], reader.getGeneralBloomFilter().getKeyCount());
822
823      HStore store = mock(HStore.class);
824      when(store.getColumnFamilyDescriptor())
825        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
826      // check false positives rate
827      int falsePos = 0;
828      int falseNeg = 0;
829      for (int i = 0; i < rowCount * 2; ++i) { // rows
830        for (int j = 0; j < colCount * 2; ++j) { // column qualifiers
831          String row = String.format(localFormatter, i);
832          String col = String.format(localFormatter, j);
833          TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
834          columns.add(Bytes.toBytes("col" + col));
835
836          Scan scan =
837            new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
838          scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes(("col" + col)));
839
840          boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
841          boolean shouldRowExist = i % 2 == 0;
842          boolean shouldColExist = j % 2 == 0;
843          shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
844          if (shouldRowExist && shouldColExist) {
845            if (!exists) {
846              falseNeg++;
847            }
848          } else {
849            if (exists) {
850              falsePos++;
851            }
852          }
853        }
854      }
855      reader.close(true); // evict because we are about to delete the file
856      fs.delete(f, true);
857      System.out.println(bt[x].toString());
858      System.out.println("  False negatives: " + falseNeg);
859      System.out.println("  False positives: " + falsePos);
860      assertEquals(0, falseNeg);
861      assertTrue(falsePos < 2 * expErr[x]);
862    }
863  }
864
865  @Test
866  public void testSeqIdComparator() {
867    assertOrdering(StoreFileComparators.SEQ_ID, mockStoreFile(true, 100, 1000, -1, "/foo/123"),
868      mockStoreFile(true, 100, 1000, -1, "/foo/124"), mockStoreFile(true, 99, 1000, -1, "/foo/126"),
869      mockStoreFile(true, 98, 2000, -1, "/foo/126"), mockStoreFile(false, 3453, -1, 1, "/foo/1"),
870      mockStoreFile(false, 2, -1, 3, "/foo/2"), mockStoreFile(false, 1000, -1, 5, "/foo/2"),
871      mockStoreFile(false, 76, -1, 5, "/foo/3"));
872  }
873
874  /**
875   * Assert that the given comparator orders the given storefiles in the same way that they're
876   * passed.
877   */
878  private void assertOrdering(Comparator<? super HStoreFile> comparator, HStoreFile... sfs) {
879    ArrayList<HStoreFile> sorted = Lists.newArrayList(sfs);
880    Collections.shuffle(sorted);
881    Collections.sort(sorted, comparator);
882    LOG.debug("sfs: " + Joiner.on(",").join(sfs));
883    LOG.debug("sorted: " + Joiner.on(",").join(sorted));
884    assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
885  }
886
887  /**
888   * Create a mock StoreFile with the given attributes.
889   */
890  private HStoreFile mockStoreFile(boolean bulkLoad, long size, long bulkTimestamp, long seqId,
891    String path) {
892    HStoreFile mock = Mockito.mock(HStoreFile.class);
893    StoreFileReader reader = Mockito.mock(StoreFileReader.class);
894
895    Mockito.doReturn(size).when(reader).length();
896
897    Mockito.doReturn(reader).when(mock).getReader();
898    Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
899    Mockito.doReturn(OptionalLong.of(bulkTimestamp)).when(mock).getBulkLoadTimestamp();
900    Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
901    Mockito.doReturn(new Path(path)).when(mock).getPath();
902    String name = "mock storefile, bulkLoad=" + bulkLoad + " bulkTimestamp=" + bulkTimestamp
903      + " seqId=" + seqId + " path=" + path;
904    Mockito.doReturn(name).when(mock).toString();
905    return mock;
906  }
907
908  /**
909   * Generate a list of KeyValues for testing based on given parameters
910   * @return the rows key-value list
911   */
912  List<KeyValue> getKeyValueSet(long[] timestamps, int numRows, byte[] qualifier, byte[] family) {
913    List<KeyValue> kvList = new ArrayList<>();
914    for (int i = 1; i <= numRows; i++) {
915      byte[] b = Bytes.toBytes(i);
916      LOG.info(Bytes.toString(b));
917      LOG.info(Bytes.toString(b));
918      for (long timestamp : timestamps) {
919        kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
920      }
921    }
922    return kvList;
923  }
924
925  /**
926   * Test to ensure correctness when using StoreFile with multiple timestamps
927   */
928  @Test
929  public void testMultipleTimestamps() throws IOException {
930    byte[] family = Bytes.toBytes("familyname");
931    byte[] qualifier = Bytes.toBytes("qualifier");
932    int numRows = 10;
933    long[] timestamps = new long[] { 20, 10, 5, 1 };
934    Scan scan = new Scan();
935
936    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
937    Path storedir = new Path(new Path(testDir, "7e0102"), Bytes.toString(family));
938    Path dir = new Path(storedir, "1234567890");
939    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
940    // Make a store file and write data to it.
941    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
942      .withOutputDir(dir).withFileContext(meta).build();
943
944    List<KeyValue> kvList = getKeyValueSet(timestamps, numRows, qualifier, family);
945
946    for (KeyValue kv : kvList) {
947      writer.append(kv);
948    }
949    writer.appendMetadata(0, false);
950    writer.close();
951
952    HStoreFile hsf =
953      new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
954    HStore store = mock(HStore.class);
955    when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of(family));
956    hsf.initReader();
957    StoreFileReader reader = hsf.getReader();
958    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
959    TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
960    columns.add(qualifier);
961
962    scan.setTimeRange(20, 100);
963    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
964
965    scan.setTimeRange(1, 2);
966    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
967
968    scan.setTimeRange(8, 10);
969    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
970
971    // lets make sure it still works with column family time ranges
972    scan.setColumnFamilyTimeRange(family, 7, 50);
973    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
974
975    // This test relies on the timestamp range optimization
976    scan = new Scan();
977    scan.setTimeRange(27, 50);
978    assertTrue(!scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
979
980    // should still use the scanner because we override the family time range
981    scan = new Scan();
982    scan.setTimeRange(27, 50);
983    scan.setColumnFamilyTimeRange(family, 7, 50);
984    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
985  }
986
  /**
   * Tracks the hit, miss and eviction counters of a freshly created block cache while two
   * three-block store files are written and read back:
   * <ol>
   * <li>a file written with cache-on-write off is expected to cost 3 misses on first read;</li>
   * <li>a file written with cache-on-write on is expected to give 3 hits on first read;</li>
   * <li>reading both files again side by side is expected to give 6 more hits, with every cell of
   * the two files comparing equal;</li>
   * <li>closing a reader with evict-on-close on, and then another with it off, is expected to leave
   * all three counters unchanged.</li>
   * </ol>
   * Each assertion compares against running baselines ({@code startHit}, {@code startMiss},
   * {@code startEvicted}), so the order of the steps matters.
   */
  @Test
  public void testCacheOnWriteEvictOnClose() throws Exception {
    // Local alias for the fixture configuration: the settings changed below land on this.conf.
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
    Path baseDir = new Path(new Path(testDir, "7e0102"), "twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = BlockCacheFactory.createBlockCache(conf);
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf, bc);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFileWriter writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    HStoreFile hsf =
      new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    LOG.debug(hsf.getPath().toString());

    // Read this file, we should see 3 misses
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    // Drain the scanner; only the effect on the cache counters is of interest.
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    // Fold the 3 misses into the baseline for the following phases.
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf, bc);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    hsf = new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);

    // Read this file, we should see 3 hits
    hsf.initReader();
    reader = hsf.getReader();
    scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    // Fold the 3 hits into the baseline.
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Let's read back the two files to ensure the blocks exactly match
    hsf = new HStoreFile(this.fs, pathCowOff, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader readerOne = hsf.getReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = getStoreFileScanner(readerOne, true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsf = new HStoreFile(this.fs, pathCowOn, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader readerTwo = hsf.getReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = getStoreFileScanner(readerTwo, true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    ExtendedCell kv1 = null;
    ExtendedCell kv2 = null;
    // Walk both scanners in lock step and compare each pair of cells, by equals() and by raw
    // key and value bytes.
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      KeyValue keyv1 = KeyValueUtil.ensureKeyValue(kv1);
      KeyValue keyv2 = KeyValueUtil.ensureKeyValue(kv2);
      assertTrue(Bytes.compareTo(keyv1.getBuffer(), keyv1.getKeyOffset(), keyv1.getKeyLength(),
        keyv2.getBuffer(), keyv2.getKeyOffset(), keyv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(kv1.getValueArray(), kv1.getValueOffset(), kv1.getValueLength(),
        kv2.getValueArray(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    // The second file must be exhausted at the same point as the first.
    assertNull(scannerTwo.next());
    // Six more hits and no further misses or evictions are expected from the two re-reads.
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf, bc);
    hsf = new HStoreFile(this.fs, pathCowOff, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    reader = hsf.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions but the evict count stat should not change. Eviction because
    // of HFile invalidation is not counted along with normal evictions
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf, bc);
    hsf = new HStoreFile(this.fs, pathCowOn, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    reader = hsf.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }
1111
1112  private Path splitStoreFile(final HRegionFileSystem regionFs, final RegionInfo hri,
1113    final String family, final HStoreFile sf, final byte[] splitKey, boolean isTopRef)
1114    throws IOException {
1115    Path path = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef, null);
1116    if (null == path) {
1117      return null;
1118    }
1119    List<Path> splitFiles = new ArrayList<>();
1120    splitFiles.add(path);
1121    MasterProcedureEnv mockEnv = mock(MasterProcedureEnv.class);
1122    MasterServices mockServices = mock(MasterServices.class);
1123    when(mockEnv.getMasterServices()).thenReturn(mockServices);
1124    when(mockEnv.getMasterConfiguration()).thenReturn(new Configuration());
1125    TableDescriptors mockTblDescs = mock(TableDescriptors.class);
1126    when(mockServices.getTableDescriptors()).thenReturn(mockTblDescs);
1127    TableDescriptor mockTblDesc = TableDescriptorBuilder.newBuilder(hri.getTable())
1128      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
1129    when(mockTblDescs.get(any())).thenReturn(mockTblDesc);
1130    Path regionDir = regionFs.commitDaughterRegion(hri, splitFiles, mockEnv);
1131    return new Path(new Path(regionDir, family), path.getName());
1132  }
1133
1134  private StoreFileWriter writeStoreFile(Configuration conf, CacheConfig cacheConf, Path path,
1135    int numBlocks) throws IOException {
1136    // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
1137    int numKVs = 5 * numBlocks;
1138    List<KeyValue> kvs = new ArrayList<>(numKVs);
1139    byte[] b = Bytes.toBytes("x");
1140    int totalSize = 0;
1141    for (int i = numKVs; i > 0; i--) {
1142      KeyValue kv = new KeyValue(b, b, b, i, b);
1143      kvs.add(kv);
1144      // kv has memstoreTS 0, which takes 1 byte to store.
1145      totalSize += kv.getLength() + 1;
1146    }
1147    int blockSize = totalSize / numBlocks;
1148    HFileContext meta = new HFileContextBuilder().withBlockSize(blockSize).withChecksumType(CKTYPE)
1149      .withBytesPerCheckSum(CKBYTES).build();
1150    // Make a store file and write data to it.
1151    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
1152      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
1153    // We'll write N-1 KVs to ensure we don't write an extra block
1154    kvs.remove(kvs.size() - 1);
1155    for (KeyValue kv : kvs) {
1156      writer.append(kv);
1157    }
1158    writer.appendMetadata(0, false);
1159    writer.close();
1160    return writer;
1161  }
1162
1163  /**
1164   * Check if data block encoding information is saved correctly in HFile's file info.
1165   */
1166  @Test
1167  public void testDataBlockEncodingMetaData() throws IOException {
1168    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
1169    Path dir = new Path(new Path(testDir, "7e0102"), "familyname");
1170    Path path = new Path(dir, "1234567890");
1171
1172    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
1173    cacheConf = new CacheConfig(conf);
1174    HFileContext meta =
1175      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
1176        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
1177    // Make a store file and write data to it.
1178    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
1179      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
1180    writer.close();
1181
1182    HStoreFile storeFile =
1183      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
1184    storeFile.initReader();
1185    StoreFileReader reader = storeFile.getReader();
1186
1187    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
1188    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
1189    assertArrayEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
1190  }
1191
1192  @Test
1193  public void testDataBlockSizeEncoded() throws Exception {
1194    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
1195    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
1196    Path path = new Path(dir, "1234567890");
1197
1198    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
1199
1200    conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1);
1201
1202    cacheConf = new CacheConfig(conf);
1203    HFileContext meta =
1204      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
1205        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
1206    // Make a store file and write data to it.
1207    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
1208      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
1209    writeStoreFile(writer);
1210
1211    HStoreFile storeFile =
1212      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
1213    storeFile.initReader();
1214    StoreFileReader reader = storeFile.getReader();
1215
1216    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
1217    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
1218    assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value));
1219
1220    HFile.Reader fReader =
1221      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);
1222
1223    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
1224    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
1225    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
1226    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
1227    HFileBlock block;
1228    while (offset <= max) {
1229      block = fReader.readBlock(offset, -1, /* cacheBlock */
1230        false, /* pread */ false, /* isCompaction */ false, /* updateCacheMetrics */
1231        false, null, null);
1232      offset += block.getOnDiskSizeWithHeader();
1233      double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
1234      if (offset <= max) {
1235        assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
1236      }
1237    }
1238  }
1239
1240  @Test
1241  public void testDataBlockSizeCompressed() throws Exception {
1242    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR,
1243      PreviousBlockCompressionRatePredicator.class.getName());
1244    testDataBlockSizeWithCompressionRatePredicator(12,
1245      (s, c) -> (c > 2 && c < 11) ? s >= BLOCKSIZE_SMALL * 10 : true);
1246  }
1247
1248  @Test
1249  public void testDataBlockSizeUnCompressed() throws Exception {
1250    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR, UncompressedBlockSizePredicator.class.getName());
1251    testDataBlockSizeWithCompressionRatePredicator(200, (s, c) -> s < BLOCKSIZE_SMALL * 10);
1252  }
1253
1254  private void testDataBlockSizeWithCompressionRatePredicator(int expectedBlockCount,
1255    BiFunction<Integer, Integer, Boolean> validation) throws Exception {
1256    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
1257    Path path = new Path(dir, "1234567890");
1258    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
1259    cacheConf = new CacheConfig(conf);
1260    HFileContext meta =
1261      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
1262        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo)
1263        .withCompression(Compression.Algorithm.GZ).build();
1264    // Make a store file and write data to it.
1265    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
1266      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
1267    writeLargeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
1268      Bytes.toBytes(name.getMethodName()), 200);
1269    writer.close();
1270    HStoreFile storeFile =
1271      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
1272    storeFile.initReader();
1273    HFile.Reader fReader =
1274      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);
1275    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
1276    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
1277    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
1278    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
1279    HFileBlock block;
1280    int blockCount = 0;
1281    while (offset <= max) {
1282      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
1283        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
1284      offset += block.getOnDiskSizeWithHeader();
1285      blockCount++;
1286      assertTrue(validation.apply(block.getUncompressedSizeWithoutHeader(), blockCount));
1287    }
1288    assertEquals(expectedBlockCount, blockCount);
1289  }
1290
1291}