/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.BLOCK_COMPRESSED_SIZE_PREDICATOR;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.OptionalLong;
import java.util.TreeSet;
import java.util.function.BiFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.CacheStats;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.PreviousBlockCompressionRatePredicator;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.io.hfile.UncompressedBlockSizePredicator;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * Test HStoreFile
 */
@Category({ RegionServerTests.class, MediumTests.class })
public class TestHStoreFile {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHStoreFile.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestHStoreFile.class);
  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  private CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
  private static Path ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile");
  private static final ChecksumType CKTYPE = ChecksumType.CRC32C;
  private static final int CKBYTES = 512;
  private static String TEST_FAMILY = "cf";
  private static final char FIRST_CHAR = 'a';
  private static final char LAST_CHAR = 'z';

  @Rule
  public TestName name = new TestName();

  private Configuration conf;
  private Path testDir;
  private FileSystem fs;

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    testDir = TEST_UTIL.getDataTestDir(name.getMethodName());
    fs = testDir.getFileSystem(conf);
  }

  @AfterClass
  public static void tearDownAfterClass() {
    TEST_UTIL.cleanupTestDir();
  }

  /**
   * Write a file and then assert that we can read from top and bottom halves using two
   * HalfMapFiles, as well as one HalfMapFile and one HFileLink file.
   */
  @Test
  public void testBasicHalfAndHFileLinkMapFile() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testBasicHalfAndHFileLinkMapFile")).build();
    // The hfile locations that HFileLinks refer to must be consistent with the table dir
    // created by CommonFSUtils, so create the region directory under
    // CommonFSUtils.getTableDir here.
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    HStoreFile sf = new HStoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE, true);
    checkHalfHFile(regionFs, sf);
  }

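  /**
   * Writes a standard store file through the given writer, using the current test method name as
   * both the family and the qualifier.
   */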
  private void writeStoreFile(final StoreFileWriter writer) throws IOException {
    writeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()));
  }

  // pick a split point (roughly halfway)
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR) / 2, FIRST_CHAR };

  /*
   * Writes KeyValue data (two-character rows 'aa' through 'zz') to the passed writer and then
   * closes it.
   */
  public static void writeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier)
    throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
        for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
          byte[] b = new byte[] { (byte) d, (byte) e };
          writer.append(new KeyValue(b, fam, qualifier, now, b));
        }
      }
    } finally {
      writer.close();
    }
  }

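  /**
   * Writes the given number of rounds of cells (one single-byte row per round) to build a larger
   * store file, then closes the writer.
   */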
  public static void writeLargeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier,
    int rounds) throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (int i = 0; i < rounds; i++) {
        for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
          for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
            byte[] b = new byte[] { (byte) d, (byte) e };
            byte[] key = new byte[] { (byte) i };
            writer.append(new KeyValue(key, fam, qualifier, now, b));
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Test that our mechanism of writing store files in one region to reference store files in other
   * regions works.
   */
  @Test
  public void testReference() throws IOException {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testReferenceTb")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    HStoreFile hsf = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    // Split on a row boundary, not in the middle of a row. The midkey returned by the reader
    // may be in the middle of a row; create a new key with empty column and timestamp.
    byte[] midRow = CellUtil.cloneRow(reader.midKey().get());
    byte[] finalRow = CellUtil.cloneRow(reader.getLastKey().get());
    hsf.closeStoreFile(true);

    // Make a reference
    RegionInfo splitHri = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(midRow).build();
    Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true);
    HStoreFile refHsf = new HStoreFile(this.fs, refPath, conf, cacheConf, BloomType.NONE, true);
    refHsf.initReader();
    // Now confirm that I can read from the reference and that it only gets
    // keys from the top half of the file.
    try (HFileScanner s = refHsf.getReader().getScanner(false, false, false)) {
      Cell kv = null;
      for (boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
        ByteBuffer bb = ByteBuffer.wrap(((KeyValue) s.getKey()).getKey());
        kv = KeyValueUtil.createKeyValueFromKey(bb);
        if (first) {
          assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), midRow, 0,
            midRow.length));
          first = false;
        }
      }
      assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), finalRow, 0,
        finalRow.length));
    }
  }

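  /**
   * Verify that instantiating a StoreFileScanner increments the underlying file's read reference
   * count and that closing the scanner decrements it again.
   */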
  @Test
  public void testStoreFileReference() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testStoreFileReference")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    writer.close();

    HStoreFile file = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
    file.initReader();
    StoreFileReader r = file.getReader();
    assertNotNull(r);
    StoreFileScanner scanner =
      new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false, false);

    // Verify after instantiating scanner refCount is increased
    assertTrue("Verify file is being referenced", file.isReferencedInReads());
    scanner.close();
    // Verify after closing scanner refCount is decreased
    assertFalse("Verify file is not being referenced", file.isReferencedInReads());
  }

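  /**
   * Verify that shouldUseScanner() rejects a scan whose column family time range cannot match
   * anything in an empty (mocked) store file.
   */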
  @Test
  public void testEmptyStoreFileRestrictKeyRanges() throws Exception {
    StoreFileReader reader = mock(StoreFileReader.class);
    HStore store = mock(HStore.class);
    byte[] cf = Bytes.toBytes("ty");
    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf);
    when(store.getColumnFamilyDescriptor()).thenReturn(cfd);
    try (StoreFileScanner scanner =
      new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true, false)) {
      Scan scan = new Scan();
      scan.setColumnFamilyTimeRange(cf, 0, 1);
      assertFalse(scanner.shouldUseScanner(scan, store, 0));
    }
  }

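  /**
   * Write a store file, create an HFileLink pointing at it, then verify the link can be opened
   * and scanned end to end.
   */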
  @Test
  public void testHFileLink() throws IOException {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build();
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    Path dstPath = new Path(regionFs.getTableDir(), new Path("test-region", TEST_FAMILY));
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // Try to open store file from link
    StoreFileInfo storeFileInfo = new StoreFileInfo(testConf, this.fs, linkFilePath, true);
    HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    assertTrue(storeFileInfo.isLink());
    hsf.initReader();

    // Now confirm that I can read from the link
    int count = 0;
    try (StoreFileScanner scanner = hsf.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
      scanner.seek(KeyValue.LOWESTKEY);
      while (scanner.next() != null) {
        count++;
      }
    }
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  /**
   * This test creates an hfile and then the dir structures and files to verify that references to
   * hfilelinks (created by snapshot clones) can be properly interpreted.
   */
  @Test
  public void testReferenceToHFileLink() throws IOException {
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);

    // adding legal table name chars to verify regex handles it.
    RegionInfo hri = RegionInfoBuilder.newBuilder(TableName.valueOf("_original-evil-name")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
    StoreFileWriter writer = new StoreFileWriter.Builder(testConf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());

    // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
    RegionInfo hriClone = RegionInfoBuilder.newBuilder(TableName.valueOf("clone")).build();
    HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hriClone);
    Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY);
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // create splits of the link.
    // <root>/clone/splitA/<cf>/<reftohfilelink>,
    // <root>/clone/splitB/<cf>/<reftohfilelink>
    RegionInfo splitHriA = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(SPLITKEY).build();
    RegionInfo splitHriB =
      RegionInfoBuilder.newBuilder(hri.getTable()).setStartKey(SPLITKEY).build();
    HStoreFile f = new HStoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE, true);
    f.initReader();
    Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true); // top
    Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false);// bottom
    f.closeStoreFile(true);
    // OK test the thing
    CommonFSUtils.logFileSystemState(fs, testDir, LOG);

    // There is a case where a file with the hfilelink pattern is actually a daughter
    // reference to an hfile link. StoreFile has code that handles this case.

    // Try to open store file from link
    HStoreFile hsfA = new HStoreFile(this.fs, pathA, testConf, cacheConf, BloomType.NONE, true);
    hsfA.initReader();

    // Now confirm that I can read from the ref to link
    int count = 0;
    try (StoreFileScanner scanner = hsfA.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
      scanner.seek(KeyValue.LOWESTKEY);
      while (scanner.next() != null) {
        count++;
      }
      assertTrue(count > 0); // read some rows here
    }

    // Try to open store file from link
    HStoreFile hsfB = new HStoreFile(this.fs, pathB, testConf, cacheConf, BloomType.NONE, true);
    hsfB.initReader();

    // Now confirm that I can read from the ref to link
    try (StoreFileScanner scanner = hsfB.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
      scanner.seek(KeyValue.LOWESTKEY);
      while (scanner.next() != null) {
        count++;
      }
    }

    // read the rest of the rows
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  private void checkHalfHFile(final HRegionFileSystem regionFs, final HStoreFile f)
    throws IOException {
    f.initReader();
    Cell midkey = f.getReader().midKey().get();
    KeyValue midKV = (KeyValue) midkey;
    // 1. Test using the midRow as the splitKey; this generates two Reference files
    // in the daughter regions.
    byte[] midRow = CellUtil.cloneRow(midKV);
    // Create top split.
    RegionInfo topHri =
      RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable()).setEndKey(SPLITKEY).build();
    Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true);
    // Create bottom split.
    RegionInfo bottomHri = RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable())
      .setStartKey(SPLITKEY).build();
    Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false);
    // Make readers on top and bottom.
    HStoreFile topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true);
    topF.initReader();
    StoreFileReader top = topF.getReader();
    HStoreFile bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true);
    bottomF.initReader();
    StoreFileReader bottom = bottomF.getReader();
    ByteBuffer previous = null;
    LOG.info("Midkey: " + midKV.toString());
    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midKV.getKey());
    try {
      // Now make two half scanners and assert that, between them, they can read the full
      // backing file, one from the top half and the other from the bottom half.
      // Read from the top first.
      boolean first = true;
      ByteBuffer key = null;
      try (HFileScanner topScanner = top.getScanner(false, false, false)) {
        while (
          (!topScanner.isSeeked() && topScanner.seekTo())
            || (topScanner.isSeeked() && topScanner.next())
        ) {
          key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());

          if (
            (PrivateCellUtil.compare(topScanner.getReader().getComparator(), midKV, key.array(),
              key.arrayOffset(), key.limit())) > 0
          ) {
            fail("key=" + Bytes.toStringBinary(key) + " < midkey=" + midkey);
          }
          if (first) {
            first = false;
            LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
          }
        }
      }
      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

      first = true;
      try (HFileScanner bottomScanner = bottom.getScanner(false, false, false)) {
        while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
          previous = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
          key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
          if (first) {
            first = false;
            LOG.info("First in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
          }
          assertTrue(key.compareTo(bbMidkeyBytes) < 0);
        }
        if (previous != null) {
          LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
        }
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // 2. Test using a midkey which will generate one Reference file and one HFileLink file.
      // First, use a key that is less than the first key. Ensure splits behave properly.
      byte[] badmidkey = Bytes.toBytes("  .");
      assertTrue(fs.exists(f.getPath()));
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);

      assertNull(bottomPath);

      topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true);
      topF.initReader();
      top = topF.getReader();
      // Now read from the top.
      first = true;
      try (HFileScanner topScanner = top.getScanner(false, false, false)) {
        KeyValue.KeyOnlyKeyValue keyOnlyKV = new KeyValue.KeyOnlyKeyValue();
        while ((!topScanner.isSeeked() && topScanner.seekTo()) || topScanner.next()) {
          key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());
          keyOnlyKV.setKey(key.array(), 0 + key.arrayOffset(), key.limit());
          assertTrue(PrivateCellUtil.compare(topScanner.getReader().getComparator(), keyOnlyKV,
            badmidkey, 0, badmidkey.length) >= 0);
          if (first) {
            first = false;
            KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
            LOG.info("First top when key < bottom: " + keyKV);
            String tmp =
              Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
            for (int i = 0; i < tmp.length(); i++) {
              assertTrue(tmp.charAt(i) == 'a');
            }
          }
        }
        KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
        LOG.info("Last top when key < bottom: " + keyKV);
        String tmp =
          Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
        for (int i = 0; i < tmp.length(); i++) {
          assertTrue(tmp.charAt(i) == 'z');
        }
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Test when badmidkey is greater than the last key in the file ('|||' > 'zz').
      badmidkey = Bytes.toBytes("|||");
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
      assertNull(topPath);

      bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true);
      bottomF.initReader();
      bottom = bottomF.getReader();
      first = true;
      try (HFileScanner bottomScanner = bottom.getScanner(false, false, false)) {
        while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
          key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
          if (first) {
            first = false;
            KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
            LOG.info("First bottom when key > top: " + keyKV);
            String tmp =
              Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
            for (int i = 0; i < tmp.length(); i++) {
              assertTrue(tmp.charAt(i) == 'a');
            }
          }
        }
        KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
        LOG.info("Last bottom when key > top: " + keyKV);
        String tmp =
          Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
        for (int i = 0; i < tmp.length(); i++) {
          assertTrue(tmp.charAt(i) == 'z');
        }
      }
    } finally {
      if (top != null) {
        top.close(true); // evict since we are about to delete the file
      }
      if (bottom != null) {
        bottom.close(true); // evict since we are about to delete the file
      }
      fs.delete(f.getPath(), true);
    }
  }

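  /** Opens a non-compaction StoreFileScanner over the given reader. */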
  private static StoreFileScanner getStoreFileScanner(StoreFileReader reader, boolean cacheBlocks,
    boolean pread) {
    return reader.getStoreFileScanner(cacheBlocks, pread, false, 0, 0, false);
  }

  private static final String localFormatter = "%010d";

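  /**
   * Writes 1000 rows (even indices only) through the given writer, then reopens the file and
   * checks the Bloom filter: even rows must all test positive (no false negatives), while odd
   * rows that test positive count as false positives and must stay within twice the configured
   * error rate.
   */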
  private void bloomWriteRead(StoreFileWriter writer, FileSystem fs) throws Exception {
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
    Path f = writer.getPath();
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
      columns.add(Bytes.toBytes("family:col"));

      Scan scan = new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
      scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes("family:col"));
      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  private static final int BLOCKSIZE_SMALL = 8192;

  @Test
  public void testBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);
    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withBloomType(BloomType.ROW).withMaxKeyCount(2000).withFileContext(meta).build();
    bloomWriteRead(writer, fs);
  }

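  /**
   * Same false-positive accounting as bloomWriteRead(), but exercising the Delete Family Bloom
   * filter instead of the general one.
   */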
  @Test
  public void testDeleteFamilyBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);

    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withMaxKeyCount(2000).withFileContext(meta).build();

    // add delete family
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, KeyValue.Type.DeleteFamily, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      byte[] rowKey = Bytes.toBytes(row);
      boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0, rowKey.length);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    assertEquals(1000, reader.getDeleteFamilyCnt());
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  /**
   * Test for HBASE-8012
   */
  @Test
  public void testReseek() throws Exception {
    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withFileContext(meta).build();

    writeStoreFile(writer);
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());

    // Now do a reseek with an empty KV to position at the beginning of the file

    KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
    StoreFileScanner s = getStoreFileScanner(reader, false, false);
    s.reseek(k);

    assertNotNull("Initial reseek should position at the beginning of the file", s.peek());
  }

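  /**
   * Exercises ROWCOL and ROW Bloom types against mixed row/column existence patterns and checks
   * false negative and false positive counts.
   */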
  @Test
  public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    int rowCount = 50;
    int colCount = 10;
    int versions = 2;

    // run once using columns and once using rows
    BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
    int[] expKeys = { rowCount * colCount, rowCount };
    // Expected Bloom false positives, per type:
    // ROWCOL: rowCount*2*colCount lookups can miss.
    // ROW: only rowCount*2 distinct rows are probed, but every row-level false positive
    // is magnified by the inner column loop (2*colCount).
    float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };

    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    for (int x : new int[] { 0, 1 }) {
      // write the file
      Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

      HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
        .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
      // Make a store file and write data to it.
      StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
        .withBloomType(bt[x]).withMaxKeyCount(expKeys[x]).withFileContext(meta).build();

      long now = EnvironmentEdgeManager.currentTime();
      for (int i = 0; i < rowCount * 2; i += 2) { // rows
        for (int j = 0; j < colCount * 2; j += 2) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          for (int k = 0; k < versions; ++k) { // versions
            KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
              Bytes.toBytes("col" + col), now - k, Bytes.toBytes(-1L));
            writer.append(kv);
          }
        }
      }
      writer.close();

      ReaderContext context =
        new ReaderContextBuilder().withFilePath(f).withFileSize(fs.getFileStatus(f).getLen())
          .withFileSystem(fs).withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f)).build();
      StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
      storeFileInfo.initHFileInfo(context);
      StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
      storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
      reader.loadFileInfo();
      reader.loadBloomfilter();
      StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
      assertEquals(expKeys[x], reader.getGeneralBloomFilter().getKeyCount());

      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      // check false positives rate
      int falsePos = 0;
      int falseNeg = 0;
      for (int i = 0; i < rowCount * 2; ++i) { // rows
        for (int j = 0; j < colCount * 2; ++j) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
          columns.add(Bytes.toBytes("col" + col));

          Scan scan =
            new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
          scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes(("col" + col)));

          boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
          boolean shouldRowExist = i % 2 == 0;
          boolean shouldColExist = j % 2 == 0;
          shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
          if (shouldRowExist && shouldColExist) {
            if (!exists) {
              falseNeg++;
            }
          } else {
            if (exists) {
              falsePos++;
            }
          }
        }
      }
      reader.close(true); // evict because we are about to delete the file
      fs.delete(f, true);
      System.out.println(bt[x].toString());
      System.out.println("  False negatives: " + falseNeg);
      System.out.println("  False positives: " + falsePos);
      assertEquals(0, falseNeg);
      assertTrue(falsePos < 2 * expErr[x]);
    }
  }

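  /**
   * Verify that StoreFileComparators.SEQ_ID orders a mix of bulk-loaded and flushed store files
   * exactly as listed below.
   */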
  @Test
  public void testSeqIdComparator() {
    assertOrdering(StoreFileComparators.SEQ_ID, mockStoreFile(true, 100, 1000, -1, "/foo/123"),
      mockStoreFile(true, 100, 1000, -1, "/foo/124"), mockStoreFile(true, 99, 1000, -1, "/foo/126"),
      mockStoreFile(true, 98, 2000, -1, "/foo/126"), mockStoreFile(false, 3453, -1, 1, "/foo/1"),
      mockStoreFile(false, 2, -1, 3, "/foo/2"), mockStoreFile(false, 1000, -1, 5, "/foo/2"),
      mockStoreFile(false, 76, -1, 5, "/foo/3"));
  }

  /**
   * Assert that the given comparator orders the given storefiles in the same way that they're
   * passed.
   */
  private void assertOrdering(Comparator<? super HStoreFile> comparator, HStoreFile... sfs) {
    ArrayList<HStoreFile> sorted = Lists.newArrayList(sfs);
    Collections.shuffle(sorted);
    Collections.sort(sorted, comparator);
    LOG.debug("sfs: " + Joiner.on(",").join(sfs));
    LOG.debug("sorted: " + Joiner.on(",").join(sorted));
    assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
  }

  /**
   * Create a mock StoreFile with the given attributes.
   */
  private HStoreFile mockStoreFile(boolean bulkLoad, long size, long bulkTimestamp, long seqId,
    String path) {
    HStoreFile mock = Mockito.mock(HStoreFile.class);
    StoreFileReader reader = Mockito.mock(StoreFileReader.class);

    Mockito.doReturn(size).when(reader).length();

    Mockito.doReturn(reader).when(mock).getReader();
    Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
    Mockito.doReturn(OptionalLong.of(bulkTimestamp)).when(mock).getBulkLoadTimestamp();
    Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
    Mockito.doReturn(new Path(path)).when(mock).getPath();
    String name = "mock storefile, bulkLoad=" + bulkLoad + " bulkTimestamp=" + bulkTimestamp
      + " seqId=" + seqId + " path=" + path;
    Mockito.doReturn(name).when(mock).toString();
    return mock;
  }

  /**
   * Generate a list of KeyValues for testing based on the given parameters.
   * @return the generated key-value list
   */
  List<KeyValue> getKeyValueSet(long[] timestamps, int numRows, byte[] qualifier, byte[] family) {
    List<KeyValue> kvList = new ArrayList<>();
    for (int i = 1; i <= numRows; i++) {
      byte[] b = Bytes.toBytes(i);
      LOG.info(Bytes.toString(b));
      for (long timestamp : timestamps) {
        kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
      }
    }
    return kvList;
  }

  /**
   * Test to ensure correctness when using StoreFile with multiple timestamps
   */
  @Test
  public void testMultipleTimestamps() throws IOException {
    byte[] family = Bytes.toBytes("familyname");
    byte[] qualifier = Bytes.toBytes("qualifier");
    int numRows = 10;
    long[] timestamps = new long[] { 20, 10, 5, 1 };
    Scan scan = new Scan();

    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path storedir = new Path(new Path(testDir, "7e0102"), Bytes.toString(family));
    Path dir = new Path(storedir, "1234567890");
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withOutputDir(dir).withFileContext(meta).build();

    List<KeyValue> kvList = getKeyValueSet(timestamps, numRows, qualifier, family);

    for (KeyValue kv : kvList) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();

    HStoreFile hsf =
      new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    HStore store = mock(HStore.class);
    when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of(family));
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
    TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
    columns.add(qualifier);

    scan.setTimeRange(20, 100);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(1, 2);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(8, 10);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // let's make sure it still works with column family time ranges
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // This test relies on the timestamp range optimization
    scan = new Scan();
    scan.setTimeRange(27, 50);
    assertTrue(!scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // should still use the scanner because we override the family time range
    scan = new Scan();
    scan.setTimeRange(27, 50);
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
  }

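  /**
   * End-to-end check of cache-on-write and evict-on-close through block cache stats: reading a
   * file written with cache-on-write off yields misses, with it on yields hits, and closing with
   * evict-on-close must not move the normal eviction counter.
   */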
  @Test
  public void testCacheOnWriteEvictOnClose() throws Exception {
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
    Path baseDir = new Path(new Path(testDir, "7e0102"), "twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = BlockCacheFactory.createBlockCache(conf);
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf, bc);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFileWriter writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    HStoreFile hsf =
      new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    LOG.debug(hsf.getPath().toString());

    // Read this file, we should see 3 misses
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf, bc);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    hsf = new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);

    // Read this file, we should see 3 hits
    hsf.initReader();
    reader = hsf.getReader();
    scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Let's read back the two files to ensure the blocks exactly match
    hsf = new HStoreFile(this.fs, pathCowOff, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader readerOne = hsf.getReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = getStoreFileScanner(readerOne, true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsf = new HStoreFile(this.fs, pathCowOn, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader readerTwo = hsf.getReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = getStoreFileScanner(readerTwo, true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    Cell kv1 = null;
    Cell kv2 = null;
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      KeyValue keyv1 = KeyValueUtil.ensureKeyValue(kv1);
      KeyValue keyv2 = KeyValueUtil.ensureKeyValue(kv2);
      assertTrue(Bytes.compareTo(keyv1.getBuffer(), keyv1.getKeyOffset(), keyv1.getKeyLength(),
        keyv2.getBuffer(), keyv2.getKeyOffset(), keyv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(kv1.getValueArray(), kv1.getValueOffset(), kv1.getValueLength(),
        kv2.getValueArray(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    assertNull(scannerTwo.next());
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf, bc);
    hsf = new HStoreFile(this.fs, pathCowOff, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    reader = hsf.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions but the evict count stat should not change. Eviction because
    // of HFile invalidation is not counted along with normal evictions
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf, bc);
    hsf = new HStoreFile(this.fs, pathCowOn, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    reader = hsf.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }

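  /**
   * Split a store file at splitKey via the region filesystem and commit the daughter region
   * through a mocked MasterProcedureEnv. Returns the committed reference path, or null when the
   * split key falls outside the file's range on the requested side.
   */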
  private Path splitStoreFile(final HRegionFileSystem regionFs, final RegionInfo hri,
    final String family, final HStoreFile sf, final byte[] splitKey, boolean isTopRef)
    throws IOException {
    Path path = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef, null);
    if (null == path) {
      return null;
    }
    List<Path> splitFiles = new ArrayList<>();
    splitFiles.add(path);
    MasterProcedureEnv mockEnv = mock(MasterProcedureEnv.class);
    MasterServices mockServices = mock(MasterServices.class);
    when(mockEnv.getMasterServices()).thenReturn(mockServices);
    when(mockEnv.getMasterConfiguration()).thenReturn(new Configuration());
    TableDescriptors mockTblDescs = mock(TableDescriptors.class);
    when(mockServices.getTableDescriptors()).thenReturn(mockTblDescs);
    TableDescriptor mockTblDesc = TableDescriptorBuilder.newBuilder(hri.getTable())
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
    when(mockTblDescs.get(any())).thenReturn(mockTblDesc);
    Path regionDir = regionFs.commitDaughterRegion(hri, splitFiles, mockEnv);
    return new Path(new Path(regionDir, family), path.getName());
  }

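  /**
   * Write a store file sized so that it contains roughly numBlocks data blocks, returning the
   * closed writer (its path points at the finished file).
   */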
  private StoreFileWriter writeStoreFile(Configuration conf, CacheConfig cacheConf, Path path,
    int numBlocks) throws IOException {
    // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
    int numKVs = 5 * numBlocks;
    List<KeyValue> kvs = new ArrayList<>(numKVs);
    byte[] b = Bytes.toBytes("x");
    int totalSize = 0;
    for (int i = numKVs; i > 0; i--) {
      KeyValue kv = new KeyValue(b, b, b, i, b);
      kvs.add(kv);
      // kv has memstoreTS 0, which takes 1 byte to store.
      totalSize += kv.getLength() + 1;
    }
    int blockSize = totalSize / numBlocks;
    HFileContext meta = new HFileContextBuilder().withBlockSize(blockSize).withChecksumType(CKTYPE)
      .withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    // We'll write N-1 KVs to ensure we don't write an extra block
    kvs.remove(kvs.size() - 1);
    for (KeyValue kv : kvs) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();
    return writer;
  }

  /**
   * Check if data block encoding information is saved correctly in HFile's file info.
   */
  @Test
  public void testDataBlockEncodingMetaData() throws IOException {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writer.close();

    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertArrayEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
  }

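  /**
   * With hbase.writer.unified.encoded.blocksize.ratio set to 1, encoded block boundaries should
   * track BLOCKSIZE_SMALL; verify every full data block lands within 5% above that size.
   */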
  @Test
  public void testDataBlockSizeEncoded() throws Exception {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;

    conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1);

    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeStoreFile(writer);

    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value));

    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);

    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
      if (offset <= max) {
        assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
      }
    }
  }

  @Test
  public void testDataBlockSizeCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR,
      PreviousBlockCompressionRatePredicator.class.getName());
    testDataBlockSizeWithCompressionRatePredicator(12,
      (s, c) -> (c > 2 && c < 11) ? s >= BLOCKSIZE_SMALL * 10 : true);
  }

  @Test
  public void testDataBlockSizeUnCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR, UncompressedBlockSizePredicator.class.getName());
    testDataBlockSizeWithCompressionRatePredicator(200, (s, c) -> s < BLOCKSIZE_SMALL * 10);
  }

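  /**
   * Writes a large GZ-compressed store file, then walks its data blocks, asserting the supplied
   * predicate on (uncompressed block size, running block count) and the expected total number of
   * blocks.
   */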
  private void testDataBlockSizeWithCompressionRatePredicator(int expectedBlockCount,
    BiFunction<Integer, Integer, Boolean> validation) throws Exception {
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");
    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo)
        .withCompression(Compression.Algorithm.GZ).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeLargeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()), 200);
    writer.close();
    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);
    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    int blockCount = 0;
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      blockCount++;
      assertTrue(validation.apply(block.getUncompressedSizeWithoutHeader(), blockCount));
    }
    assertEquals(expectedBlockCount, blockCount);
  }

}