/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.BLOCK_COMPRESSED_SIZE_PREDICATOR;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.OptionalLong;
import java.util.TreeSet;
import java.util.function.BiFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.CacheStats;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.PreviousBlockCompressionRatePredicator;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.io.hfile.UncompressedBlockSizePredicator;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * Test HStoreFile
 */
@Category({ RegionServerTests.class, MediumTests.class })
public class TestHStoreFile {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHStoreFile.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestHStoreFile.class);
  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  private CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
  private static Path ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile");
  private static final ChecksumType CKTYPE = ChecksumType.CRC32C;
  private static final int CKBYTES = 512;
  private static String TEST_FAMILY = "cf";
  private static final char FIRST_CHAR = 'a';
  private static final char LAST_CHAR = 'z';
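  // FIRST_CHAR..LAST_CHAR define the two-character row keyspace 'aa'..'zz',
  // i.e. 26 * 26 = 676 rows per store file written by writeStoreFile below.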

  @Rule
  public TestName name = new TestName();

  private Configuration conf;
  private Path testDir;
  private FileSystem fs;

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    testDir = TEST_UTIL.getDataTestDir(name.getMethodName());
    fs = testDir.getFileSystem(conf);
  }

  @AfterClass
  public static void tearDownAfterClass() {
    TEST_UTIL.cleanupTestDir();
  }

  /**
   * Write a file and then assert that we can read from top and bottom halves using two
   * HalfMapFiles, as well as one HalfMapFile and one HFileLink file.
   */
  @Test
  public void testBasicHalfAndHFileLinkMapFile() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testBasicHalfAndHFileLinkMapFile")).build();
    // The locations of the hfiles that HFileLinks refer to must be consistent with the table
    // directory layout created by CommonFSUtils, so create the region directory under
    // CommonFSUtils.getTableDir here.
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    HStoreFile sf = new HStoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE, true);
    checkHalfHFile(regionFs, sf);
  }

  private void writeStoreFile(final StoreFileWriter writer) throws IOException {
    writeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()));
  }

  // pick a split point (roughly halfway)
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR) / 2, FIRST_CHAR };
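  // ('a' + 'z') / 2 is 109, the code point of 'm', so SPLITKEY is the two-byte row "ma".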

  /*
   * Writes HStoreKey and ImmutableBytes data to passed writer and then closes it.
   */
  public static void writeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier)
    throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
        for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
          byte[] b = new byte[] { (byte) d, (byte) e };
          writer.append(new KeyValue(b, fam, qualifier, now, b));
        }
      }
    } finally {
      writer.close();
    }
  }

  public static void writeLargeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier,
    int rounds) throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (int i = 0; i < rounds; i++) {
        for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
          for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
            byte[] b = new byte[] { (byte) d, (byte) e };
            byte[] key = new byte[] { (byte) i };
            writer.append(new KeyValue(key, fam, qualifier, now, b));
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Test that our mechanism of writing store files in one region to reference store files in other
   * regions works.
   */
  @Test
  public void testReference() throws IOException {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testReferenceTb")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    HStoreFile hsf = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    // Split on a row, not in middle of row. Midkey returned by reader
    // may be in middle of row. Create new one with empty column and
    // timestamp.
    byte[] midRow = CellUtil.cloneRow(reader.midKey().get());
    byte[] finalRow = CellUtil.cloneRow(reader.getLastKey().get());
    hsf.closeStoreFile(true);

    // Make a reference
    RegionInfo splitHri = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(midRow).build();
    Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true);
    HStoreFile refHsf = new HStoreFile(this.fs, refPath, conf, cacheConf, BloomType.NONE, true);
    refHsf.initReader();
    // Now confirm that I can read from the reference and that it only gets
    // keys from top half of the file.
    HFileScanner s = refHsf.getReader().getScanner(false, false);
    Cell kv = null;
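    // The loop idiom below seeks to the first cell on the first iteration
    // and advances with next() on every iteration after that.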
    for (boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
      ByteBuffer bb = ByteBuffer.wrap(((KeyValue) s.getKey()).getKey());
      kv = KeyValueUtil.createKeyValueFromKey(bb);
      if (first) {
        assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), midRow, 0,
          midRow.length));
        first = false;
      }
    }
    assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), finalRow, 0,
      finalRow.length));
  }

  @Test
  public void testStoreFileReference() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testStoreFileReference")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    writer.close();

    HStoreFile file = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
    file.initReader();
    StoreFileReader r = file.getReader();
    assertNotNull(r);
    StoreFileScanner scanner =
      new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false, false);

    // Verify after instantiating scanner refCount is increased
    assertTrue("Verify file is being referenced", file.isReferencedInReads());
    scanner.close();
    // Verify after closing scanner refCount is decreased
    assertFalse("Verify file is not being referenced", file.isReferencedInReads());
  }

  @Test
  public void testEmptyStoreFileRestrictKeyRanges() throws Exception {
    StoreFileReader reader = mock(StoreFileReader.class);
    HStore store = mock(HStore.class);
    byte[] cf = Bytes.toBytes("ty");
    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf);
    when(store.getColumnFamilyDescriptor()).thenReturn(cfd);
    try (StoreFileScanner scanner =
      new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true, false)) {
      Scan scan = new Scan();
      scan.setColumnFamilyTimeRange(cf, 0, 1);
      assertFalse(scanner.shouldUseScanner(scan, store, 0));
    }
  }

  @Test
  public void testHFileLink() throws IOException {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build();
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    Path dstPath = new Path(regionFs.getTableDir(), new Path("test-region", TEST_FAMILY));
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // Try to open store file from link
    StoreFileInfo storeFileInfo = new StoreFileInfo(testConf, this.fs, linkFilePath, true);
    HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    assertTrue(storeFileInfo.isLink());
    hsf.initReader();

    // Now confirm that I can read from the link
    int count = 1;
    HFileScanner s = hsf.getReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
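    // writeStoreFile emits one cell per two-char row 'aa'..'zz', so the link
    // should yield all 26 * 26 = 676 cells.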
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  /**
   * This test creates an hfile and then the dir structures and files to verify that references to
   * hfilelinks (created by snapshot clones) can be properly interpreted.
   */
  @Test
  public void testReferenceToHFileLink() throws IOException {
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);

    // adding legal table name chars to verify regex handles it.
    RegionInfo hri = RegionInfoBuilder.newBuilder(TableName.valueOf("_original-evil-name")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
    StoreFileWriter writer = new StoreFileWriter.Builder(testConf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());

    // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
    RegionInfo hriClone = RegionInfoBuilder.newBuilder(TableName.valueOf("clone")).build();
    HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hriClone);
    Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY);
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // create splits of the link.
    // <root>/clone/splitA/<cf>/<reftohfilelink>,
    // <root>/clone/splitB/<cf>/<reftohfilelink>
    RegionInfo splitHriA = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(SPLITKEY).build();
    RegionInfo splitHriB =
      RegionInfoBuilder.newBuilder(hri.getTable()).setStartKey(SPLITKEY).build();
    HStoreFile f = new HStoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE, true);
    f.initReader();
    Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true); // top
    Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false);// bottom
    f.closeStoreFile(true);
    // OK test the thing
    CommonFSUtils.logFileSystemState(fs, testDir, LOG);

    // There is a case where a file with the hfilelink pattern is actually a daughter
    // reference to an hfile link. There is code in StoreFile that handles this case.

    // Try to open store file from link
    HStoreFile hsfA = new HStoreFile(this.fs, pathA, testConf, cacheConf, BloomType.NONE, true);
    hsfA.initReader();

    // Now confirm that I can read from the ref to link
    int count = 1;
    HFileScanner s = hsfA.getReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
    assertTrue(count > 0); // read some rows here

    // Try to open store file from link
    HStoreFile hsfB = new HStoreFile(this.fs, pathB, testConf, cacheConf, BloomType.NONE, true);
    hsfB.initReader();

    // Now confirm that I can read from the ref to link
    HFileScanner sB = hsfB.getReader().getScanner(false, false);
    sB.seekTo();

    // count++ as seekTo() will advance the scanner
    count++;
    while (sB.next()) {
      count++;
    }

    // read the rest of the rows
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  private void checkHalfHFile(final HRegionFileSystem regionFs, final HStoreFile f)
    throws IOException {
    f.initReader();
    Cell midkey = f.getReader().midKey().get();
    KeyValue midKV = (KeyValue) midkey;
    // 1. test using the midRow as the splitKey, this test will generate two Reference files
    // in the children
    byte[] midRow = CellUtil.cloneRow(midKV);
    // Create top split.
    RegionInfo topHri =
      RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable()).setEndKey(SPLITKEY).build();
    Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true);
    // Create bottom split.
    RegionInfo bottomHri = RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable())
      .setStartKey(SPLITKEY).build();
    Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false);
    // Make readers on top and bottom.
    HStoreFile topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true);
    topF.initReader();
    StoreFileReader top = topF.getReader();
    HStoreFile bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true);
    bottomF.initReader();
    StoreFileReader bottom = bottomF.getReader();
    ByteBuffer previous = null;
    LOG.info("Midkey: " + midKV.toString());
    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midKV.getKey());
    try {
      // Now make two HalfMapFiles and assert they can read the full backing
      // file, one from the top and the other from the bottom.
      // Test bottom half first.
      // Now test reading from the top.
      boolean first = true;
      ByteBuffer key = null;
      HFileScanner topScanner = top.getScanner(false, false);
      while (
        (!topScanner.isSeeked() && topScanner.seekTo())
          || (topScanner.isSeeked() && topScanner.next())
      ) {
        key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());

        if (
          (PrivateCellUtil.compare(topScanner.getReader().getComparator(), midKV, key.array(),
            key.arrayOffset(), key.limit())) > 0
        ) {
          fail("key=" + Bytes.toStringBinary(key) + " < midkey=" + midkey);
        }
        if (first) {
          first = false;
          LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
        }
      }
      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

      first = true;
      HFileScanner bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
        previous = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
        key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
        if (first) {
          first = false;
          LOG.info("First in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
        }
        assertTrue(key.compareTo(bbMidkeyBytes) < 0);
      }
      if (previous != null) {
        LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // 2. test using a midkey which will generate one Reference file and one HFileLink file.
      // First, use a key that is less than the first key. Ensure splits behave
      // properly.
      byte[] badmidkey = Bytes.toBytes("  .");
      assertTrue(fs.exists(f.getPath()));
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);

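      // Every key in the file sorts after "  .", so the bottom half is empty and
      // no bottom reference file is created.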
      assertNull(bottomPath);

      topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true);
      topF.initReader();
      top = topF.getReader();
      // Now read from the top.
      first = true;
      topScanner = top.getScanner(false, false);
      KeyValue.KeyOnlyKeyValue keyOnlyKV = new KeyValue.KeyOnlyKeyValue();
      while ((!topScanner.isSeeked() && topScanner.seekTo()) || topScanner.next()) {
        key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());
        keyOnlyKV.setKey(key.array(), 0 + key.arrayOffset(), key.limit());
        assertTrue(PrivateCellUtil.compare(topScanner.getReader().getComparator(), keyOnlyKV,
          badmidkey, 0, badmidkey.length) >= 0);
        if (first) {
          first = false;
          KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
          LOG.info("First top when key < bottom: " + keyKV);
          String tmp =
            Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
      LOG.info("Last top when key < bottom: " + keyKV);
      String tmp = Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(tmp.charAt(i) == 'z');
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Test when badkey is greater than the last key in the file ('|||' > 'zz').
      badmidkey = Bytes.toBytes("|||");
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
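      // Every key in the file sorts before "|||", so the top half is empty and
      // no top reference file is created.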
      assertNull(topPath);

      bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true);
      bottomF.initReader();
      bottom = bottomF.getReader();
      first = true;
      bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
        key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
        if (first) {
          first = false;
          keyKV = KeyValueUtil.createKeyValueFromKey(key);
          LOG.info("First bottom when key > top: " + keyKV);
          tmp = Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      keyKV = KeyValueUtil.createKeyValueFromKey(key);
      LOG.info("Last bottom when key > top: " + keyKV);
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(
          Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength()).charAt(i)
              == 'z');
      }
    } finally {
      if (top != null) {
        top.close(true); // evict since we are about to delete the file
      }
      if (bottom != null) {
        bottom.close(true); // evict since we are about to delete the file
      }
      fs.delete(f.getPath(), true);
    }
  }

  private static StoreFileScanner getStoreFileScanner(StoreFileReader reader, boolean cacheBlocks,
    boolean pread) {
    return reader.getStoreFileScanner(cacheBlocks, pread, false, 0, 0, false);
  }

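  // Zero-padded 10-digit row keys keep lexicographic order aligned with numeric order.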
  private static final String localFormatter = "%010d";

  private void bloomWriteRead(StoreFileWriter writer, FileSystem fs) throws Exception {
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
    Path f = writer.getPath();
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
      columns.add(Bytes.toBytes("family:col"));

      Scan scan = new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
      scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes("family:col"));
      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
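    // Only the 1000 absent (odd) rows can produce false positives; allow up to
    // twice the configured error rate across all 2000 probes before failing.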
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  private static final int BLOCKSIZE_SMALL = 8192;

  @Test
  public void testBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);
    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withBloomType(BloomType.ROW).withMaxKeyCount(2000).withFileContext(meta).build();
    bloomWriteRead(writer, fs);
  }

  @Test
  public void testDeleteFamilyBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);

    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withMaxKeyCount(2000).withFileContext(meta).build();

    // add delete family
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, KeyValue.Type.DeleteFamily, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      byte[] rowKey = Bytes.toBytes(row);
      boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0, rowKey.length);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    assertEquals(1000, reader.getDeleteFamilyCnt());
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  /**
   * Test for HBASE-8012
   */
  @Test
  public void testReseek() throws Exception {
    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withFileContext(meta).build();

    writeStoreFile(writer);
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());

    // Now do reseek with empty KV to position to the beginning of the file

    KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
    StoreFileScanner s = getStoreFileScanner(reader, false, false);
    s.reseek(k);

    assertNotNull("Initial reseek should position at the beginning of the file", s.peek());
  }

  @Test
  public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    int rowCount = 50;
    int colCount = 10;
    int versions = 2;

    // run once using columns and once using rows
    BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
    int[] expKeys = { rowCount * colCount, rowCount };
    // The next line deserves commentary: it is the expected number of bloom false positives.
    // ROWCOL: rowCount*2*colCount probes against the filter.
    // ROW: only rowCount*2 rows are inserted, but each row-level false positive is magnified
    // by the inner loop over every column (2*colCount probes per row).
    float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };

    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    for (int x : new int[] { 0, 1 }) {
      // write the file
      Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

      HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
        .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
      // Make a store file and write data to it.
      StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
        .withBloomType(bt[x]).withMaxKeyCount(expKeys[x]).withFileContext(meta).build();

      long now = EnvironmentEdgeManager.currentTime();
      for (int i = 0; i < rowCount * 2; i += 2) { // rows
        for (int j = 0; j < colCount * 2; j += 2) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          for (int k = 0; k < versions; ++k) { // versions
            KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
              Bytes.toBytes("col" + col), now - k, Bytes.toBytes(-1L));
            writer.append(kv);
          }
        }
      }
      writer.close();

      ReaderContext context =
        new ReaderContextBuilder().withFilePath(f).withFileSize(fs.getFileStatus(f).getLen())
          .withFileSystem(fs).withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f)).build();
      StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
      storeFileInfo.initHFileInfo(context);
      StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
      storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
      reader.loadFileInfo();
      reader.loadBloomfilter();
      StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
      assertEquals(expKeys[x], reader.getGeneralBloomFilter().getKeyCount());

      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      // check false positives rate
      int falsePos = 0;
      int falseNeg = 0;
      for (int i = 0; i < rowCount * 2; ++i) { // rows
        for (int j = 0; j < colCount * 2; ++j) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
          columns.add(Bytes.toBytes("col" + col));

          Scan scan =
            new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
          scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes(("col" + col)));

          boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
          boolean shouldRowExist = i % 2 == 0;
          boolean shouldColExist = j % 2 == 0;
          shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
          if (shouldRowExist && shouldColExist) {
            if (!exists) {
              falseNeg++;
            }
          } else {
            if (exists) {
              falsePos++;
            }
          }
        }
      }
      reader.close(true); // evict because we are about to delete the file
      fs.delete(f, true);
      System.out.println(bt[x].toString());
      System.out.println("  False negatives: " + falseNeg);
      System.out.println("  False positives: " + falsePos);
      assertEquals(0, falseNeg);
      assertTrue(falsePos < 2 * expErr[x]);
    }
  }

  @Test
  public void testSeqIdComparator() {
    assertOrdering(StoreFileComparators.SEQ_ID, mockStoreFile(true, 100, 1000, -1, "/foo/123"),
      mockStoreFile(true, 100, 1000, -1, "/foo/124"), mockStoreFile(true, 99, 1000, -1, "/foo/126"),
      mockStoreFile(true, 98, 2000, -1, "/foo/126"), mockStoreFile(false, 3453, -1, 1, "/foo/1"),
      mockStoreFile(false, 2, -1, 3, "/foo/2"), mockStoreFile(false, 1000, -1, 5, "/foo/2"),
      mockStoreFile(false, 76, -1, 5, "/foo/3"));
  }

  /**
   * Assert that the given comparator orders the given storefiles in the same way that they're
   * passed.
   */
  private void assertOrdering(Comparator<? super HStoreFile> comparator, HStoreFile... sfs) {
    ArrayList<HStoreFile> sorted = Lists.newArrayList(sfs);
    Collections.shuffle(sorted);
    Collections.sort(sorted, comparator);
    LOG.debug("sfs: " + Joiner.on(",").join(sfs));
    LOG.debug("sorted: " + Joiner.on(",").join(sorted));
    assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
  }

  /**
   * Create a mock StoreFile with the given attributes.
   */
  private HStoreFile mockStoreFile(boolean bulkLoad, long size, long bulkTimestamp, long seqId,
    String path) {
    HStoreFile mock = Mockito.mock(HStoreFile.class);
    StoreFileReader reader = Mockito.mock(StoreFileReader.class);

    Mockito.doReturn(size).when(reader).length();

    Mockito.doReturn(reader).when(mock).getReader();
    Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
    Mockito.doReturn(OptionalLong.of(bulkTimestamp)).when(mock).getBulkLoadTimestamp();
    Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
    Mockito.doReturn(new Path(path)).when(mock).getPath();
    String name = "mock storefile, bulkLoad=" + bulkLoad + " bulkTimestamp=" + bulkTimestamp
      + " seqId=" + seqId + " path=" + path;
    Mockito.doReturn(name).when(mock).toString();
    return mock;
  }

  /**
   * Generate a list of KeyValues for testing based on given parameters
   * @return the rows key-value list
   */
  List<KeyValue> getKeyValueSet(long[] timestamps, int numRows, byte[] qualifier, byte[] family) {
    List<KeyValue> kvList = new ArrayList<>();
    for (int i = 1; i <= numRows; i++) {
      byte[] b = Bytes.toBytes(i);
      LOG.info(Bytes.toString(b));
      for (long timestamp : timestamps) {
        kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
      }
    }
    return kvList;
  }

  /**
   * Test to ensure correctness when using StoreFile with multiple timestamps
   */
  @Test
  public void testMultipleTimestamps() throws IOException {
    byte[] family = Bytes.toBytes("familyname");
    byte[] qualifier = Bytes.toBytes("qualifier");
    int numRows = 10;
    long[] timestamps = new long[] { 20, 10, 5, 1 };
    Scan scan = new Scan();

    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path storedir = new Path(new Path(testDir, "7e0102"), Bytes.toString(family));
    Path dir = new Path(storedir, "1234567890");
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withOutputDir(dir).withFileContext(meta).build();

    List<KeyValue> kvList = getKeyValueSet(timestamps, numRows, qualifier, family);

    for (KeyValue kv : kvList) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();

    HStoreFile hsf =
      new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    HStore store = mock(HStore.class);
    when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of(family));
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
    TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
    columns.add(qualifier);

    scan.setTimeRange(20, 100);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(1, 2);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(8, 10);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // lets make sure it still works with column family time ranges
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // This test relies on the timestamp range optimization
    scan = new Scan();
    scan.setTimeRange(27, 50);
    assertTrue(!scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // should still use the scanner because we override the family time range
    scan = new Scan();
    scan.setTimeRange(27, 50);
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
  }

  @Test
  public void testCacheOnWriteEvictOnClose() throws Exception {
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
    Path baseDir = new Path(new Path(testDir, "7e0102"), "twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = BlockCacheFactory.createBlockCache(conf);
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf, bc);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFileWriter writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    HStoreFile hsf =
      new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    LOG.debug(hsf.getPath().toString());

    // Read this file, we should see 3 misses
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = getStoreFileScanner(reader, true, true);
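    // KeyValue.LOWESTKEY sorts before any real cell, so this scan reads the file
    // end to end and touches all three blocks.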
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf, bc);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    hsf = new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);

    // Read this file, we should see 3 hits
    hsf.initReader();
    reader = hsf.getReader();
    scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Let's read back the two files to ensure the blocks exactly match
    hsf = new HStoreFile(this.fs, pathCowOff, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader readerOne = hsf.getReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = getStoreFileScanner(readerOne, true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsf = new HStoreFile(this.fs, pathCowOn, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader readerTwo = hsf.getReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = getStoreFileScanner(readerTwo, true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    Cell kv1 = null;
    Cell kv2 = null;
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      KeyValue keyv1 = KeyValueUtil.ensureKeyValue(kv1);
      KeyValue keyv2 = KeyValueUtil.ensureKeyValue(kv2);
      assertTrue(Bytes.compareTo(keyv1.getBuffer(), keyv1.getKeyOffset(), keyv1.getKeyLength(),
        keyv2.getBuffer(), keyv2.getKeyOffset(), keyv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(kv1.getValueArray(), kv1.getValueOffset(), kv1.getValueLength(),
        kv2.getValueArray(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    assertNull(scannerTwo.next());
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf, bc);
    hsf = new HStoreFile(this.fs, pathCowOff, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    reader = hsf.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions but the evict count stat should not change. Eviction because
    // of HFile invalidation is not counted along with normal evictions
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf, bc);
    hsf = new HStoreFile(this.fs, pathCowOn, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    reader = hsf.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }

  private Path splitStoreFile(final HRegionFileSystem regionFs, final RegionInfo hri,
    final String family, final HStoreFile sf, final byte[] splitKey, boolean isTopRef)
    throws IOException {
    Path path = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef, null);
    if (null == path) {
      return null;
    }
    List<Path> splitFiles = new ArrayList<>();
    splitFiles.add(path);
    MasterProcedureEnv mockEnv = mock(MasterProcedureEnv.class);
    MasterServices mockServices = mock(MasterServices.class);
    when(mockEnv.getMasterServices()).thenReturn(mockServices);
    when(mockEnv.getMasterConfiguration()).thenReturn(new Configuration());
    TableDescriptors mockTblDescs = mock(TableDescriptors.class);
    when(mockServices.getTableDescriptors()).thenReturn(mockTblDescs);
    TableDescriptor mockTblDesc = TableDescriptorBuilder.newBuilder(hri.getTable())
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
    when(mockTblDescs.get(any())).thenReturn(mockTblDesc);
    Path regionDir = regionFs.commitDaughterRegion(hri, splitFiles, mockEnv);
    return new Path(new Path(regionDir, family), path.getName());
  }

  private StoreFileWriter writeStoreFile(Configuration conf, CacheConfig cacheConf, Path path,
    int numBlocks) throws IOException {
    // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
    int numKVs = 5 * numBlocks;
    List<KeyValue> kvs = new ArrayList<>(numKVs);
    byte[] b = Bytes.toBytes("x");
    int totalSize = 0;
    for (int i = numKVs; i > 0; i--) {
      KeyValue kv = new KeyValue(b, b, b, i, b);
      kvs.add(kv);
      // kv has memstoreTS 0, which takes 1 byte to store.
      totalSize += kv.getLength() + 1;
    }
    int blockSize = totalSize / numBlocks;
    HFileContext meta = new HFileContextBuilder().withBlockSize(blockSize).withChecksumType(CKTYPE)
      .withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    // We'll write N-1 KVs to ensure we don't write an extra block
    kvs.remove(kvs.size() - 1);
    for (KeyValue kv : kvs) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();
    return writer;
  }

  /**
   * Check if data block encoding information is saved correctly in HFile's file info.
   */
  @Test
  public void testDataBlockEncodingMetaData() throws IOException {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writer.close();

    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertArrayEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
  }

  @Test
  public void testDataBlockSizeEncoded() throws Exception {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;

    conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1);
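    // A ratio of 1 makes the writer size blocks on the encoded data size, so on-disk
    // blocks should stay close to BLOCKSIZE_SMALL (verified by the loop below).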

    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeStoreFile(writer);

    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value));

    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);

    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
      if (offset <= max) {
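        // Every block except the last should land within 5% above the configured block size.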
        assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
      }
    }
  }

  @Test
  public void testDataBlockSizeCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR,
      PreviousBlockCompressionRatePredicator.class.getName());
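    // Once the first couple of blocks establish a compression rate, each mid-file block
    // (the 3rd through 10th) is expected to buffer at least 10x the configured block size
    // of uncompressed data before being flushed.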
    testDataBlockSizeWithCompressionRatePredicator(12,
      (s, c) -> (c > 2 && c < 11) ? s >= BLOCKSIZE_SMALL * 10 : true);
  }

  @Test
  public void testDataBlockSizeUnCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR, UncompressedBlockSizePredicator.class.getName());
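    // With the uncompressed predicator, blocks close on raw size, so every block stays
    // under 10x the configured size and far more blocks (200) are written.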
    testDataBlockSizeWithCompressionRatePredicator(200, (s, c) -> s < BLOCKSIZE_SMALL * 10);
  }

  private void testDataBlockSizeWithCompressionRatePredicator(int expectedBlockCount,
    BiFunction<Integer, Integer, Boolean> validation) throws Exception {
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");
    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo)
        .withCompression(Compression.Algorithm.GZ).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeLargeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()), 200);
    writer.close();
    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);
    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    int blockCount = 0;
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      blockCount++;
      assertTrue(validation.apply(block.getUncompressedSizeWithoutHeader(), blockCount));
    }
    assertEquals(expectedBlockCount, blockCount);
  }

}