001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.BLOCK_COMPRESSED_SIZE_PREDICATOR; 021import static org.junit.jupiter.api.Assertions.assertArrayEquals; 022import static org.junit.jupiter.api.Assertions.assertEquals; 023import static org.junit.jupiter.api.Assertions.assertFalse; 024import static org.junit.jupiter.api.Assertions.assertNotNull; 025import static org.junit.jupiter.api.Assertions.assertNull; 026import static org.junit.jupiter.api.Assertions.assertTrue; 027import static org.junit.jupiter.api.Assertions.fail; 028import static org.mockito.ArgumentMatchers.any; 029import static org.mockito.Mockito.mock; 030import static org.mockito.Mockito.when; 031 032import java.io.IOException; 033import java.nio.ByteBuffer; 034import java.util.ArrayList; 035import java.util.Arrays; 036import java.util.Collections; 037import java.util.Comparator; 038import java.util.List; 039import java.util.Map; 040import java.util.OptionalLong; 041import java.util.TreeSet; 042import java.util.function.BiFunction; 043import org.apache.hadoop.conf.Configuration; 044import org.apache.hadoop.fs.FileSystem; 045import org.apache.hadoop.fs.Path; 046import org.apache.hadoop.hbase.Cell; 047import org.apache.hadoop.hbase.CellUtil; 048import org.apache.hadoop.hbase.ExtendedCell; 049import org.apache.hadoop.hbase.HBaseTestingUtil; 050import org.apache.hadoop.hbase.HConstants; 051import org.apache.hadoop.hbase.KeyValue; 052import org.apache.hadoop.hbase.KeyValueUtil; 053import org.apache.hadoop.hbase.PrivateCellUtil; 054import org.apache.hadoop.hbase.TableDescriptors; 055import org.apache.hadoop.hbase.TableName; 056import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 057import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 058import org.apache.hadoop.hbase.client.RegionInfo; 059import org.apache.hadoop.hbase.client.RegionInfoBuilder; 060import org.apache.hadoop.hbase.client.Scan; 061import org.apache.hadoop.hbase.client.TableDescriptor; 062import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 063import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 064import org.apache.hadoop.hbase.io.HFileLink; 065import org.apache.hadoop.hbase.io.compress.Compression; 066import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 067import org.apache.hadoop.hbase.io.hfile.BlockCache; 068import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory; 069import org.apache.hadoop.hbase.io.hfile.CacheConfig; 070import org.apache.hadoop.hbase.io.hfile.CacheStats; 071import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer; 072import org.apache.hadoop.hbase.io.hfile.HFile; 073import org.apache.hadoop.hbase.io.hfile.HFileBlock; 074import org.apache.hadoop.hbase.io.hfile.HFileContext; 075import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; 076import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; 077import org.apache.hadoop.hbase.io.hfile.HFileScanner; 078import org.apache.hadoop.hbase.io.hfile.PreviousBlockCompressionRatePredicator; 079import org.apache.hadoop.hbase.io.hfile.ReaderContext; 080import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder; 081import org.apache.hadoop.hbase.io.hfile.UncompressedBlockSizePredicator; 082import org.apache.hadoop.hbase.master.MasterServices; 083import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 084import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; 085import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; 086import org.apache.hadoop.hbase.testclassification.MediumTests; 087import org.apache.hadoop.hbase.testclassification.RegionServerTests; 088import org.apache.hadoop.hbase.util.BloomFilterFactory; 089import org.apache.hadoop.hbase.util.Bytes; 090import org.apache.hadoop.hbase.util.ChecksumType; 091import org.apache.hadoop.hbase.util.CommonFSUtils; 092import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 093import org.junit.jupiter.api.AfterAll; 094import org.junit.jupiter.api.BeforeEach; 095import org.junit.jupiter.api.Tag; 096import org.junit.jupiter.api.Test; 097import org.junit.jupiter.api.TestInfo; 098import org.mockito.Mockito; 099import org.slf4j.Logger; 100import org.slf4j.LoggerFactory; 101 102import org.apache.hbase.thirdparty.com.google.common.base.Joiner; 103import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; 104import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 105 106/** 107 * Test HStoreFile 108 */ 109@Tag(RegionServerTests.TAG) 110@Tag(MediumTests.TAG) 111public class TestHStoreFile { 112 113 private static final Logger LOG = LoggerFactory.getLogger(TestHStoreFile.class); 114 private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 115 private CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration()); 116 private static Path ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile"); 117 private static final ChecksumType CKTYPE = ChecksumType.CRC32C; 118 private static final int CKBYTES = 512; 119 private static String TEST_FAMILY = "cf"; 120 private static final char FIRST_CHAR = 'a'; 121 private static final char LAST_CHAR = 'z'; 122 private String name; 123 124 private Configuration conf; 125 private Path testDir; 126 private FileSystem fs; 127 128 @BeforeEach 129 public void setUp(TestInfo testInfo) throws IOException { 130 this.name = testInfo.getTestMethod().get().getName(); 131 conf = TEST_UTIL.getConfiguration(); 132 testDir = TEST_UTIL.getDataTestDir(name); 133 fs = testDir.getFileSystem(conf); 134 } 135 136 @AfterAll 137 public static void tearDownAfterClass() { 138 TEST_UTIL.cleanupTestDir(); 139 } 140 141 /** 142 * Write a file and then assert that we can read from top and bottom halves using two 143 * HalfMapFiles, as well as one HalfMapFile and one HFileLink file. 144 */ 145 146 @Test 147 public void testBasicHalfAndHFileLinkMapFile() throws Exception { 148 final RegionInfo hri = 149 RegionInfoBuilder.newBuilder(TableName.valueOf("testBasicHalfAndHFileLinkMapFile")).build(); 150 // The locations of HFileLink refers hfiles only should be consistent with the table dir 151 // create by CommonFSUtils directory, so we should make the region directory under 152 // the mode of CommonFSUtils.getTableDir here. 153 HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs, 154 CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), hri.getTable()), hri); 155 156 HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build(); 157 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs) 158 .withFilePath(regionFs.createTempName()).withFileContext(meta).build(); 159 writeStoreFile(writer); 160 161 Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath()); 162 StoreFileTracker sft = StoreFileTrackerFactory.create(conf, false, 163 StoreContext.getBuilder() 164 .withFamilyStoreDirectoryPath(new Path(regionFs.getRegionDir(), TEST_FAMILY)) 165 .withColumnFamilyDescriptor(ColumnFamilyDescriptorBuilder.of(TEST_FAMILY)) 166 .withRegionFileSystem(regionFs).build()); 167 HStoreFile sf = new HStoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE, true, sft); 168 checkHalfHFile(regionFs, sf, sft); 169 } 170 171 private void writeStoreFile(final StoreFileWriter writer) throws IOException { 172 writeStoreFile(writer, Bytes.toBytes(name), Bytes.toBytes(name)); 173 } 174 175 // pick an split point (roughly halfway) 176 byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR) / 2, FIRST_CHAR }; 177 178 /* 179 * Writes HStoreKey and ImmutableBytes data to passed writer and then closes it. 180 */ 181 public static void writeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier) 182 throws IOException { 183 long now = EnvironmentEdgeManager.currentTime(); 184 try { 185 for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) { 186 for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) { 187 byte[] b = new byte[] { (byte) d, (byte) e }; 188 writer.append(new KeyValue(b, fam, qualifier, now, b)); 189 } 190 } 191 } finally { 192 writer.close(); 193 } 194 } 195 196 public static void writeLargeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier, 197 int rounds) throws IOException { 198 long now = EnvironmentEdgeManager.currentTime(); 199 try { 200 for (int i = 0; i < rounds; i++) { 201 for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) { 202 for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) { 203 byte[] b = new byte[] { (byte) d, (byte) e }; 204 byte[] key = new byte[] { (byte) i }; 205 writer.append(new KeyValue(key, fam, qualifier, now, b)); 206 } 207 } 208 } 209 } finally { 210 writer.close(); 211 } 212 } 213 214 /** 215 * Test that our mechanism of writing store files in one region to reference store files in other 216 * regions works. 217 */ 218 @Test 219 public void testReference() throws IOException { 220 final RegionInfo hri = 221 RegionInfoBuilder.newBuilder(TableName.valueOf("testReferenceTb")).build(); 222 HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs, 223 new Path(testDir, hri.getTable().getNameAsString()), hri); 224 225 HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build(); 226 // Make a store file and write data to it. 227 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs) 228 .withFilePath(regionFs.createTempName()).withFileContext(meta).build(); 229 writeStoreFile(writer); 230 231 Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath()); 232 StoreFileTracker sft = StoreFileTrackerFactory.create(conf, false, 233 StoreContext.getBuilder() 234 .withFamilyStoreDirectoryPath(new Path(regionFs.getRegionDir(), TEST_FAMILY)) 235 .withRegionFileSystem(regionFs).build()); 236 HStoreFile hsf = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true, sft); 237 hsf.initReader(); 238 StoreFileReader reader = hsf.getReader(); 239 // Split on a row, not in middle of row. Midkey returned by reader 240 // may be in middle of row. Create new one with empty column and 241 // timestamp. 242 byte[] midRow = CellUtil.cloneRow(reader.midKey().get()); 243 byte[] finalRow = CellUtil.cloneRow(reader.getLastKey().get()); 244 hsf.closeStoreFile(true); 245 246 // Make a reference 247 RegionInfo splitHri = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(midRow).build(); 248 Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true, sft); 249 HStoreFile refHsf = 250 new HStoreFile(this.fs, refPath, conf, cacheConf, BloomType.NONE, true, sft); 251 refHsf.initReader(); 252 // Now confirm that I can read from the reference and that it only gets 253 // keys from top half of the file. 254 try (HFileScanner s = refHsf.getReader().getScanner(false, false, false)) { 255 Cell kv = null; 256 for (boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) { 257 ByteBuffer bb = ByteBuffer.wrap(((KeyValue) s.getKey()).getKey()); 258 kv = KeyValueUtil.createKeyValueFromKey(bb); 259 if (first) { 260 assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), midRow, 0, 261 midRow.length)); 262 first = false; 263 } 264 } 265 assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), finalRow, 0, 266 finalRow.length)); 267 } 268 } 269 270 @Test 271 public void testStoreFileReference() throws Exception { 272 final RegionInfo hri = 273 RegionInfoBuilder.newBuilder(TableName.valueOf("testStoreFileReference")).build(); 274 HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs, 275 new Path(testDir, hri.getTable().getNameAsString()), hri); 276 HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build(); 277 278 // Make a store file and write data to it. 279 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs) 280 .withFilePath(regionFs.createTempName()).withFileContext(meta).build(); 281 writeStoreFile(writer); 282 Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath()); 283 writer.close(); 284 StoreFileTracker sft = StoreFileTrackerFactory.create(conf, false, 285 StoreContext.getBuilder() 286 .withFamilyStoreDirectoryPath(new Path(regionFs.getRegionDir(), TEST_FAMILY)) 287 .withRegionFileSystem(regionFs).build()); 288 HStoreFile file = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true, sft); 289 file.initReader(); 290 StoreFileReader r = file.getReader(); 291 assertNotNull(r); 292 StoreFileScanner scanner = 293 new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false, false); 294 295 // Verify after instantiating scanner refCount is increased 296 assertTrue(file.isReferencedInReads(), "Verify file is being referenced"); 297 scanner.close(); 298 // Verify after closing scanner refCount is decreased 299 assertFalse(file.isReferencedInReads(), "Verify file is not being referenced"); 300 } 301 302 @Test 303 public void testEmptyStoreFileRestrictKeyRanges() throws Exception { 304 StoreFileReader reader = mock(StoreFileReader.class); 305 HStore store = mock(HStore.class); 306 byte[] cf = Bytes.toBytes("ty"); 307 ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf); 308 when(store.getColumnFamilyDescriptor()).thenReturn(cfd); 309 try (StoreFileScanner scanner = 310 new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true, false)) { 311 Scan scan = new Scan(); 312 scan.setColumnFamilyTimeRange(cf, 0, 1); 313 assertFalse(scanner.shouldUseScanner(scan, store, 0)); 314 } 315 } 316 317 @Test 318 public void testHFileLink() throws IOException { 319 final RegionInfo hri = 320 RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build(); 321 // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/ 322 Configuration testConf = new Configuration(this.conf); 323 CommonFSUtils.setRootDir(testConf, testDir); 324 HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs, 325 CommonFSUtils.getTableDir(testDir, hri.getTable()), hri); 326 final RegionInfo dstHri = 327 RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build(); 328 HRegionFileSystem dstRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs, 329 CommonFSUtils.getTableDir(testDir, dstHri.getTable()), dstHri); 330 HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build(); 331 332 // Make a store file and write data to it. 333 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs) 334 .withFilePath(regionFs.createTempName()).withFileContext(meta).build(); 335 writeStoreFile(writer); 336 337 Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath()); 338 Path dstPath = 339 new Path(regionFs.getTableDir(), new Path(dstHri.getRegionNameAsString(), TEST_FAMILY)); 340 Path linkFilePath = 341 new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName())); 342 343 // Try to open store file from link 344 345 // this should be the SFT for the destination link file path, though it is not 346 // being used right now, for the next patch file link creation logic also would 347 // move to SFT interface. 348 StoreFileTracker sft = StoreFileTrackerFactory.create(testConf, false, 349 StoreContext.getBuilder() 350 .withFamilyStoreDirectoryPath(new Path(dstHri.getRegionNameAsString(), TEST_FAMILY)) 351 .withColumnFamilyDescriptor(ColumnFamilyDescriptorBuilder.of(TEST_FAMILY)) 352 .withRegionFileSystem(dstRegionFs).build()); 353 sft.createHFileLink(hri.getTable(), hri.getEncodedName(), storeFilePath.getName(), true); 354 StoreFileInfo storeFileInfo = sft.getStoreFileInfo(linkFilePath, true); 355 HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf); 356 assertTrue(storeFileInfo.isLink()); 357 hsf.initReader(); 358 359 // Now confirm that I can read from the link 360 int count = 0; 361 try (StoreFileScanner scanner = hsf.getPreadScanner(false, Long.MAX_VALUE, 0, false)) { 362 scanner.seek(KeyValue.LOWESTKEY); 363 while (scanner.next() != null) { 364 count++; 365 } 366 } 367 assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count); 368 } 369 370 @Test 371 public void testsample() { 372 Path p1 = new Path("/r1/c1"); 373 Path p2 = new Path("f1"); 374 System.out.println(new Path(p1, p2).toString()); 375 } 376 377 /** 378 * This test creates an hfile and then the dir structures and files to verify that references to 379 * hfilelinks (created by snapshot clones) can be properly interpreted. 380 */ 381 @Test 382 public void testReferenceToHFileLink() throws IOException { 383 // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/ 384 Configuration testConf = new Configuration(this.conf); 385 CommonFSUtils.setRootDir(testConf, testDir); 386 387 // adding legal table name chars to verify regex handles it. 388 RegionInfo hri = RegionInfoBuilder.newBuilder(TableName.valueOf("_original-evil-name")).build(); 389 HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs, 390 CommonFSUtils.getTableDir(testDir, hri.getTable()), hri); 391 392 HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build(); 393 // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file> 394 StoreFileWriter writer = new StoreFileWriter.Builder(testConf, cacheConf, this.fs) 395 .withFilePath(regionFs.createTempName()).withFileContext(meta).build(); 396 writeStoreFile(writer); 397 Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath()); 398 399 // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table> 400 RegionInfo hriClone = RegionInfoBuilder.newBuilder(TableName.valueOf("clone")).build(); 401 HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs, 402 CommonFSUtils.getTableDir(testDir, hri.getTable()), hriClone); 403 Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY); 404 Path linkFilePath = 405 new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName())); 406 407 // create splits of the link. 408 // <root>/clone/splitA/<cf>/<reftohfilelink>, 409 // <root>/clone/splitB/<cf>/<reftohfilelink> 410 RegionInfo splitHriA = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(SPLITKEY).build(); 411 RegionInfo splitHriB = 412 RegionInfoBuilder.newBuilder(hri.getTable()).setStartKey(SPLITKEY).build(); 413 414 StoreFileTracker sft = StoreFileTrackerFactory.create(testConf, true, 415 StoreContext.getBuilder() 416 .withFamilyStoreDirectoryPath(new Path(hriClone.getRegionNameAsString(), TEST_FAMILY)) 417 .withColumnFamilyDescriptor(ColumnFamilyDescriptorBuilder.of(TEST_FAMILY)) 418 .withRegionFileSystem(cloneRegionFs).build()); 419 sft.createHFileLink(hri.getTable(), hri.getEncodedName(), storeFilePath.getName(), true); 420 421 HRegionFileSystem splitRegionAFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs, 422 CommonFSUtils.getTableDir(testDir, splitHriA.getTable()), splitHriA); 423 StoreFileTracker sftA = StoreFileTrackerFactory.create(testConf, true, 424 StoreContext.getBuilder() 425 .withFamilyStoreDirectoryPath(new Path(splitHriA.getRegionNameAsString(), TEST_FAMILY)) 426 .withRegionFileSystem(splitRegionAFs).build()); 427 HRegionFileSystem splitRegionBFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs, 428 CommonFSUtils.getTableDir(testDir, splitHriB.getTable()), splitHriB); 429 StoreFileTracker sftB = StoreFileTrackerFactory.create(testConf, true, 430 StoreContext.getBuilder() 431 .withFamilyStoreDirectoryPath(new Path(splitHriB.getRegionNameAsString(), TEST_FAMILY)) 432 .withRegionFileSystem(splitRegionBFs).build()); 433 HStoreFile f = new HStoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE, true, sft); 434 f.initReader(); 435 // top 436 Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true, sft); 437 // bottom 438 Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false, sft); 439 f.closeStoreFile(true); 440 // OK test the thing 441 CommonFSUtils.logFileSystemState(fs, testDir, LOG); 442 443 // There is a case where a file with the hfilelink pattern is actually a daughter 444 // reference to a hfile link. This code in StoreFile that handles this case. 445 446 // Try to open store file from link 447 HStoreFile hsfA = 448 new HStoreFile(this.fs, pathA, testConf, cacheConf, BloomType.NONE, true, sftA); 449 hsfA.initReader(); 450 451 // Now confirm that I can read from the ref to link 452 int count = 0; 453 try (StoreFileScanner scanner = hsfA.getPreadScanner(false, Long.MAX_VALUE, 0, false)) { 454 scanner.seek(KeyValue.LOWESTKEY); 455 while (scanner.next() != null) { 456 count++; 457 } 458 assertTrue(count > 0); // read some rows here 459 } 460 461 // Try to open store file from link 462 HStoreFile hsfB = 463 new HStoreFile(this.fs, pathB, testConf, cacheConf, BloomType.NONE, true, sftB); 464 hsfB.initReader(); 465 466 // Now confirm that I can read from the ref to link 467 try (StoreFileScanner scanner = hsfB.getPreadScanner(false, Long.MAX_VALUE, 0, false)) { 468 scanner.seek(KeyValue.LOWESTKEY); 469 while (scanner.next() != null) { 470 count++; 471 } 472 } 473 474 // read the rest of the rows 475 assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count); 476 } 477 478 private void checkHalfHFile(final HRegionFileSystem regionFs, final HStoreFile f, 479 StoreFileTracker sft) throws IOException { 480 f.initReader(); 481 Cell midkey = f.getReader().midKey().get(); 482 KeyValue midKV = (KeyValue) midkey; 483 // 1. test using the midRow as the splitKey, this test will generate two Reference files 484 // in the children 485 byte[] midRow = CellUtil.cloneRow(midKV); 486 // Create top split. 487 RegionInfo topHri = 488 RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable()).setEndKey(SPLITKEY).build(); 489 Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true, sft); 490 // Create bottom split. 491 RegionInfo bottomHri = RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable()) 492 .setStartKey(SPLITKEY).build(); 493 Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false, sft); 494 // Make readers on top and bottom. 495 HStoreFile topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true, sft); 496 topF.initReader(); 497 StoreFileReader top = topF.getReader(); 498 HStoreFile bottomF = 499 new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true, sft); 500 bottomF.initReader(); 501 StoreFileReader bottom = bottomF.getReader(); 502 ByteBuffer previous = null; 503 LOG.info("Midkey: " + midKV.toString()); 504 ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midKV.getKey()); 505 try { 506 // Now make two HalfMapFiles and assert they can read the full backing 507 // file, one from the top and the other from the bottom. 508 // Test bottom half first. 509 // Now test reading from the top. 510 boolean first = true; 511 ByteBuffer key = null; 512 try (HFileScanner topScanner = top.getScanner(false, false, false)) { 513 while ( 514 (!topScanner.isSeeked() && topScanner.seekTo()) 515 || (topScanner.isSeeked() && topScanner.next()) 516 ) { 517 key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey()); 518 519 if ( 520 (PrivateCellUtil.compare(topScanner.getReader().getComparator(), midKV, key.array(), 521 key.arrayOffset(), key.limit())) > 0 522 ) { 523 fail("key=" + Bytes.toStringBinary(key) + " < midkey=" + midkey); 524 } 525 if (first) { 526 first = false; 527 LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key))); 528 } 529 } 530 } 531 LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key))); 532 533 first = true; 534 try (HFileScanner bottomScanner = bottom.getScanner(false, false, false)) { 535 while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) { 536 previous = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey()); 537 key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey()); 538 if (first) { 539 first = false; 540 LOG.info("First in bottom: " + Bytes.toString(Bytes.toBytes(previous))); 541 } 542 assertTrue(key.compareTo(bbMidkeyBytes) < 0); 543 } 544 if (previous != null) { 545 LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous))); 546 } 547 } 548 // Remove references. 549 regionFs.cleanupDaughterRegion(topHri); 550 regionFs.cleanupDaughterRegion(bottomHri); 551 552 // 2. test using a midkey which will generate one Reference file and one HFileLink file. 553 // First, do a key that is < than first key. Ensure splits behave 554 // properly. 555 byte[] badmidkey = Bytes.toBytes(" ."); 556 assertTrue(fs.exists(f.getPath())); 557 topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true, sft); 558 bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false, sft); 559 560 assertNull(bottomPath); 561 562 topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true, sft); 563 topF.initReader(); 564 top = topF.getReader(); 565 // Now read from the top. 566 first = true; 567 try (HFileScanner topScanner = top.getScanner(false, false, false)) { 568 KeyValue.KeyOnlyKeyValue keyOnlyKV = new KeyValue.KeyOnlyKeyValue(); 569 while ((!topScanner.isSeeked() && topScanner.seekTo()) || topScanner.next()) { 570 key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey()); 571 keyOnlyKV.setKey(key.array(), 0 + key.arrayOffset(), key.limit()); 572 assertTrue(PrivateCellUtil.compare(topScanner.getReader().getComparator(), keyOnlyKV, 573 badmidkey, 0, badmidkey.length) >= 0); 574 if (first) { 575 first = false; 576 KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key); 577 LOG.info("First top when key < bottom: " + keyKV); 578 String tmp = 579 Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength()); 580 for (int i = 0; i < tmp.length(); i++) { 581 assertTrue(tmp.charAt(i) == 'a'); 582 } 583 } 584 } 585 KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key); 586 LOG.info("Last top when key < bottom: " + keyKV); 587 String tmp = 588 Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength()); 589 for (int i = 0; i < tmp.length(); i++) { 590 assertTrue(tmp.charAt(i) == 'z'); 591 } 592 } 593 // Remove references. 594 regionFs.cleanupDaughterRegion(topHri); 595 regionFs.cleanupDaughterRegion(bottomHri); 596 597 // Test when badkey is > than last key in file ('||' > 'zz'). 598 badmidkey = Bytes.toBytes("|||"); 599 topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true, sft); 600 bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false, sft); 601 assertNull(topPath); 602 603 bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true, sft); 604 bottomF.initReader(); 605 bottom = bottomF.getReader(); 606 first = true; 607 try (HFileScanner bottomScanner = bottom.getScanner(false, false, false)) { 608 while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) { 609 key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey()); 610 if (first) { 611 first = false; 612 KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key); 613 LOG.info("First bottom when key > top: " + keyKV); 614 String tmp = 615 Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength()); 616 for (int i = 0; i < tmp.length(); i++) { 617 assertTrue(tmp.charAt(i) == 'a'); 618 } 619 } 620 } 621 KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key); 622 LOG.info("Last bottom when key > top: " + keyKV); 623 String tmp = 624 Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength()); 625 for (int i = 0; i < tmp.length(); i++) { 626 assertTrue(Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength()) 627 .charAt(i) == 'z'); 628 } 629 } 630 } finally { 631 if (top != null) { 632 top.close(true); // evict since we are about to delete the file 633 } 634 if (bottom != null) { 635 bottom.close(true); // evict since we are about to delete the file 636 } 637 fs.delete(f.getPath(), true); 638 } 639 } 640 641 private static StoreFileScanner getStoreFileScanner(StoreFileReader reader, boolean cacheBlocks, 642 boolean pread) { 643 return reader.getStoreFileScanner(cacheBlocks, pread, false, 0, 0, false); 644 } 645 646 private static final String localFormatter = "%010d"; 647 648 private void bloomWriteRead(StoreFileWriter writer, FileSystem fs) throws Exception { 649 float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0); 650 Path f = writer.getPath(); 651 long now = EnvironmentEdgeManager.currentTime(); 652 for (int i = 0; i < 2000; i += 2) { 653 String row = String.format(localFormatter, i); 654 KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"), 655 now, Bytes.toBytes("value")); 656 writer.append(kv); 657 } 658 writer.close(); 659 660 ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build(); 661 StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true); 662 storeFileInfo.initHFileInfo(context); 663 StoreFileReader reader = storeFileInfo.createReader(context, cacheConf); 664 storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader()); 665 reader.loadFileInfo(); 666 reader.loadBloomfilter(); 667 StoreFileScanner scanner = getStoreFileScanner(reader, false, false); 668 669 // check false positives rate 670 int falsePos = 0; 671 int falseNeg = 0; 672 for (int i = 0; i < 2000; i++) { 673 String row = String.format(localFormatter, i); 674 TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR); 675 columns.add(Bytes.toBytes("family:col")); 676 677 Scan scan = new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true); 678 scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes("family:col")); 679 HStore store = mock(HStore.class); 680 when(store.getColumnFamilyDescriptor()) 681 .thenReturn(ColumnFamilyDescriptorBuilder.of("family")); 682 boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE); 683 if (i % 2 == 0) { 684 if (!exists) { 685 falseNeg++; 686 } 687 } else { 688 if (exists) { 689 falsePos++; 690 } 691 } 692 } 693 reader.close(true); // evict because we are about to delete the file 694 fs.delete(f, true); 695 assertEquals(0, falseNeg, "False negatives: " + falseNeg); 696 int maxFalsePos = (int) (2 * 2000 * err); 697 assertTrue(falsePos <= maxFalsePos, "Too many false positives: " + falsePos + " (err=" + err 698 + ", expected no more than " + maxFalsePos + ")"); 699 } 700 701 private static final int BLOCKSIZE_SMALL = 8192; 702 703 @Test 704 public void testBloomFilter() throws Exception { 705 conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01); 706 conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true); 707 708 // write the file 709 if (!fs.exists(ROOT_DIR)) { 710 fs.mkdirs(ROOT_DIR); 711 } 712 Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR); 713 HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL) 714 .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build(); 715 // Make a store file and write data to it. 716 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f) 717 .withBloomType(BloomType.ROW).withMaxKeyCount(2000).withFileContext(meta).build(); 718 bloomWriteRead(writer, fs); 719 } 720 721 @Test 722 public void testDeleteFamilyBloomFilter() throws Exception { 723 conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01); 724 conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true); 725 float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0); 726 727 // write the file 728 if (!fs.exists(ROOT_DIR)) { 729 fs.mkdirs(ROOT_DIR); 730 } 731 Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR); 732 733 HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL) 734 .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build(); 735 // Make a store file and write data to it. 736 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f) 737 .withMaxKeyCount(2000).withFileContext(meta).build(); 738 739 // add delete family 740 long now = EnvironmentEdgeManager.currentTime(); 741 for (int i = 0; i < 2000; i += 2) { 742 String row = String.format(localFormatter, i); 743 KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"), 744 now, KeyValue.Type.DeleteFamily, Bytes.toBytes("value")); 745 writer.append(kv); 746 } 747 writer.close(); 748 749 ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build(); 750 StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true); 751 storeFileInfo.initHFileInfo(context); 752 StoreFileReader reader = storeFileInfo.createReader(context, cacheConf); 753 storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader()); 754 reader.loadFileInfo(); 755 reader.loadBloomfilter(); 756 757 // check false positives rate 758 int falsePos = 0; 759 int falseNeg = 0; 760 for (int i = 0; i < 2000; i++) { 761 String row = String.format(localFormatter, i); 762 byte[] rowKey = Bytes.toBytes(row); 763 boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0, rowKey.length); 764 if (i % 2 == 0) { 765 if (!exists) { 766 falseNeg++; 767 } 768 } else { 769 if (exists) { 770 falsePos++; 771 } 772 } 773 } 774 assertEquals(1000, reader.getDeleteFamilyCnt()); 775 reader.close(true); // evict because we are about to delete the file 776 fs.delete(f, true); 777 assertEquals(0, falseNeg, "False negatives: " + falseNeg); 778 int maxFalsePos = (int) (2 * 2000 * err); 779 assertTrue(falsePos <= maxFalsePos, "Too many false positives: " + falsePos + " (err=" + err 780 + ", expected no more than " + maxFalsePos); 781 } 782 783 /** 784 * Test for HBASE-8012 785 */ 786 @Test 787 public void testReseek() throws Exception { 788 // write the file 789 if (!fs.exists(ROOT_DIR)) { 790 fs.mkdirs(ROOT_DIR); 791 } 792 Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR); 793 794 HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build(); 795 // Make a store file and write data to it. 796 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f) 797 .withFileContext(meta).build(); 798 799 writeStoreFile(writer); 800 writer.close(); 801 802 ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build(); 803 StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true); 804 storeFileInfo.initHFileInfo(context); 805 StoreFileReader reader = storeFileInfo.createReader(context, cacheConf); 806 storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader()); 807 808 // Now do reseek with empty KV to position to the beginning of the file 809 810 KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY); 811 StoreFileScanner s = getStoreFileScanner(reader, false, false); 812 s.reseek(k); 813 814 assertNotNull(s.peek(), "Intial reseek should position at the beginning of the file"); 815 } 816 817 @Test 818 public void testBloomTypes() throws Exception { 819 float err = (float) 0.01; 820 FileSystem fs = FileSystem.getLocal(conf); 821 conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err); 822 conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true); 823 824 int rowCount = 50; 825 int colCount = 10; 826 int versions = 2; 827 828 // run once using columns and once using rows 829 BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW }; 830 int[] expKeys = { rowCount * colCount, rowCount }; 831 // below line deserves commentary. it is expected bloom false positives 832 // column = rowCount*2*colCount inserts 833 // row-level = only rowCount*2 inserts, but failures will be magnified by 834 // 2nd for loop for every column (2*colCount) 835 float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err }; 836 837 if (!fs.exists(ROOT_DIR)) { 838 fs.mkdirs(ROOT_DIR); 839 } 840 for (int x : new int[] { 0, 1 }) { 841 // write the file 842 Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR); 843 844 HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL) 845 .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build(); 846 // Make a store file and write data to it. 847 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f) 848 .withBloomType(bt[x]).withMaxKeyCount(expKeys[x]).withFileContext(meta).build(); 849 850 long now = EnvironmentEdgeManager.currentTime(); 851 for (int i = 0; i < rowCount * 2; i += 2) { // rows 852 for (int j = 0; j < colCount * 2; j += 2) { // column qualifiers 853 String row = String.format(localFormatter, i); 854 String col = String.format(localFormatter, j); 855 for (int k = 0; k < versions; ++k) { // versions 856 KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), 857 Bytes.toBytes("col" + col), now - k, Bytes.toBytes(-1L)); 858 writer.append(kv); 859 } 860 } 861 } 862 writer.close(); 863 864 ReaderContext context = 865 new ReaderContextBuilder().withFilePath(f).withFileSize(fs.getFileStatus(f).getLen()) 866 .withFileSystem(fs).withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f)).build(); 867 StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true); 868 storeFileInfo.initHFileInfo(context); 869 StoreFileReader reader = storeFileInfo.createReader(context, cacheConf); 870 storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader()); 871 reader.loadFileInfo(); 872 reader.loadBloomfilter(); 873 StoreFileScanner scanner = getStoreFileScanner(reader, false, false); 874 assertEquals(expKeys[x], reader.getGeneralBloomFilter().getKeyCount()); 875 876 HStore store = mock(HStore.class); 877 when(store.getColumnFamilyDescriptor()) 878 .thenReturn(ColumnFamilyDescriptorBuilder.of("family")); 879 // check false positives rate 880 int falsePos = 0; 881 int falseNeg = 0; 882 for (int i = 0; i < rowCount * 2; ++i) { // rows 883 for (int j = 0; j < colCount * 2; ++j) { // column qualifiers 884 String row = String.format(localFormatter, i); 885 String col = String.format(localFormatter, j); 886 TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR); 887 columns.add(Bytes.toBytes("col" + col)); 888 889 Scan scan = 890 new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true); 891 scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes(("col" + col))); 892 893 boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE); 894 boolean shouldRowExist = i % 2 == 0; 895 boolean shouldColExist = j % 2 == 0; 896 shouldColExist = shouldColExist || bt[x] == BloomType.ROW; 897 if (shouldRowExist && shouldColExist) { 898 if (!exists) { 899 falseNeg++; 900 } 901 } else { 902 if (exists) { 903 falsePos++; 904 } 905 } 906 } 907 } 908 reader.close(true); // evict because we are about to delete the file 909 fs.delete(f, true); 910 System.out.println(bt[x].toString()); 911 System.out.println(" False negatives: " + falseNeg); 912 System.out.println(" False positives: " + falsePos); 913 assertEquals(0, falseNeg); 914 assertTrue(falsePos < 2 * expErr[x]); 915 } 916 } 917 918 @Test 919 public void testSeqIdComparator() { 920 assertOrdering(StoreFileComparators.SEQ_ID, mockStoreFile(true, 100, 1000, -1, "/foo/123"), 921 mockStoreFile(true, 100, 1000, -1, "/foo/124"), mockStoreFile(true, 99, 1000, -1, "/foo/126"), 922 mockStoreFile(true, 98, 2000, -1, "/foo/126"), mockStoreFile(false, 3453, -1, 1, "/foo/1"), 923 mockStoreFile(false, 2, -1, 3, "/foo/2"), mockStoreFile(false, 1000, -1, 5, "/foo/2"), 924 mockStoreFile(false, 76, -1, 5, "/foo/3")); 925 } 926 927 /** 928 * Assert that the given comparator orders the given storefiles in the same way that they're 929 * passed. 930 */ 931 private void assertOrdering(Comparator<? super HStoreFile> comparator, HStoreFile... sfs) { 932 ArrayList<HStoreFile> sorted = Lists.newArrayList(sfs); 933 Collections.shuffle(sorted); 934 Collections.sort(sorted, comparator); 935 LOG.debug("sfs: " + Joiner.on(",").join(sfs)); 936 LOG.debug("sorted: " + Joiner.on(",").join(sorted)); 937 assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted)); 938 } 939 940 /** 941 * Create a mock StoreFile with the given attributes. 942 */ 943 private HStoreFile mockStoreFile(boolean bulkLoad, long size, long bulkTimestamp, long seqId, 944 String path) { 945 HStoreFile mock = Mockito.mock(HStoreFile.class); 946 StoreFileReader reader = Mockito.mock(StoreFileReader.class); 947 948 Mockito.doReturn(size).when(reader).length(); 949 950 Mockito.doReturn(reader).when(mock).getReader(); 951 Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult(); 952 Mockito.doReturn(OptionalLong.of(bulkTimestamp)).when(mock).getBulkLoadTimestamp(); 953 Mockito.doReturn(seqId).when(mock).getMaxSequenceId(); 954 Mockito.doReturn(new Path(path)).when(mock).getPath(); 955 String name = "mock storefile, bulkLoad=" + bulkLoad + " bulkTimestamp=" + bulkTimestamp 956 + " seqId=" + seqId + " path=" + path; 957 Mockito.doReturn(name).when(mock).toString(); 958 return mock; 959 } 960 961 /** 962 * Generate a list of KeyValues for testing based on given parameters 963 * @return the rows key-value list 964 */ 965 List<KeyValue> getKeyValueSet(long[] timestamps, int numRows, byte[] qualifier, byte[] family) { 966 List<KeyValue> kvList = new ArrayList<>(); 967 for (int i = 1; i <= numRows; i++) { 968 byte[] b = Bytes.toBytes(i); 969 LOG.info(Bytes.toString(b)); 970 LOG.info(Bytes.toString(b)); 971 for (long timestamp : timestamps) { 972 kvList.add(new KeyValue(b, family, qualifier, timestamp, b)); 973 } 974 } 975 return kvList; 976 } 977 978 /** 979 * Test to ensure correctness when using StoreFile with multiple timestamps 980 */ 981 @Test 982 public void testMultipleTimestamps() throws IOException { 983 byte[] family = Bytes.toBytes("familyname"); 984 byte[] qualifier = Bytes.toBytes("qualifier"); 985 int numRows = 10; 986 long[] timestamps = new long[] { 20, 10, 5, 1 }; 987 Scan scan = new Scan(); 988 989 // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname. 990 Path storedir = new Path(new Path(testDir, "7e0102"), Bytes.toString(family)); 991 Path dir = new Path(storedir, "1234567890"); 992 HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build(); 993 // Make a store file and write data to it. 994 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs) 995 .withOutputDir(dir).withFileContext(meta).build(); 996 997 List<KeyValue> kvList = getKeyValueSet(timestamps, numRows, qualifier, family); 998 999 for (KeyValue kv : kvList) { 1000 writer.append(kv); 1001 } 1002 writer.appendMetadata(0, false); 1003 writer.close(); 1004 1005 StoreFileInfo storeFileInfo = 1006 StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true); 1007 HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf); 1008 HStore store = mock(HStore.class); 1009 when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of(family)); 1010 hsf.initReader(); 1011 StoreFileReader reader = hsf.getReader(); 1012 StoreFileScanner scanner = getStoreFileScanner(reader, false, false); 1013 TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR); 1014 columns.add(qualifier); 1015 1016 scan.setTimeRange(20, 100); 1017 assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE)); 1018 1019 scan.setTimeRange(1, 2); 1020 assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE)); 1021 1022 scan.setTimeRange(8, 10); 1023 assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE)); 1024 1025 // lets make sure it still works with column family time ranges 1026 scan.setColumnFamilyTimeRange(family, 7, 50); 1027 assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE)); 1028 1029 // This test relies on the timestamp range optimization 1030 scan = new Scan(); 1031 scan.setTimeRange(27, 50); 1032 assertTrue(!scanner.shouldUseScanner(scan, store, Long.MIN_VALUE)); 1033 1034 // should still use the scanner because we override the family time range 1035 scan = new Scan(); 1036 scan.setTimeRange(27, 50); 1037 scan.setColumnFamilyTimeRange(family, 7, 50); 1038 assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE)); 1039 } 1040 1041 @Test 1042 public void testCacheOnWriteEvictOnClose() throws Exception { 1043 Configuration conf = this.conf; 1044 1045 // Find a home for our files (regiondir ("7e0102") and familyname). 1046 Path baseDir = new Path(new Path(testDir, "7e0102"), "twoCOWEOC"); 1047 1048 // Grab the block cache and get the initial hit/miss counts 1049 BlockCache bc = BlockCacheFactory.createBlockCache(conf); 1050 assertNotNull(bc); 1051 CacheStats cs = bc.getStats(); 1052 long startHit = cs.getHitCount(); 1053 long startMiss = cs.getMissCount(); 1054 long startEvicted = cs.getEvictedCount(); 1055 1056 // Let's write a StoreFile with three blocks, with cache on write off 1057 conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false); 1058 CacheConfig cacheConf = new CacheConfig(conf, bc); 1059 Path pathCowOff = new Path(baseDir, "123456789"); 1060 StoreFileWriter writer = writeStoreFile(conf, cacheConf, pathCowOff, 3); 1061 StoreFileInfo storeFileInfo = 1062 StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true); 1063 HStoreFile hsfCowOff = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf); 1064 LOG.debug(hsfCowOff.getPath().toString()); 1065 1066 // Read this file, we should see 3 misses 1067 hsfCowOff.initReader(); 1068 StoreFileReader reader = hsfCowOff.getReader(); 1069 reader.loadFileInfo(); 1070 StoreFileScanner scanner = getStoreFileScanner(reader, true, true); 1071 scanner.seek(KeyValue.LOWESTKEY); 1072 while (scanner.next() != null) { 1073 continue; 1074 } 1075 assertEquals(startHit, cs.getHitCount()); 1076 assertEquals(startMiss + 3, cs.getMissCount()); 1077 assertEquals(startEvicted, cs.getEvictedCount()); 1078 startMiss += 3; 1079 scanner.close(); 1080 reader.close(cacheConf.shouldEvictOnClose()); 1081 1082 // Now write a StoreFile with three blocks, with cache on write on 1083 conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true); 1084 cacheConf = new CacheConfig(conf, bc); 1085 Path pathCowOn = new Path(baseDir, "123456788"); 1086 writer = writeStoreFile(conf, cacheConf, pathCowOn, 3); 1087 storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true); 1088 HStoreFile hsfCowOn = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf); 1089 1090 // Read this file, we should see 3 hits 1091 hsfCowOn.initReader(); 1092 reader = hsfCowOn.getReader(); 1093 scanner = getStoreFileScanner(reader, true, true); 1094 scanner.seek(KeyValue.LOWESTKEY); 1095 while (scanner.next() != null) { 1096 continue; 1097 } 1098 assertEquals(startHit + 3, cs.getHitCount()); 1099 assertEquals(startMiss, cs.getMissCount()); 1100 assertEquals(startEvicted, cs.getEvictedCount()); 1101 startHit += 3; 1102 scanner.close(); 1103 reader.close(cacheConf.shouldEvictOnClose()); 1104 1105 // Let's read back the two files to ensure the blocks exactly match 1106 hsfCowOff.initReader(); 1107 StoreFileReader readerOne = hsfCowOff.getReader(); 1108 readerOne.loadFileInfo(); 1109 StoreFileScanner scannerOne = getStoreFileScanner(readerOne, true, true); 1110 scannerOne.seek(KeyValue.LOWESTKEY); 1111 hsfCowOn.initReader(); 1112 StoreFileReader readerTwo = hsfCowOn.getReader(); 1113 readerTwo.loadFileInfo(); 1114 StoreFileScanner scannerTwo = getStoreFileScanner(readerTwo, true, true); 1115 scannerTwo.seek(KeyValue.LOWESTKEY); 1116 ExtendedCell kv1 = null; 1117 ExtendedCell kv2 = null; 1118 while ((kv1 = scannerOne.next()) != null) { 1119 kv2 = scannerTwo.next(); 1120 assertTrue(kv1.equals(kv2)); 1121 KeyValue keyv1 = KeyValueUtil.ensureKeyValue(kv1); 1122 KeyValue keyv2 = KeyValueUtil.ensureKeyValue(kv2); 1123 assertTrue(Bytes.compareTo(keyv1.getBuffer(), keyv1.getKeyOffset(), keyv1.getKeyLength(), 1124 keyv2.getBuffer(), keyv2.getKeyOffset(), keyv2.getKeyLength()) == 0); 1125 assertTrue(Bytes.compareTo(kv1.getValueArray(), kv1.getValueOffset(), kv1.getValueLength(), 1126 kv2.getValueArray(), kv2.getValueOffset(), kv2.getValueLength()) == 0); 1127 } 1128 assertNull(scannerTwo.next()); 1129 assertEquals(startHit + 6, cs.getHitCount()); 1130 assertEquals(startMiss, cs.getMissCount()); 1131 assertEquals(startEvicted, cs.getEvictedCount()); 1132 startHit += 6; 1133 scannerOne.close(); 1134 readerOne.close(cacheConf.shouldEvictOnClose()); 1135 scannerTwo.close(); 1136 readerTwo.close(cacheConf.shouldEvictOnClose()); 1137 1138 // Let's close the first file with evict on close turned on 1139 conf.setBoolean("hbase.rs.evictblocksonclose", true); 1140 cacheConf = new CacheConfig(conf, bc); 1141 hsfCowOff.initReader(); 1142 reader = hsfCowOff.getReader(); 1143 reader.close(cacheConf.shouldEvictOnClose()); 1144 1145 // We should have 3 new evictions but the evict count stat should not change. Eviction because 1146 // of HFile invalidation is not counted along with normal evictions 1147 assertEquals(startHit, cs.getHitCount()); 1148 assertEquals(startMiss, cs.getMissCount()); 1149 assertEquals(startEvicted, cs.getEvictedCount()); 1150 1151 // Let's close the second file with evict on close turned off 1152 conf.setBoolean("hbase.rs.evictblocksonclose", false); 1153 cacheConf = new CacheConfig(conf, bc); 1154 hsfCowOn.initReader(); 1155 reader = hsfCowOn.getReader(); 1156 reader.close(cacheConf.shouldEvictOnClose()); 1157 1158 // We expect no changes 1159 assertEquals(startHit, cs.getHitCount()); 1160 assertEquals(startMiss, cs.getMissCount()); 1161 assertEquals(startEvicted, cs.getEvictedCount()); 1162 } 1163 1164 private Path splitStoreFile(final HRegionFileSystem regionFs, final RegionInfo hri, 1165 final String family, final HStoreFile sf, final byte[] splitKey, boolean isTopRef, 1166 StoreFileTracker sft) throws IOException { 1167 StoreFileInfo sfi = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef, null, sft); 1168 if (null == sfi) { 1169 return null; 1170 } 1171 List<StoreFileInfo> splitFiles = new ArrayList<>(); 1172 splitFiles.add(sfi); 1173 MasterProcedureEnv mockEnv = mock(MasterProcedureEnv.class); 1174 MasterServices mockServices = mock(MasterServices.class); 1175 when(mockEnv.getMasterServices()).thenReturn(mockServices); 1176 when(mockEnv.getMasterConfiguration()).thenReturn(new Configuration()); 1177 TableDescriptors mockTblDescs = mock(TableDescriptors.class); 1178 when(mockServices.getTableDescriptors()).thenReturn(mockTblDescs); 1179 TableDescriptor mockTblDesc = TableDescriptorBuilder.newBuilder(hri.getTable()) 1180 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build(); 1181 when(mockTblDescs.get(any())).thenReturn(mockTblDesc); 1182 Path regionDir = regionFs.commitDaughterRegion(hri, splitFiles, mockEnv); 1183 return new Path(new Path(regionDir, family), sfi.getPath().getName()); 1184 } 1185 1186 private StoreFileWriter writeStoreFile(Configuration conf, CacheConfig cacheConf, Path path, 1187 int numBlocks) throws IOException { 1188 // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs 1189 int numKVs = 5 * numBlocks; 1190 List<KeyValue> kvs = new ArrayList<>(numKVs); 1191 byte[] b = Bytes.toBytes("x"); 1192 int totalSize = 0; 1193 for (int i = numKVs; i > 0; i--) { 1194 KeyValue kv = new KeyValue(b, b, b, i, b); 1195 kvs.add(kv); 1196 // kv has memstoreTS 0, which takes 1 byte to store. 1197 totalSize += kv.getLength() + 1; 1198 } 1199 int blockSize = totalSize / numBlocks; 1200 HFileContext meta = new HFileContextBuilder().withBlockSize(blockSize).withChecksumType(CKTYPE) 1201 .withBytesPerCheckSum(CKBYTES).build(); 1202 // Make a store file and write data to it. 1203 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs) 1204 .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build(); 1205 // We'll write N-1 KVs to ensure we don't write an extra block 1206 kvs.remove(kvs.size() - 1); 1207 for (KeyValue kv : kvs) { 1208 writer.append(kv); 1209 } 1210 writer.appendMetadata(0, false); 1211 writer.close(); 1212 return writer; 1213 } 1214 1215 /** 1216 * Check if data block encoding information is saved correctly in HFile's file info. 1217 */ 1218 @Test 1219 public void testDataBlockEncodingMetaData() throws IOException { 1220 // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname. 1221 Path dir = new Path(new Path(testDir, "7e0102"), "familyname"); 1222 Path path = new Path(dir, "1234567890"); 1223 1224 DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF; 1225 cacheConf = new CacheConfig(conf); 1226 HFileContext meta = 1227 new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE) 1228 .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build(); 1229 // Make a store file and write data to it. 1230 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs) 1231 .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build(); 1232 writer.close(); 1233 1234 StoreFileInfo storeFileInfo = 1235 StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true); 1236 HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf); 1237 storeFile.initReader(); 1238 StoreFileReader reader = storeFile.getReader(); 1239 1240 Map<byte[], byte[]> fileInfo = reader.loadFileInfo(); 1241 byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING); 1242 assertArrayEquals(dataBlockEncoderAlgo.getNameInBytes(), value); 1243 } 1244 1245 @Test 1246 public void testDataBlockSizeEncoded() throws Exception { 1247 // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname. 1248 Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname"); 1249 Path path = new Path(dir, "1234567890"); 1250 1251 DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF; 1252 1253 conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1); 1254 1255 cacheConf = new CacheConfig(conf); 1256 HFileContext meta = 1257 new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE) 1258 .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build(); 1259 // Make a store file and write data to it. 1260 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs) 1261 .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build(); 1262 writeStoreFile(writer); 1263 1264 StoreFileInfo storeFileInfo = 1265 StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true); 1266 HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf); 1267 storeFile.initReader(); 1268 StoreFileReader reader = storeFile.getReader(); 1269 1270 Map<byte[], byte[]> fileInfo = reader.loadFileInfo(); 1271 byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING); 1272 assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value)); 1273 1274 HFile.Reader fReader = 1275 HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf); 1276 1277 FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath()); 1278 long fileSize = fs.getFileStatus(writer.getPath()).getLen(); 1279 FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize); 1280 long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset(); 1281 HFileBlock block; 1282 while (offset <= max) { 1283 block = fReader.readBlock(offset, -1, /* cacheBlock */ 1284 false, /* pread */ false, /* isCompaction */ false, /* updateCacheMetrics */ 1285 false, null, null); 1286 offset += block.getOnDiskSizeWithHeader(); 1287 double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL; 1288 if (offset <= max) { 1289 assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05)); 1290 } 1291 } 1292 } 1293 1294 @Test 1295 public void testDataBlockSizeCompressed() throws Exception { 1296 conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR, 1297 PreviousBlockCompressionRatePredicator.class.getName()); 1298 testDataBlockSizeWithCompressionRatePredicator(12, 1299 (s, c) -> (c > 2 && c < 11) ? s >= BLOCKSIZE_SMALL * 10 : true); 1300 } 1301 1302 @Test 1303 public void testDataBlockSizeUnCompressed() throws Exception { 1304 conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR, UncompressedBlockSizePredicator.class.getName()); 1305 testDataBlockSizeWithCompressionRatePredicator(200, (s, c) -> s < BLOCKSIZE_SMALL * 10); 1306 } 1307 1308 private void testDataBlockSizeWithCompressionRatePredicator(int expectedBlockCount, 1309 BiFunction<Integer, Integer, Boolean> validation) throws Exception { 1310 Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname"); 1311 Path path = new Path(dir, "1234567890"); 1312 DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF; 1313 cacheConf = new CacheConfig(conf); 1314 HFileContext meta = 1315 new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE) 1316 .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo) 1317 .withCompression(Compression.Algorithm.GZ).build(); 1318 // Make a store file and write data to it. 1319 StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs) 1320 .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build(); 1321 writeLargeStoreFile(writer, Bytes.toBytes(name), Bytes.toBytes(name), 200); 1322 writer.close(); 1323 StoreFileInfo storeFileInfo = 1324 StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true); 1325 HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf); 1326 storeFile.initReader(); 1327 HFile.Reader fReader = 1328 HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf); 1329 FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath()); 1330 long fileSize = fs.getFileStatus(writer.getPath()).getLen(); 1331 FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize); 1332 long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset(); 1333 HFileBlock block; 1334 int blockCount = 0; 1335 while (offset <= max) { 1336 block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false, 1337 /* isCompaction */ false, /* updateCacheMetrics */ false, null, null); 1338 offset += block.getOnDiskSizeWithHeader(); 1339 blockCount++; 1340 assertTrue(validation.apply(block.getUncompressedSizeWithoutHeader(), blockCount)); 1341 } 1342 assertEquals(expectedBlockCount, blockCount); 1343 } 1344 1345}