/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.BLOCK_COMPRESSED_SIZE_PREDICATOR;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.OptionalLong;
import java.util.TreeSet;
import java.util.function.BiFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.CacheStats;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.PreviousBlockCompressionRatePredicator;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.io.hfile.UncompressedBlockSizePredicator;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * Test HStoreFile
 */
@Category({ RegionServerTests.class, MediumTests.class })
public class TestHStoreFile {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHStoreFile.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestHStoreFile.class);
  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  private CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
  private static Path ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile");
  private static final ChecksumType CKTYPE = ChecksumType.CRC32C;
  private static final int CKBYTES = 512;
  private static String TEST_FAMILY = "cf";
  private static final char FIRST_CHAR = 'a';
  private static final char LAST_CHAR = 'z';

  @Rule
  public TestName name = new TestName();

  private Configuration conf;
  private Path testDir;
  private FileSystem fs;

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    testDir = TEST_UTIL.getDataTestDir(name.getMethodName());
    fs = testDir.getFileSystem(conf);
  }

  @AfterClass
  public static void tearDownAfterClass() {
    TEST_UTIL.cleanupTestDir();
  }

  /**
   * Write a file and then assert that we can read from top and bottom halves using two
   * HalfMapFiles, as well as one HalfMapFile and one HFileLink file.
   */
  @Test
  public void testBasicHalfAndHFileLinkMapFile() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testBasicHalfAndHFileLinkMapFile")).build();
    // The locations of the hfiles that an HFileLink refers to must be consistent with the table
    // dir created by CommonFSUtils, so create the region directory under
    // CommonFSUtils.getTableDir here.
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    HStoreFile sf = new HStoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE, true);
    checkHalfHFile(regionFs, sf);
  }

  private void writeStoreFile(final StoreFileWriter writer) throws IOException {
    writeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()));
  }

  // pick a split point (roughly halfway)
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR) / 2, FIRST_CHAR };

  /*
   * Writes HStoreKey and ImmutableBytes data to passed writer and then closes it.
   */
  public static void writeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier)
    throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
        for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
          byte[] b = new byte[] { (byte) d, (byte) e };
          writer.append(new KeyValue(b, fam, qualifier, now, b));
        }
      }
    } finally {
      writer.close();
    }
  }

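  /**
   * Like {@link #writeStoreFile(StoreFileWriter, byte[], byte[])} but repeats for the given number
   * of rounds, writing one single-byte row key per round, to produce a file large enough to span
   * many blocks.
   */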
  public static void writeLargeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier,
    int rounds) throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (int i = 0; i < rounds; i++) {
        for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
          for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
            byte[] b = new byte[] { (byte) d, (byte) e };
            byte[] key = new byte[] { (byte) i };
            writer.append(new KeyValue(key, fam, qualifier, now, b));
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Test that our mechanism of writing store files in one region to reference store files in other
   * regions works.
   */
  @Test
  public void testReference() throws IOException {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testReferenceTb")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    HStoreFile hsf = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    // Split on a row, not in middle of row. Midkey returned by reader
    // may be in middle of row. Create new one with empty column and
    // timestamp.
    byte[] midRow = CellUtil.cloneRow(reader.midKey().get());
    byte[] finalRow = CellUtil.cloneRow(reader.getLastKey().get());
    hsf.closeStoreFile(true);

    // Make a reference
    RegionInfo splitHri = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(midRow).build();
    Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true);
    HStoreFile refHsf = new HStoreFile(this.fs, refPath, conf, cacheConf, BloomType.NONE, true);
    refHsf.initReader();
    // Now confirm that I can read from the reference and that it only gets
    // keys from top half of the file.
    HFileScanner s = refHsf.getReader().getScanner(false, false);
    Cell kv = null;
    for (boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
      ByteBuffer bb = ByteBuffer.wrap(((KeyValue) s.getKey()).getKey());
      kv = KeyValueUtil.createKeyValueFromKey(bb);
      if (first) {
        assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), midRow, 0,
          midRow.length));
        first = false;
      }
    }
    assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), finalRow, 0,
      finalRow.length));
  }

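  /**
   * Verify that opening a StoreFileScanner on a store file increments the file's read reference
   * count and that closing the scanner decrements it again.
   */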
  @Test
  public void testStoreFileReference() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testStoreFileReference")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    writer.close();

    HStoreFile file = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
    file.initReader();
    StoreFileReader r = file.getReader();
    assertNotNull(r);
    StoreFileScanner scanner =
      new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false);

    // Verify after instantiating scanner refCount is increased
    assertTrue("Verify file is being referenced", file.isReferencedInReads());
    scanner.close();
    // Verify after closing scanner refCount is decreased
    assertFalse("Verify file is not being referenced", file.isReferencedInReads());
  }

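  /**
   * A scan whose column family time range cannot match anything in the (mocked, empty) store file
   * should cause shouldUseScanner to reject the scanner.
   */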
  @Test
  public void testEmptyStoreFileRestrictKeyRanges() throws Exception {
    StoreFileReader reader = mock(StoreFileReader.class);
    HStore store = mock(HStore.class);
    byte[] cf = Bytes.toBytes("ty");
    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf);
    when(store.getColumnFamilyDescriptor()).thenReturn(cfd);
    try (StoreFileScanner scanner =
      new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true)) {
      Scan scan = new Scan();
      scan.setColumnFamilyTimeRange(cf, 0, 1);
      assertFalse(scanner.shouldUseScanner(scan, store, 0));
    }
  }

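  /**
   * Write a store file, create an HFileLink to it in another region's directory, then verify that
   * every row can be read back through the link.
   */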
  @Test
  public void testHFileLink() throws IOException {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build();
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    Path dstPath = new Path(regionFs.getTableDir(), new Path("test-region", TEST_FAMILY));
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // Try to open store file from link
    StoreFileInfo storeFileInfo = new StoreFileInfo(testConf, this.fs, linkFilePath, true);
    HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    assertTrue(storeFileInfo.isLink());
    hsf.initReader();

    // Now confirm that I can read from the link
    int count = 1;
    HFileScanner s = hsf.getReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  /**
   * This test creates an hfile and then the dir structures and files to verify that references to
   * hfilelinks (created by snapshot clones) can be properly interpreted.
   */
  @Test
  public void testReferenceToHFileLink() throws IOException {
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);

    // adding legal table name chars to verify regex handles it.
    RegionInfo hri = RegionInfoBuilder.newBuilder(TableName.valueOf("_original-evil-name")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
    StoreFileWriter writer = new StoreFileWriter.Builder(testConf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());

    // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
    RegionInfo hriClone = RegionInfoBuilder.newBuilder(TableName.valueOf("clone")).build();
    HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hriClone);
    Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY);
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // create splits of the link.
    // <root>/clone/splitA/<cf>/<reftohfilelink>,
    // <root>/clone/splitB/<cf>/<reftohfilelink>
    RegionInfo splitHriA = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(SPLITKEY).build();
    RegionInfo splitHriB =
      RegionInfoBuilder.newBuilder(hri.getTable()).setStartKey(SPLITKEY).build();
    HStoreFile f = new HStoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE, true);
    f.initReader();
    Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true); // top
    Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false); // bottom
    f.closeStoreFile(true);
    // OK test the thing
    CommonFSUtils.logFileSystemState(fs, testDir, LOG);

    // There is a case where a file with the hfilelink pattern is actually a daughter
    // reference to an hfile link. There is code in StoreFile that handles this case.

    // Try to open store file from link
    HStoreFile hsfA = new HStoreFile(this.fs, pathA, testConf, cacheConf, BloomType.NONE, true);
    hsfA.initReader();

    // Now confirm that I can read from the ref to link
    int count = 1;
    HFileScanner s = hsfA.getReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
    assertTrue(count > 0); // read some rows here

    // Try to open store file from link
    HStoreFile hsfB = new HStoreFile(this.fs, pathB, testConf, cacheConf, BloomType.NONE, true);
    hsfB.initReader();

    // Now confirm that I can read from the ref to link
    HFileScanner sB = hsfB.getReader().getScanner(false, false);
    sB.seekTo();

    // count++ as seekTo() will advance the scanner
    count++;
    while (sB.next()) {
      count++;
    }

    // read the rest of the rows
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

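  /**
   * Split the given store file first at its midkey, producing two Reference files, and then at
   * keys outside its key range, producing a single child covering the whole file, asserting in
   * each case that the halves contain the expected keys.
   */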
  private void checkHalfHFile(final HRegionFileSystem regionFs, final HStoreFile f)
    throws IOException {
    f.initReader();
    Cell midkey = f.getReader().midKey().get();
    KeyValue midKV = (KeyValue) midkey;
    // 1. test using the midRow as the splitKey, this test will generate two Reference files
    // in the children
    byte[] midRow = CellUtil.cloneRow(midKV);
    // Create top split.
    RegionInfo topHri =
      RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable()).setEndKey(SPLITKEY).build();
    Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true);
    // Create bottom split.
    RegionInfo bottomHri = RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable())
      .setStartKey(SPLITKEY).build();
    Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false);
    // Make readers on top and bottom.
    HStoreFile topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true);
    topF.initReader();
    StoreFileReader top = topF.getReader();
    HStoreFile bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true);
    bottomF.initReader();
    StoreFileReader bottom = bottomF.getReader();
    ByteBuffer previous = null;
    LOG.info("Midkey: " + midKV.toString());
    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midKV.getKey());
    try {
      // Now make two HalfMapFiles and assert they can read the full backing
      // file, one from the top and the other from the bottom.
      // Test reading from the top first, then from the bottom.
      boolean first = true;
      ByteBuffer key = null;
      HFileScanner topScanner = top.getScanner(false, false);
      while (
        (!topScanner.isSeeked() && topScanner.seekTo())
          || (topScanner.isSeeked() && topScanner.next())
      ) {
        key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());

        if (
          (PrivateCellUtil.compare(topScanner.getReader().getComparator(), midKV, key.array(),
            key.arrayOffset(), key.limit())) > 0
        ) {
          fail("key=" + Bytes.toStringBinary(key) + " < midkey=" + midkey);
        }
        if (first) {
          first = false;
          LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
        }
      }
      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

      first = true;
      HFileScanner bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
        previous = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
        key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
        if (first) {
          first = false;
          LOG.info("First in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
        }
        assertTrue(key.compareTo(bbMidkeyBytes) < 0);
      }
      if (previous != null) {
        LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // 2. test using a midkey which will generate one Reference file and one HFileLink file.
      // First, use a key that is < the first key. Ensure splits behave
      // properly.
      byte[] badmidkey = Bytes.toBytes(" .");
      assertTrue(fs.exists(f.getPath()));
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);

      assertNull(bottomPath);

      topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true);
      topF.initReader();
      top = topF.getReader();
      // Now read from the top.
      first = true;
      topScanner = top.getScanner(false, false);
      KeyValue.KeyOnlyKeyValue keyOnlyKV = new KeyValue.KeyOnlyKeyValue();
      while ((!topScanner.isSeeked() && topScanner.seekTo()) || topScanner.next()) {
        key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());
        keyOnlyKV.setKey(key.array(), key.arrayOffset(), key.limit());
        assertTrue(PrivateCellUtil.compare(topScanner.getReader().getComparator(), keyOnlyKV,
          badmidkey, 0, badmidkey.length) >= 0);
        if (first) {
          first = false;
          KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
          LOG.info("First top when key < bottom: " + keyKV);
          String tmp =
            Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
      LOG.info("Last top when key < bottom: " + keyKV);
      String tmp = Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(tmp.charAt(i) == 'z');
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Test when badkey is > the last key in the file ('|||' > 'zz').
      badmidkey = Bytes.toBytes("|||");
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
      assertNull(topPath);

      bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true);
      bottomF.initReader();
      bottom = bottomF.getReader();
      first = true;
      bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
        key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
        if (first) {
          first = false;
          keyKV = KeyValueUtil.createKeyValueFromKey(key);
          LOG.info("First bottom when key > top: " + keyKV);
          tmp = Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      keyKV = KeyValueUtil.createKeyValueFromKey(key);
      LOG.info("Last bottom when key > top: " + keyKV);
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(
          Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength()).charAt(i)
            == 'z');
      }
    } finally {
      if (top != null) {
        top.close(true); // evict since we are about to delete the file
      }
      if (bottom != null) {
        bottom.close(true); // evict since we are about to delete the file
      }
      fs.delete(f.getPath(), true);
    }
  }

  private static StoreFileScanner getStoreFileScanner(StoreFileReader reader, boolean cacheBlocks,
    boolean pread) {
    return reader.getStoreFileScanner(cacheBlocks, pread, false, 0, 0, false);
  }

  private static final String localFormatter = "%010d";

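  /**
   * Writes rows 0, 2, 4, ... 1998 through the given writer, reopens the file, and checks the
   * general bloom filter: no false negatives are allowed, and false positives must stay within
   * twice the configured error rate.
   */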
  private void bloomWriteRead(StoreFileWriter writer, FileSystem fs) throws Exception {
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
    Path f = writer.getPath();
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
      columns.add(Bytes.toBytes("family:col"));

      Scan scan = new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
      scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes("family:col"));
      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  private static final int BLOCKSIZE_SMALL = 8192;

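  /**
   * Round-trip a ROW bloom filter: write a file with a ROW bloom enabled and verify its false
   * positive/negative behavior via {@link #bloomWriteRead}.
   */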
  @Test
  public void testBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);
    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withBloomType(BloomType.ROW).withMaxKeyCount(2000).withFileContext(meta).build();
    bloomWriteRead(writer, fs);
  }

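  /**
   * Same check for the delete family bloom filter, which is maintained for every store file:
   * write DeleteFamily markers for the even rows, then verify the delete family count and the
   * false positive/negative rates.
   */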
  @Test
  public void testDeleteFamilyBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);

    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withMaxKeyCount(2000).withFileContext(meta).build();

    // add delete family
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, KeyValue.Type.DeleteFamily, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      byte[] rowKey = Bytes.toBytes(row);
      boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0, rowKey.length);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    assertEquals(1000, reader.getDeleteFamilyCnt());
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  /**
   * Test for HBASE-8012
   */
  @Test
  public void testReseek() throws Exception {
    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withFileContext(meta).build();

    writeStoreFile(writer);
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());

    // Now do reseek with empty KV to position to the beginning of the file

    KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
    StoreFileScanner s = getStoreFileScanner(reader, false, false);
    s.reseek(k);

    assertNotNull("Initial reseek should position at the beginning of the file", s.peek());
  }

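  /**
   * Exercise ROWCOL and ROW bloom types against the same data set, verifying the stored key
   * counts, zero false negatives, and bounded false positive rates for each type.
   */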
  @Test
  public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    int rowCount = 50;
    int colCount = 10;
    int versions = 2;

    // run once using columns and once using rows
    BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
    int[] expKeys = { rowCount * colCount, rowCount };
    // The line below deserves commentary: it is the expected number of bloom false positives.
    // column-level = rowCount*2*colCount inserts
    // row-level = only rowCount*2 inserts, but failures will be magnified by the
    // 2nd for loop over every column (2*colCount)
    float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };

    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    for (int x : new int[] { 0, 1 }) {
      // write the file
      Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

      HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
        .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
      // Make a store file and write data to it.
      StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
        .withBloomType(bt[x]).withMaxKeyCount(expKeys[x]).withFileContext(meta).build();

      long now = EnvironmentEdgeManager.currentTime();
      for (int i = 0; i < rowCount * 2; i += 2) { // rows
        for (int j = 0; j < colCount * 2; j += 2) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          for (int k = 0; k < versions; ++k) { // versions
            KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
              Bytes.toBytes("col" + col), now - k, Bytes.toBytes(-1L));
            writer.append(kv);
          }
        }
      }
      writer.close();

      ReaderContext context =
        new ReaderContextBuilder().withFilePath(f).withFileSize(fs.getFileStatus(f).getLen())
          .withFileSystem(fs).withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f)).build();
      StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
      storeFileInfo.initHFileInfo(context);
      StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
      storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
      reader.loadFileInfo();
      reader.loadBloomfilter();
      StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
      assertEquals(expKeys[x], reader.getGeneralBloomFilter().getKeyCount());

      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      // check false positives rate
      int falsePos = 0;
      int falseNeg = 0;
      for (int i = 0; i < rowCount * 2; ++i) { // rows
        for (int j = 0; j < colCount * 2; ++j) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
          columns.add(Bytes.toBytes("col" + col));

          Scan scan =
            new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
          scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes(("col" + col)));

          boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
          boolean shouldRowExist = i % 2 == 0;
          boolean shouldColExist = j % 2 == 0;
          shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
          if (shouldRowExist && shouldColExist) {
            if (!exists) {
              falseNeg++;
            }
          } else {
            if (exists) {
              falsePos++;
            }
          }
        }
      }
      reader.close(true); // evict because we are about to delete the file
      fs.delete(f, true);
      System.out.println(bt[x].toString());
      System.out.println(" False negatives: " + falseNeg);
      System.out.println(" False positives: " + falsePos);
      assertEquals(0, falseNeg);
      assertTrue(falsePos < 2 * expErr[x]);
    }
  }

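  /**
   * Verify that StoreFileComparators.SEQ_ID sorts the mocked store files into the order they are
   * passed, i.e. by max sequence id with file size, bulk load time, and path as tie-breakers.
   */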
  @Test
  public void testSeqIdComparator() {
    assertOrdering(StoreFileComparators.SEQ_ID, mockStoreFile(true, 100, 1000, -1, "/foo/123"),
      mockStoreFile(true, 100, 1000, -1, "/foo/124"), mockStoreFile(true, 99, 1000, -1, "/foo/126"),
      mockStoreFile(true, 98, 2000, -1, "/foo/126"), mockStoreFile(false, 3453, -1, 1, "/foo/1"),
      mockStoreFile(false, 2, -1, 3, "/foo/2"), mockStoreFile(false, 1000, -1, 5, "/foo/2"),
      mockStoreFile(false, 76, -1, 5, "/foo/3"));
  }

  /**
   * Assert that the given comparator orders the given storefiles in the same way that they're
   * passed.
   */
  private void assertOrdering(Comparator<? super HStoreFile> comparator, HStoreFile... sfs) {
    ArrayList<HStoreFile> sorted = Lists.newArrayList(sfs);
    Collections.shuffle(sorted);
    Collections.sort(sorted, comparator);
    LOG.debug("sfs: " + Joiner.on(",").join(sfs));
    LOG.debug("sorted: " + Joiner.on(",").join(sorted));
    assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
  }

  /**
   * Create a mock StoreFile with the given attributes.
   */
  private HStoreFile mockStoreFile(boolean bulkLoad, long size, long bulkTimestamp, long seqId,
    String path) {
    HStoreFile mock = Mockito.mock(HStoreFile.class);
    StoreFileReader reader = Mockito.mock(StoreFileReader.class);

    Mockito.doReturn(size).when(reader).length();

    Mockito.doReturn(reader).when(mock).getReader();
    Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
    Mockito.doReturn(OptionalLong.of(bulkTimestamp)).when(mock).getBulkLoadTimestamp();
    Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
    Mockito.doReturn(new Path(path)).when(mock).getPath();
    String name = "mock storefile, bulkLoad=" + bulkLoad + " bulkTimestamp=" + bulkTimestamp
      + " seqId=" + seqId + " path=" + path;
    Mockito.doReturn(name).when(mock).toString();
    return mock;
  }

  /**
   * Generate a list of KeyValues for testing based on given parameters
   * @return the rows key-value list
   */
  List<KeyValue> getKeyValueSet(long[] timestamps, int numRows, byte[] qualifier, byte[] family) {
    List<KeyValue> kvList = new ArrayList<>();
    for (int i = 1; i <= numRows; i++) {
      byte[] b = Bytes.toBytes(i);
      LOG.info(Bytes.toString(b));
      for (long timestamp : timestamps) {
        kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
      }
    }
    return kvList;
  }

  /**
   * Test to ensure correctness when using StoreFile with multiple timestamps
   */
  @Test
  public void testMultipleTimestamps() throws IOException {
    byte[] family = Bytes.toBytes("familyname");
    byte[] qualifier = Bytes.toBytes("qualifier");
    int numRows = 10;
    long[] timestamps = new long[] { 20, 10, 5, 1 };
    Scan scan = new Scan();

    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path storedir = new Path(new Path(testDir, "7e0102"), Bytes.toString(family));
    Path dir = new Path(storedir, "1234567890");
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withOutputDir(dir).withFileContext(meta).build();

    List<KeyValue> kvList = getKeyValueSet(timestamps, numRows, qualifier, family);

    for (KeyValue kv : kvList) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();

    HStoreFile hsf =
      new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    HStore store = mock(HStore.class);
    when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of(family));
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
    TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
    columns.add(qualifier);

    scan.setTimeRange(20, 100);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(1, 2);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(8, 10);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // lets make sure it still works with column family time ranges
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // This test relies on the timestamp range optimization
    scan = new Scan();
    scan.setTimeRange(27, 50);
    assertFalse(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // should still use the scanner because we override the family time range
    scan = new Scan();
    scan.setTimeRange(27, 50);
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
  }

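  /**
   * Write one file with cache-on-write disabled and one with it enabled, then track the block
   * cache's hit/miss/eviction counters as the files are read back and closed with evict-on-close
   * toggled both ways.
   */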
  @Test
  public void testCacheOnWriteEvictOnClose() throws Exception {
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
    Path baseDir = new Path(new Path(testDir, "7e0102"), "twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = BlockCacheFactory.createBlockCache(conf);
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf, bc);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFileWriter writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    HStoreFile hsf =
      new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    LOG.debug(hsf.getPath().toString());

    // Read this file, we should see 3 misses
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf, bc);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    hsf = new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);

    // Read this file, we should see 3 hits
    hsf.initReader();
    reader = hsf.getReader();
    scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Let's read back the two files to ensure the blocks exactly match
    hsf = new HStoreFile(this.fs, pathCowOff, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader readerOne = hsf.getReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = getStoreFileScanner(readerOne, true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsf = new HStoreFile(this.fs, pathCowOn, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader readerTwo = hsf.getReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = getStoreFileScanner(readerTwo, true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    Cell kv1 = null;
    Cell kv2 = null;
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      KeyValue keyv1 = KeyValueUtil.ensureKeyValue(kv1);
      KeyValue keyv2 = KeyValueUtil.ensureKeyValue(kv2);
      assertTrue(Bytes.compareTo(keyv1.getBuffer(), keyv1.getKeyOffset(), keyv1.getKeyLength(),
        keyv2.getBuffer(), keyv2.getKeyOffset(), keyv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(kv1.getValueArray(), kv1.getValueOffset(), kv1.getValueLength(),
        kv2.getValueArray(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    assertNull(scannerTwo.next());
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf, bc);
    hsf = new HStoreFile(this.fs, pathCowOff, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    reader = hsf.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions but the evict count stat should not change. Eviction because
    // of HFile invalidation is not counted along with normal evictions
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf, bc);
    hsf = new HStoreFile(this.fs, pathCowOn, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    reader = hsf.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }

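  /**
   * Split the given store file at splitKey for the supplied daughter region, committing the
   * daughter with mocked master services, and return the path of the resulting reference file,
   * or null when the requested half would be empty.
   */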
  private Path splitStoreFile(final HRegionFileSystem regionFs, final RegionInfo hri,
    final String family, final HStoreFile sf, final byte[] splitKey, boolean isTopRef)
    throws IOException {
    Path path = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef, null);
    if (null == path) {
      return null;
    }
    List<Path> splitFiles = new ArrayList<>();
    splitFiles.add(path);
    MasterProcedureEnv mockEnv = mock(MasterProcedureEnv.class);
    MasterServices mockServices = mock(MasterServices.class);
    when(mockEnv.getMasterServices()).thenReturn(mockServices);
    when(mockEnv.getMasterConfiguration()).thenReturn(new Configuration());
    TableDescriptors mockTblDescs = mock(TableDescriptors.class);
    when(mockServices.getTableDescriptors()).thenReturn(mockTblDescs);
    TableDescriptor mockTblDesc = TableDescriptorBuilder.newBuilder(hri.getTable())
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
    when(mockTblDescs.get(any())).thenReturn(mockTblDesc);
    Path regionDir = regionFs.commitDaughterRegion(hri, splitFiles, mockEnv);
    return new Path(new Path(regionDir, family), path.getName());
  }

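  /**
   * Write a store file containing approximately {@code numBlocks} blocks of small KeyValues and
   * return the closed writer.
   */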
  private StoreFileWriter writeStoreFile(Configuration conf, CacheConfig cacheConf, Path path,
    int numBlocks) throws IOException {
    // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
    int numKVs = 5 * numBlocks;
    List<KeyValue> kvs = new ArrayList<>(numKVs);
    byte[] b = Bytes.toBytes("x");
    int totalSize = 0;
    for (int i = numKVs; i > 0; i--) {
      KeyValue kv = new KeyValue(b, b, b, i, b);
      kvs.add(kv);
      // kv has memstoreTS 0, which takes 1 byte to store.
      totalSize += kv.getLength() + 1;
    }
    int blockSize = totalSize / numBlocks;
    HFileContext meta = new HFileContextBuilder().withBlockSize(blockSize).withChecksumType(CKTYPE)
      .withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    // We'll write N-1 KVs to ensure we don't write an extra block
    kvs.remove(kvs.size() - 1);
    for (KeyValue kv : kvs) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();
    return writer;
  }

  /**
   * Check if data block encoding information is saved correctly in HFile's file info.
   */
  @Test
  public void testDataBlockEncodingMetaData() throws IOException {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writer.close();

    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertArrayEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
  }

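  /**
   * With hbase.writer.unified.encoded.blocksize.ratio set to 1, block boundaries are decided by
   * the encoded data size, so every non-final data block's on-disk size should land within 5%
   * above BLOCKSIZE_SMALL.
   */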
  @Test
  public void testDataBlockSizeEncoded() throws Exception {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;

    conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1);

    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeStoreFile(writer);

    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value));

    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);

    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
      if (offset <= max) {
        assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
      }
    }
  }

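  /**
   * With PreviousBlockCompressionRatePredicator, block boundaries are adjusted by the compression
   * ratio observed on previous blocks, so once the ratio is learned the uncompressed block size
   * should reach roughly 10x BLOCKSIZE_SMALL and far fewer blocks are written.
   */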
  @Test
  public void testDataBlockSizeCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR,
      PreviousBlockCompressionRatePredicator.class.getName());
    testDataBlockSizeWithCompressionRatePredicator(12,
      (s, c) -> (c > 2 && c < 11) ? s >= BLOCKSIZE_SMALL * 10 : true);
  }

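  /**
   * With UncompressedBlockSizePredicator, block boundaries depend only on the uncompressed size,
   * so every block stays under 10x BLOCKSIZE_SMALL and many more blocks are written.
   */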
  @Test
  public void testDataBlockSizeUnCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR, UncompressedBlockSizePredicator.class.getName());
    testDataBlockSizeWithCompressionRatePredicator(200, (s, c) -> s < BLOCKSIZE_SMALL * 10);
  }

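  /**
   * Writes a large, GZ-compressed store file, then walks its data blocks, applying the supplied
   * validation to each block's uncompressed size and running block count, and finally asserts the
   * expected total number of blocks.
   */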
  private void testDataBlockSizeWithCompressionRatePredicator(int expectedBlockCount,
    BiFunction<Integer, Integer, Boolean> validation) throws Exception {
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");
    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo)
        .withCompression(Compression.Algorithm.GZ).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeLargeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()), 200);
    writer.close();
    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);
    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    int blockCount = 0;
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      blockCount++;
      assertTrue(validation.apply(block.getUncompressedSizeWithoutHeader(), blockCount));
    }
    assertEquals(expectedBlockCount, blockCount);
  }
}