/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mob;

import static org.apache.hadoop.hbase.HBaseTestingUtility.START_KEY;
import static org.apache.hadoop.hbase.HBaseTestingUtility.fam1;
import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY;
import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.RegionAsTable;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.After;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Test mob store compaction
 */
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestMobStoreCompaction {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestMobStoreCompaction.class);

  @Rule
  public TestName name = new TestName();
  static final Logger LOG = LoggerFactory.getLogger(TestMobStoreCompaction.class.getName());
  private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private Configuration conf = null;

  private HRegion region = null;
  private HTableDescriptor htd = null;
  private HColumnDescriptor hcd = null;
  private long mobCellThreshold = 1000;

  private FileSystem fs;

  private static final byte[] COLUMN_FAMILY = fam1;
  private final byte[] STARTROW = Bytes.toBytes(START_KEY);
  private int compactionThreshold;

  private Boolean useFileBasedSFT;

  public TestMobStoreCompaction(Boolean useFileBasedSFT) {
    this.useFileBasedSFT = useFileBasedSFT;
  }

  @Parameterized.Parameters
  public static Collection<Boolean> data() {
    Boolean[] data = { false, true };
    return Arrays.asList(data);
  }

  private void init(Configuration conf, long mobThreshold) throws Exception {
    if (useFileBasedSFT) {
      conf.set(StoreFileTrackerFactory.TRACKER_IMPL,
        "org.apache.hadoop.hbase.regionserver.storefiletracker.FileBasedStoreFileTracker");
    }

    this.conf = conf;
    this.mobCellThreshold = mobThreshold;
    HBaseTestingUtility UTIL = new HBaseTestingUtility(conf);

    compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3);
    htd = UTIL.createTableDescriptor(TestMobUtils.getTableName(name));
    hcd = new HColumnDescriptor(COLUMN_FAMILY);
    hcd.setMobEnabled(true);
    hcd.setMobThreshold(mobThreshold);
    hcd.setMaxVersions(1);
    htd.modifyFamily(hcd);

    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(htd.getTableName()).build();
    region = HBaseTestingUtility.createRegionAndWAL(regionInfo, UTIL.getDataTestDir(), conf, htd,
      new MobFileCache(conf));
    fs = FileSystem.get(conf);
  }

  @After
  public void tearDown() throws Exception {
    region.close();
    fs.delete(UTIL.getDataTestDir(), true);
  }

  /**
   * During compaction, cells smaller than the threshold won't be affected.
   */
  @Test
  public void testSmallerValue() throws Exception {
    init(UTIL.getConfiguration(), 500);
    byte[] dummyData = makeDummyData(300); // smaller than mob threshold
    Table loader = new RegionAsTable(region);
    // one hfile per row
    for (int i = 0; i < compactionThreshold; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", 0, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", 0, countMobRows());

    region.compactStores();

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count", 0, countMobFiles());
    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", 0, countMobRows());
  }

  /**
   * The mob threshold is raised before compaction, so the previously MOB-stored cells are written
   * back into normal store files.
   */
  @Test
  public void testLargerValue() throws Exception {
    init(UTIL.getConfiguration(), 200);
    byte[] dummyData = makeDummyData(300); // larger than mob threshold
    Table loader = new RegionAsTable(region);
    for (int i = 0; i < compactionThreshold; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", compactionThreshold, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", compactionThreshold, countMobRows());
    assertEquals("Before compaction: number of mob cells", compactionThreshold,
      countMobCellsInMetadata());
    // Raise the threshold above the data size
    setMobThreshold(region, COLUMN_FAMILY, 500);
    region.initialize();

    List<HStore> stores = region.getStores();
    for (HStore store : stores) {
      // Force major compaction
      store.triggerMajorCompaction();
      Optional<CompactionContext> context = store.requestCompaction(HStore.PRIORITY_USER,
        CompactionLifeCycleTracker.DUMMY, User.getCurrent());
      if (!context.isPresent()) {
        continue;
      }
      region.compact(context.get(), store, NoLimitThroughputController.INSTANCE, User.getCurrent());
    }

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count", compactionThreshold, countMobFiles());
    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", 0, countMobRows());
  }

  private static HRegion setMobThreshold(HRegion region, byte[] cfName, long modThreshold) {
    ColumnFamilyDescriptor cfd =
      ColumnFamilyDescriptorBuilder.newBuilder(region.getTableDescriptor().getColumnFamily(cfName))
        .setMobThreshold(modThreshold).build();
    TableDescriptor td = TableDescriptorBuilder.newBuilder(region.getTableDescriptor())
      .removeColumnFamily(cfName).setColumnFamily(cfd).build();
    region.setTableDescriptor(td);
    return region;
  }

  /**
   * This test first generates store files, then bulk loads them and triggers a compaction. During
   * the compaction, the cell values are larger than the threshold.
   */
  @Test
  public void testMobCompactionWithBulkload() throws Exception {
    // The following will produce store files with 600-byte values, above the 300-byte threshold.
    init(UTIL.getConfiguration(), 300);
    byte[] dummyData = makeDummyData(600);

    Path hbaseRootDir = CommonFSUtils.getRootDir(conf);
    Path basedir = new Path(hbaseRootDir, htd.getNameAsString());
    List<Pair<byte[], String>> hfiles = new ArrayList<>(1);
    for (int i = 0; i < compactionThreshold; i++) {
      Path hpath = new Path(basedir, UUID.randomUUID().toString().replace("-", ""));
      hfiles.add(Pair.newPair(COLUMN_FAMILY, hpath.toString()));
      createHFile(hpath, i, dummyData);
    }

    // Bulk load the generated store files and compact; the 600-byte values exceed the
    // 300-byte threshold.
    Map<byte[], List<Path>> map = region.bulkLoadHFiles(hfiles, true, null);
    assertTrue("Bulkload result:", !map.isEmpty());
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", 0, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", 0, countMobRows());
    assertEquals("Before compaction: referenced mob file count", 0, countReferencedMobFiles());

    region.compactStores();

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count", 1, countMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", compactionThreshold, countMobRows());
    assertEquals("After compaction: referenced mob file count", 1, countReferencedMobFiles());
    assertEquals("After compaction: number of mob cells", compactionThreshold,
      countMobCellsInMetadata());
  }

  @Test
  public void testMajorCompactionAfterDelete() throws Exception {
    init(UTIL.getConfiguration(), 100);
    byte[] dummyData = makeDummyData(200); // larger than mob threshold
    Table loader = new RegionAsTable(region);
    // create hfiles and mob hfiles but don't trigger compaction
    int numHfiles = compactionThreshold - 1;
    byte[] deleteRow = Bytes.add(STARTROW, Bytes.toBytes(0));
    for (int i = 0; i < numHfiles; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", numHfiles, countStoreFiles());
    assertEquals("Before compaction: mob file count", numHfiles, countMobFiles());
    assertEquals("Before compaction: rows", numHfiles, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", numHfiles, countMobRows());
    assertEquals("Before compaction: number of mob cells", numHfiles, countMobCellsInMetadata());
    // now let's delete some cells that contain mobs
    Delete delete = new Delete(deleteRow);
    delete.addFamily(COLUMN_FAMILY);
    region.delete(delete);
    region.flush(true);

    assertEquals("Before compaction: store files", numHfiles + 1, countStoreFiles());
    assertEquals("Before compaction: mob files", numHfiles, countMobFiles());
    // region.compactStores();
    region.compact(true);
    assertEquals("After compaction: store files", 1, countStoreFiles());
  }

  private int countStoreFiles() throws IOException {
    HStore store = region.getStore(COLUMN_FAMILY);
    return store.getStorefilesCount();
  }

  private int countMobFiles() throws IOException {
    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
    if (fs.exists(mobDirPath)) {
      FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath);
      return files.length;
    }
    return 0;
  }

  private long countMobCellsInMetadata() throws IOException {
    long mobCellsCount = 0;
    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
    Configuration copyOfConf = new Configuration(conf);
    copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    CacheConfig cacheConfig = new CacheConfig(copyOfConf);
    if (fs.exists(mobDirPath)) {
      FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath);
      for (FileStatus file : files) {
        HStoreFile sf = new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true);
        sf.initReader();
        Map<byte[], byte[]> fileInfo = sf.getReader().loadFileInfo();
        byte[] count = fileInfo.get(MOB_CELLS_COUNT);
        assertTrue(count != null);
        mobCellsCount += Bytes.toLong(count);
      }
    }
    return mobCellsCount;
  }

  private Put createPut(int rowIdx, byte[] dummyData) throws IOException {
    Put p = new Put(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)));
    p.setDurability(Durability.SKIP_WAL);
    p.addColumn(COLUMN_FAMILY, Bytes.toBytes("colX"), dummyData);
    return p;
  }

  /**
   * Create an HFile containing a single cell with the given dummy data.
   */
  private void createHFile(Path path, int rowIdx, byte[] dummyData) throws IOException {
    HFileContext meta = new HFileContextBuilder().build();
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
      .withFileContext(meta).create();
    long now = EnvironmentEdgeManager.currentTime();
    try {
      KeyValue kv = new KeyValue(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)), COLUMN_FAMILY,
        Bytes.toBytes("colX"), now, dummyData);
      writer.append(kv);
    } finally {
      writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTime()));
      writer.close();
    }
  }

  private int countMobRows() throws IOException {
    Scan scan = new Scan();
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    InternalScanner scanner = region.getScanner(scan);

    int scannedCount = 0;
    List<Cell> results = new ArrayList<>();
    boolean hasMore = true;
    while (hasMore) {
      hasMore = scanner.next(results);
      for (Cell c : results) {
        if (MobUtils.isMobReferenceCell(c)) {
          scannedCount++;
        }
      }
      results.clear();
    }
    scanner.close();

    return scannedCount;
  }

  private byte[] makeDummyData(int size) {
    byte[] dummyData = new byte[size];
    Bytes.random(dummyData);
    return dummyData;
  }

  private int countReferencedMobFiles() throws IOException {
    Scan scan = new Scan();
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    InternalScanner scanner = region.getScanner(scan);

    List<Cell> kvs = new ArrayList<>();
    boolean hasMore = true;
    String fileName;
    Set<String> files = new HashSet<>();
    do {
      kvs.clear();
      hasMore = scanner.next(kvs);
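      // Each batch is filtered down to valid mob reference cells whose value length exceeds the
      // mob threshold; the referenced mob file names are collected and verified to exist below.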
      for (Cell kv : kvs) {
        if (!MobUtils.isMobReferenceCell(kv)) {
          continue;
        }
        if (!MobUtils.hasValidMobRefCellValue(kv)) {
          continue;
        }
        int size = MobUtils.getMobValueLength(kv);
        if (size <= mobCellThreshold) {
          continue;
        }
        fileName = MobUtils.getMobFileName(kv);
        if (fileName.isEmpty()) {
          continue;
        }
        files.add(fileName);
        Path familyPath =
          MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
        assertTrue(fs.exists(new Path(familyPath, fileName)));
      }
    } while (hasMore);

    scanner.close();

    return files.size();
  }
}