001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mob; 019 020import static org.apache.hadoop.hbase.HBaseTestingUtil.START_KEY; 021import static org.apache.hadoop.hbase.HBaseTestingUtil.fam1; 022import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY; 023import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT; 024import static org.junit.Assert.assertEquals; 025import static org.junit.Assert.assertTrue; 026 027import java.io.IOException; 028import java.util.ArrayList; 029import java.util.Arrays; 030import java.util.Collection; 031import java.util.HashSet; 032import java.util.List; 033import java.util.Map; 034import java.util.Optional; 035import java.util.Set; 036import java.util.UUID; 037import org.apache.hadoop.conf.Configuration; 038import org.apache.hadoop.fs.FileStatus; 039import org.apache.hadoop.fs.FileSystem; 040import org.apache.hadoop.fs.Path; 041import org.apache.hadoop.hbase.Cell; 042import org.apache.hadoop.hbase.HBaseClassTestRule; 043import org.apache.hadoop.hbase.HBaseTestingUtil; 044import org.apache.hadoop.hbase.HConstants; 045import org.apache.hadoop.hbase.KeyValue; 046import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 047import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 048import org.apache.hadoop.hbase.client.Delete; 049import org.apache.hadoop.hbase.client.Durability; 050import org.apache.hadoop.hbase.client.Put; 051import org.apache.hadoop.hbase.client.RegionInfo; 052import org.apache.hadoop.hbase.client.RegionInfoBuilder; 053import org.apache.hadoop.hbase.client.Scan; 054import org.apache.hadoop.hbase.client.Table; 055import org.apache.hadoop.hbase.client.TableDescriptor; 056import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 057import org.apache.hadoop.hbase.io.hfile.CacheConfig; 058import org.apache.hadoop.hbase.io.hfile.HFile; 059import org.apache.hadoop.hbase.io.hfile.HFileContext; 060import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; 061import org.apache.hadoop.hbase.regionserver.BloomType; 062import org.apache.hadoop.hbase.regionserver.HRegion; 063import org.apache.hadoop.hbase.regionserver.HStore; 064import org.apache.hadoop.hbase.regionserver.HStoreFile; 065import org.apache.hadoop.hbase.regionserver.InternalScanner; 066import org.apache.hadoop.hbase.regionserver.RegionAsTable; 067import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; 068import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; 069import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; 070import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; 071import org.apache.hadoop.hbase.security.User; 072import org.apache.hadoop.hbase.testclassification.MediumTests; 073import org.apache.hadoop.hbase.util.Bytes; 074import org.apache.hadoop.hbase.util.CommonFSUtils; 075import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 076import org.apache.hadoop.hbase.util.Pair; 077import org.junit.After; 078import org.junit.ClassRule; 079import org.junit.Rule; 080import org.junit.Test; 081import org.junit.experimental.categories.Category; 082import org.junit.rules.TestName; 083import org.junit.runner.RunWith; 084import org.junit.runners.Parameterized; 085import org.slf4j.Logger; 086import org.slf4j.LoggerFactory; 087 088/** 089 * Test mob store compaction 090 */ 091@RunWith(Parameterized.class) 092@Category(MediumTests.class) 093public class TestMobStoreCompaction { 094 095 @ClassRule 096 public static final HBaseClassTestRule CLASS_RULE = 097 HBaseClassTestRule.forClass(TestMobStoreCompaction.class); 098 099 @Rule 100 public TestName name = new TestName(); 101 static final Logger LOG = LoggerFactory.getLogger(TestMobStoreCompaction.class.getName()); 102 private final static HBaseTestingUtil UTIL = new HBaseTestingUtil(); 103 private Configuration conf = null; 104 105 private HRegion region = null; 106 private TableDescriptor tableDescriptor = null; 107 private ColumnFamilyDescriptor familyDescriptor = null; 108 private long mobCellThreshold = 1000; 109 110 private FileSystem fs; 111 112 private static final byte[] COLUMN_FAMILY = fam1; 113 private final byte[] STARTROW = Bytes.toBytes(START_KEY); 114 private int compactionThreshold; 115 116 private Boolean useFileBasedSFT; 117 118 public TestMobStoreCompaction(Boolean useFileBasedSFT) { 119 this.useFileBasedSFT = useFileBasedSFT; 120 } 121 122 @Parameterized.Parameters 123 public static Collection<Boolean> data() { 124 Boolean[] data = { false, true }; 125 return Arrays.asList(data); 126 } 127 128 private void init(Configuration conf, long mobThreshold) throws Exception { 129 if (useFileBasedSFT) { 130 conf.set(StoreFileTrackerFactory.TRACKER_IMPL, 131 "org.apache.hadoop.hbase.regionserver.storefiletracker.FileBasedStoreFileTracker"); 132 } 133 134 this.conf = conf; 135 this.mobCellThreshold = mobThreshold; 136 137 HBaseTestingUtil UTIL = new HBaseTestingUtil(conf); 138 139 compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3); 140 familyDescriptor = ColumnFamilyDescriptorBuilder.newBuilder(COLUMN_FAMILY).setMobEnabled(true) 141 .setMobThreshold(mobThreshold).setMaxVersions(1).build(); 142 tableDescriptor = UTIL.createModifyableTableDescriptor(TestMobUtils.getTableName(name)) 143 .modifyColumnFamily(familyDescriptor).build(); 144 145 RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build(); 146 region = HBaseTestingUtil.createRegionAndWAL(regionInfo, UTIL.getDataTestDir(), conf, 147 tableDescriptor, new MobFileCache(conf)); 148 fs = FileSystem.get(conf); 149 } 150 151 @After 152 public void tearDown() throws Exception { 153 region.close(); 154 fs.delete(UTIL.getDataTestDir(), true); 155 } 156 157 /** 158 * During compaction, cells smaller than the threshold won't be affected. 159 */ 160 @Test 161 public void testSmallerValue() throws Exception { 162 init(UTIL.getConfiguration(), 500); 163 byte[] dummyData = makeDummyData(300); // smaller than mob threshold 164 Table loader = new RegionAsTable(region); 165 // one hfile per row 166 for (int i = 0; i < compactionThreshold; i++) { 167 Put p = createPut(i, dummyData); 168 loader.put(p); 169 region.flush(true); 170 } 171 assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles()); 172 assertEquals("Before compaction: mob file count", 0, countMobFiles()); 173 assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region)); 174 assertEquals("Before compaction: mob rows", 0, countMobRows()); 175 176 region.compactStores(); 177 178 assertEquals("After compaction: store files", 1, countStoreFiles()); 179 assertEquals("After compaction: mob file count", 0, countMobFiles()); 180 assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles()); 181 assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region)); 182 assertEquals("After compaction: mob rows", 0, countMobRows()); 183 } 184 185 /** 186 * During compaction, the mob threshold size is changed. 187 */ 188 @Test 189 public void testLargerValue() throws Exception { 190 init(UTIL.getConfiguration(), 200); 191 byte[] dummyData = makeDummyData(300); // larger than mob threshold 192 Table loader = new RegionAsTable(region); 193 for (int i = 0; i < compactionThreshold; i++) { 194 Put p = createPut(i, dummyData); 195 loader.put(p); 196 region.flush(true); 197 } 198 assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles()); 199 assertEquals("Before compaction: mob file count", compactionThreshold, countMobFiles()); 200 assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region)); 201 assertEquals("Before compaction: mob rows", compactionThreshold, countMobRows()); 202 assertEquals("Before compaction: number of mob cells", compactionThreshold, 203 countMobCellsInMetadata()); 204 // Change the threshold larger than the data size 205 setMobThreshold(region, COLUMN_FAMILY, 500); 206 region.initialize(); 207 208 List<HStore> stores = region.getStores(); 209 for (HStore store : stores) { 210 // Force major compaction 211 store.triggerMajorCompaction(); 212 Optional<CompactionContext> context = store.requestCompaction(HStore.PRIORITY_USER, 213 CompactionLifeCycleTracker.DUMMY, User.getCurrent()); 214 if (!context.isPresent()) { 215 continue; 216 } 217 region.compact(context.get(), store, NoLimitThroughputController.INSTANCE, User.getCurrent()); 218 } 219 220 assertEquals("After compaction: store files", 1, countStoreFiles()); 221 assertEquals("After compaction: mob file count", compactionThreshold, countMobFiles()); 222 assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles()); 223 assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region)); 224 assertEquals("After compaction: mob rows", 0, countMobRows()); 225 } 226 227 private static HRegion setMobThreshold(HRegion region, byte[] cfName, long modThreshold) { 228 ColumnFamilyDescriptor cfd = 229 ColumnFamilyDescriptorBuilder.newBuilder(region.getTableDescriptor().getColumnFamily(cfName)) 230 .setMobThreshold(modThreshold).build(); 231 TableDescriptor td = TableDescriptorBuilder.newBuilder(region.getTableDescriptor()) 232 .removeColumnFamily(cfName).setColumnFamily(cfd).build(); 233 region.setTableDescriptor(td); 234 return region; 235 } 236 237 /** 238 * This test will first generate store files, then bulk load them and trigger the compaction. When 239 * compaction, the cell value will be larger than the threshold. 240 */ 241 @Test 242 public void testMobCompactionWithBulkload() throws Exception { 243 // The following will produce store files of 600. 244 init(UTIL.getConfiguration(), 300); 245 byte[] dummyData = makeDummyData(600); 246 247 Path hbaseRootDir = CommonFSUtils.getRootDir(conf); 248 Path basedir = new Path(hbaseRootDir, tableDescriptor.getTableName().getNameAsString()); 249 List<Pair<byte[], String>> hfiles = new ArrayList<>(1); 250 for (int i = 0; i < compactionThreshold; i++) { 251 Path hpath = new Path(basedir, UUID.randomUUID().toString().replace("-", "")); 252 hfiles.add(Pair.newPair(COLUMN_FAMILY, hpath.toString())); 253 createHFile(hpath, i, dummyData); 254 } 255 256 // The following will bulk load the above generated store files and compact, with 600(fileSize) 257 // > 300(threshold) 258 Map<byte[], List<Path>> map = region.bulkLoadHFiles(hfiles, true, null); 259 assertTrue("Bulkload result:", !map.isEmpty()); 260 assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles()); 261 assertEquals("Before compaction: mob file count", 0, countMobFiles()); 262 assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region)); 263 assertEquals("Before compaction: mob rows", 0, countMobRows()); 264 assertEquals("Before compaction: referenced mob file count", 0, countReferencedMobFiles()); 265 266 region.compactStores(); 267 268 assertEquals("After compaction: store files", 1, countStoreFiles()); 269 assertEquals("After compaction: mob file count:", 1, countMobFiles()); 270 assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region)); 271 assertEquals("After compaction: mob rows", compactionThreshold, countMobRows()); 272 assertEquals("After compaction: referenced mob file count", 1, countReferencedMobFiles()); 273 assertEquals("After compaction: number of mob cells", compactionThreshold, 274 countMobCellsInMetadata()); 275 } 276 277 @Test 278 public void testMajorCompactionAfterDelete() throws Exception { 279 init(UTIL.getConfiguration(), 100); 280 byte[] dummyData = makeDummyData(200); // larger than mob threshold 281 Table loader = new RegionAsTable(region); 282 // create hfiles and mob hfiles but don't trigger compaction 283 int numHfiles = compactionThreshold - 1; 284 byte[] deleteRow = Bytes.add(STARTROW, Bytes.toBytes(0)); 285 for (int i = 0; i < numHfiles; i++) { 286 Put p = createPut(i, dummyData); 287 loader.put(p); 288 region.flush(true); 289 } 290 assertEquals("Before compaction: store files", numHfiles, countStoreFiles()); 291 assertEquals("Before compaction: mob file count", numHfiles, countMobFiles()); 292 assertEquals("Before compaction: rows", numHfiles, UTIL.countRows(region)); 293 assertEquals("Before compaction: mob rows", numHfiles, countMobRows()); 294 assertEquals("Before compaction: number of mob cells", numHfiles, countMobCellsInMetadata()); 295 // now let's delete some cells that contain mobs 296 Delete delete = new Delete(deleteRow); 297 delete.addFamily(COLUMN_FAMILY); 298 region.delete(delete); 299 region.flush(true); 300 301 assertEquals("Before compaction: store files", numHfiles + 1, countStoreFiles()); 302 assertEquals("Before compaction: mob files", numHfiles, countMobFiles()); 303 // region.compactStores(); 304 region.compact(true); 305 assertEquals("After compaction: store files", 1, countStoreFiles()); 306 } 307 308 private int countStoreFiles() throws IOException { 309 HStore store = region.getStore(COLUMN_FAMILY); 310 return store.getStorefilesCount(); 311 } 312 313 private int countMobFiles() throws IOException { 314 Path mobDirPath = MobUtils.getMobFamilyPath(conf, tableDescriptor.getTableName(), 315 familyDescriptor.getNameAsString()); 316 if (fs.exists(mobDirPath)) { 317 FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath); 318 return files.length; 319 } 320 return 0; 321 } 322 323 private long countMobCellsInMetadata() throws IOException { 324 long mobCellsCount = 0; 325 Path mobDirPath = MobUtils.getMobFamilyPath(conf, tableDescriptor.getTableName(), 326 familyDescriptor.getNameAsString()); 327 Configuration copyOfConf = new Configuration(conf); 328 copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f); 329 CacheConfig cacheConfig = new CacheConfig(copyOfConf); 330 if (fs.exists(mobDirPath)) { 331 FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath); 332 for (FileStatus file : files) { 333 HStoreFile sf = new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true); 334 sf.initReader(); 335 Map<byte[], byte[]> fileInfo = sf.getReader().loadFileInfo(); 336 byte[] count = fileInfo.get(MOB_CELLS_COUNT); 337 assertTrue(count != null); 338 mobCellsCount += Bytes.toLong(count); 339 } 340 } 341 return mobCellsCount; 342 } 343 344 private Put createPut(int rowIdx, byte[] dummyData) throws IOException { 345 Put p = new Put(Bytes.add(STARTROW, Bytes.toBytes(rowIdx))); 346 p.setDurability(Durability.SKIP_WAL); 347 p.addColumn(COLUMN_FAMILY, Bytes.toBytes("colX"), dummyData); 348 return p; 349 } 350 351 /** 352 * Create an HFile with the given number of bytes 353 */ 354 private void createHFile(Path path, int rowIdx, byte[] dummyData) throws IOException { 355 HFileContext meta = new HFileContextBuilder().build(); 356 HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path) 357 .withFileContext(meta).create(); 358 long now = EnvironmentEdgeManager.currentTime(); 359 try { 360 KeyValue kv = new KeyValue(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)), COLUMN_FAMILY, 361 Bytes.toBytes("colX"), now, dummyData); 362 writer.append(kv); 363 } finally { 364 writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTime())); 365 writer.close(); 366 } 367 } 368 369 private int countMobRows() throws IOException { 370 Scan scan = new Scan(); 371 // Do not retrieve the mob data when scanning 372 scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE)); 373 InternalScanner scanner = region.getScanner(scan); 374 375 int scannedCount = 0; 376 List<Cell> results = new ArrayList<>(); 377 boolean hasMore = true; 378 while (hasMore) { 379 hasMore = scanner.next(results); 380 for (Cell c : results) { 381 if (MobUtils.isMobReferenceCell(c)) { 382 scannedCount++; 383 } 384 } 385 results.clear(); 386 } 387 scanner.close(); 388 389 return scannedCount; 390 } 391 392 private byte[] makeDummyData(int size) { 393 byte[] dummyData = new byte[size]; 394 Bytes.random(dummyData); 395 return dummyData; 396 } 397 398 private int countReferencedMobFiles() throws IOException { 399 Scan scan = new Scan(); 400 // Do not retrieve the mob data when scanning 401 scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE)); 402 InternalScanner scanner = region.getScanner(scan); 403 404 List<Cell> kvs = new ArrayList<>(); 405 boolean hasMore = true; 406 String fileName; 407 Set<String> files = new HashSet<>(); 408 do { 409 kvs.clear(); 410 hasMore = scanner.next(kvs); 411 for (Cell kv : kvs) { 412 if (!MobUtils.isMobReferenceCell(kv)) { 413 continue; 414 } 415 if (!MobUtils.hasValidMobRefCellValue(kv)) { 416 continue; 417 } 418 int size = MobUtils.getMobValueLength(kv); 419 if (size <= mobCellThreshold) { 420 continue; 421 } 422 fileName = MobUtils.getMobFileName(kv); 423 if (fileName.isEmpty()) { 424 continue; 425 } 426 files.add(fileName); 427 Path familyPath = MobUtils.getMobFamilyPath(conf, tableDescriptor.getTableName(), 428 familyDescriptor.getNameAsString()); 429 assertTrue(fs.exists(new Path(familyPath, fileName))); 430 } 431 } while (hasMore); 432 433 scanner.close(); 434 435 return files.size(); 436 } 437 438}