/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.HBaseTestingUtility.START_KEY;
import static org.apache.hadoop.hbase.HBaseTestingUtility.fam1;
import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY;
import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.mob.MobConstants;
import org.apache.hadoop.hbase.mob.MobFileCache;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.After;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Test mob store compaction.
 * <p>
 * Each test builds a single MOB-enabled region through {@link #init(Configuration, long)}, loads
 * a few rows, compacts the region, and then checks the number of store files, mob files and mob
 * reference cells before and after the compaction.
 */
@Category(SmallTests.class)
public class TestMobStoreCompaction {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestMobStoreCompaction.class);

  @Rule
  public TestName name = new TestName();
  static final Logger LOG = LoggerFactory.getLogger(TestMobStoreCompaction.class);
  private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private Configuration conf = null;

  private HRegion region = null;
  private HTableDescriptor htd = null;
  private HColumnDescriptor hcd = null;
  private long mobCellThreshold = 1000;

  private FileSystem fs;
  /** Directory the region of the current test was created under; removed in tearDown. */
  private Path regionRootDir;

  private static final byte[] COLUMN_FAMILY = fam1;
  private final byte[] STARTROW = Bytes.toBytes(START_KEY);
  private int compactionThreshold;

  /**
   * Creates the MOB-enabled table descriptor and the region (with its WAL) used by a test.
   *
   * @param conf configuration used for the region, the file system and all mob path lookups
   * @param mobThreshold mob threshold set on the column family
   * @throws Exception if the region cannot be created
   */
  private void init(Configuration conf, long mobThreshold) throws Exception {
    this.conf = conf;
    this.mobCellThreshold = mobThreshold;
    // Named differently from the static UTIL on purpose: a local called UTIL would shadow it.
    HBaseTestingUtility testUtil = new HBaseTestingUtility(conf);

    compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3);
    htd = testUtil.createTableDescriptor(name.getMethodName());
    hcd = new HColumnDescriptor(COLUMN_FAMILY);
    hcd.setMobEnabled(true);
    hcd.setMobThreshold(mobThreshold);
    hcd.setMaxVersions(1);
    htd.modifyFamily(hcd);

    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(htd.getTableName()).build();
    // Remember the directory taken from this utility instance (not from the static UTIL) and
    // resolve the file system before creating the region, so that tearDown can clean up the
    // right directory even when region creation fails.
    regionRootDir = testUtil.getDataTestDir();
    fs = FileSystem.get(conf);
    region = HBaseTestingUtility
        .createRegionAndWAL(regionInfo, regionRootDir, conf, htd, new MobFileCache(conf));
  }

  /**
   * Closes the region together with its WAL and deletes the directory the region was created
   * under. Tolerates a partially completed {@link #init(Configuration, long)}.
   */
  @After
  public void tearDown() throws Exception {
    try {
      if (region != null) {
        HBaseTestingUtility.closeRegionAndWAL(region);
      }
    } finally {
      region = null;
      if (fs != null && regionRootDir != null) {
        fs.delete(regionRootDir, true);
      }
      regionRootDir = null;
    }
  }

  /**
   * During compaction, cells smaller than the threshold won't be affected.
   */
  @Test
  public void testSmallerValue() throws Exception {
    init(UTIL.getConfiguration(), 500);
    byte[] dummyData = makeDummyData(300); // smaller than mob threshold
    Table loader = new RegionAsTable(region);
    // one hfile per row
    for (int i = 0; i < compactionThreshold; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", 0, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", 0, countMobRows());

    region.compactStores();

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count", 0, countMobFiles());
    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", 0, countMobRows());
  }

  /**
   * During compaction, the mob threshold size is changed: the data is written with a threshold
   * below the value size, then the threshold is raised above the value size before compacting.
   */
  @Test
  public void testLargerValue() throws Exception {
    init(UTIL.getConfiguration(), 200);
    byte[] dummyData = makeDummyData(300); // larger than mob threshold
    Table loader = new RegionAsTable(region);
    for (int i = 0; i < compactionThreshold; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", compactionThreshold, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", compactionThreshold, countMobRows());
    assertEquals("Before compaction: number of mob cells", compactionThreshold,
        countMobCellsInMetadata());
    // Change the threshold larger than the data size
    setMobThreshold(region, COLUMN_FAMILY, 500);
    region.initialize();
    region.compactStores();

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count", compactionThreshold, countMobFiles());
    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", 0, countMobRows());
  }

  /**
   * Replaces the table descriptor of the region with a copy in which the given column family
   * carries a new mob threshold.
   *
   * @param region region whose table descriptor is replaced
   * @param cfName name of the column family to modify
   * @param modThreshold new mob threshold for that family
   * @return the same region instance that was passed in
   */
  private static HRegion setMobThreshold(HRegion region, byte[] cfName, long modThreshold) {
    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder
        .newBuilder(region.getTableDescriptor().getColumnFamily(cfName))
        .setMobThreshold(modThreshold)
        .build();
    TableDescriptor td = TableDescriptorBuilder
        .newBuilder(region.getTableDescriptor())
        .removeColumnFamily(cfName)
        .setColumnFamily(cfd)
        .build();
    region.setTableDescriptor(td);
    return region;
  }

  /**
   * This test will first generate store files, then bulk load them and trigger the compaction.
   * During the compaction, the cell values are larger than the mob threshold.
   */
  @Test
  public void testMobCompactionWithBulkload() throws Exception {
    // The following will produce store files with values of 600 bytes.
    init(UTIL.getConfiguration(), 300);
    byte[] dummyData = makeDummyData(600);

    Path hbaseRootDir = CommonFSUtils.getRootDir(conf);
    Path basedir = new Path(hbaseRootDir, htd.getNameAsString());
    List<Pair<byte[], String>> hfiles = new ArrayList<>(compactionThreshold);
    for (int i = 0; i < compactionThreshold; i++) {
      Path hpath = new Path(basedir, "hfile" + i);
      hfiles.add(Pair.newPair(COLUMN_FAMILY, hpath.toString()));
      createHFile(hpath, i, dummyData);
    }

    // The following will bulk load the above generated store files and compact, with
    // 600 (value size) > 300 (threshold)
    Map<byte[], List<Path>> map = region.bulkLoadHFiles(hfiles, true, null);
    assertTrue("Bulkload result:", !map.isEmpty());
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", 0, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", 0, countMobRows());
    assertEquals("Before compaction: referenced mob file count", 0, countReferencedMobFiles());

    region.compactStores();

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count:", 1, countMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", compactionThreshold, countMobRows());
    assertEquals("After compaction: referenced mob file count", 1, countReferencedMobFiles());
    assertEquals("After compaction: number of mob cells", compactionThreshold,
        countMobCellsInMetadata());
  }

  /**
   * Deletes a row whose value is stored as a mob, runs a major compaction, and checks that the
   * family delete marker is still visible to a raw scan and that the deleted cell is not counted
   * among the cells read back from the mob files.
   */
  @Test
  public void testMajorCompactionAfterDelete() throws Exception {
    init(UTIL.getConfiguration(), 100);
    byte[] dummyData = makeDummyData(200); // larger than mob threshold
    Table loader = new RegionAsTable(region);
    // create hfiles and mob hfiles but don't trigger compaction
    int numHfiles = compactionThreshold - 1;
    byte[] deleteRow = Bytes.add(STARTROW, Bytes.toBytes(0));
    for (int i = 0; i < numHfiles; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", numHfiles, countStoreFiles());
    assertEquals("Before compaction: mob file count", numHfiles, countMobFiles());
    assertEquals("Before compaction: rows", numHfiles, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", numHfiles, countMobRows());
    assertEquals("Before compaction: number of mob cells", numHfiles, countMobCellsInMetadata());
    // now let's delete some cells that contain mobs
    Delete delete = new Delete(deleteRow);
    delete.addFamily(COLUMN_FAMILY);
    region.delete(delete);
    region.flush(true);

    assertEquals("Before compaction: store files", numHfiles + 1, countStoreFiles());
    assertEquals("Before compaction: mob files", numHfiles, countMobFiles());
    region.compact(true);
    assertEquals("After compaction: store files", 1, countStoreFiles());
    // still have original mob hfiles and now added a mob del file
    assertEquals("After compaction: mob files", numHfiles + 1, countMobFiles());

    Scan scan = new Scan();
    scan.setRaw(true);
    int deleteCount = 0;
    // try-with-resources so the scanner is released even when an assertion below fails
    try (InternalScanner scanner = region.getScanner(scan)) {
      List<Cell> results = new ArrayList<>();
      scanner.next(results);
      while (!results.isEmpty()) {
        for (Cell c : results) {
          if (c.getTypeByte() == KeyValue.Type.DeleteFamily.getCode()) {
            deleteCount++;
            assertTrue(Bytes.equals(CellUtil.cloneRow(c), deleteRow));
          }
        }
        results.clear();
        scanner.next(results);
      }
    }
    // assert the delete mark is retained after the major compaction
    assertEquals(1, deleteCount);
    // assert the deleted cell is not counted
    assertEquals("The cells in mob files", numHfiles - 1, countMobCellsInMobFiles(1));
  }

  /**
   * @return the number of store files currently held by the store of the test column family
   */
  private int countStoreFiles() throws IOException {
    HStore store = region.getStore(COLUMN_FAMILY);
    return store.getStorefilesCount();
  }

  /**
   * @return the number of entries in the mob family directory, or 0 if it does not exist
   */
  private int countMobFiles() throws IOException {
    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
    if (!fs.exists(mobDirPath)) {
      return 0;
    }
    return fs.listStatus(mobDirPath).length;
  }

  /**
   * Sums the {@code MOB_CELLS_COUNT} file-info entry over every file in the mob family directory.
   * Fails the test if a file does not carry that entry.
   *
   * @return the total mob cell count recorded in the metadata of the mob files
   */
  private long countMobCellsInMetadata() throws IOException {
    long mobCellsCount = 0;
    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
    Configuration copyOfConf = new Configuration(conf);
    copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    CacheConfig cacheConfig = new CacheConfig(copyOfConf);
    if (fs.exists(mobDirPath)) {
      for (FileStatus file : fs.listStatus(mobDirPath)) {
        HStoreFile sf = new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true);
        try {
          sf.initReader();
          Map<byte[], byte[]> fileInfo = sf.getReader().loadFileInfo();
          byte[] count = fileInfo.get(MOB_CELLS_COUNT);
          assertTrue("Missing mob cells count in " + file.getPath(), count != null);
          mobCellsCount += Bytes.toLong(count);
        } finally {
          // release the reader opened by initReader()
          sf.closeStoreFile(true);
        }
      }
    }
    return mobCellsCount;
  }

  /**
   * Builds a Put for row {@code STARTROW + rowIdx} that writes {@code dummyData} to column
   * "colX" of the test family, with durability set to skip the WAL.
   */
  private Put createPut(int rowIdx, byte[] dummyData) throws IOException {
    Put p = new Put(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)));
    p.setDurability(Durability.SKIP_WAL);
    p.addColumn(COLUMN_FAMILY, Bytes.toBytes("colX"), dummyData);
    return p;
  }

  /**
   * Create an HFile at the given path holding a single cell (row {@code STARTROW + rowIdx},
   * column "colX") whose value is {@code dummyData}. A bulk load timestamp is always appended to
   * the file info before the writer is closed.
   */
  private void createHFile(Path path, int rowIdx, byte[] dummyData) throws IOException {
    HFileContext meta = new HFileContextBuilder().build();
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
        .withFileContext(meta).create();
    long now = System.currentTimeMillis();
    try {
      KeyValue kv = new KeyValue(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)), COLUMN_FAMILY,
          Bytes.toBytes("colX"), now, dummyData);
      writer.append(kv);
    } finally {
      writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
      writer.close();
    }
  }

  /**
   * Scans the region with the {@code MOB_SCAN_RAW} attribute set and counts the cells that are
   * mob reference cells.
   *
   * @return the number of mob reference cells seen by the scan
   */
  private int countMobRows() throws IOException {
    Scan scan = new Scan();
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));

    int scannedCount = 0;
    try (InternalScanner scanner = region.getScanner(scan)) {
      List<Cell> results = new ArrayList<>();
      boolean hasMore = true;
      while (hasMore) {
        hasMore = scanner.next(results);
        for (Cell c : results) {
          if (MobUtils.isMobReferenceCell(c)) {
            scannedCount++;
          }
        }
        results.clear();
      }
    }
    return scannedCount;
  }

  /**
   * @return a new array of {@code size} random bytes
   */
  private byte[] makeDummyData(int size) {
    byte[] dummyData = new byte[size];
    new Random().nextBytes(dummyData);
    return dummyData;
  }

  /**
   * Scans the region with the {@code MOB_SCAN_RAW} attribute set and collects the distinct mob
   * file names referenced by valid mob reference cells whose recorded value length exceeds the
   * mob threshold passed to {@link #init(Configuration, long)}. Every referenced file is asserted
   * to exist in the mob family directory.
   *
   * @return the number of distinct mob files referenced
   */
  private int countReferencedMobFiles() throws IOException {
    Scan scan = new Scan();
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));

    // The family path does not depend on the cell, so resolve it once up front.
    Path familyPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(),
        hcd.getNameAsString());
    Set<String> files = new HashSet<>();
    try (InternalScanner scanner = region.getScanner(scan)) {
      List<Cell> kvs = new ArrayList<>();
      boolean hasMore;
      do {
        kvs.clear();
        hasMore = scanner.next(kvs);
        for (Cell kv : kvs) {
          if (!MobUtils.isMobReferenceCell(kv)) {
            continue;
          }
          if (!MobUtils.hasValidMobRefCellValue(kv)) {
            continue;
          }
          int size = MobUtils.getMobValueLength(kv);
          if (size <= mobCellThreshold) {
            continue;
          }
          String fileName = MobUtils.getMobFileName(kv);
          if (fileName.isEmpty()) {
            continue;
          }
          files.add(fileName);
          assertTrue(fs.exists(new Path(familyPath, fileName)));
        }
      } while (hasMore);
    }
    return files.size();
  }

  /**
   * Opens every file in the mob family directory, scans them together with a
   * {@code COMPACT_DROP_DELETES} store scanner and counts the rows returned. Also asserts that
   * the number of del files found in the directory matches the expectation.
   *
   * @param expectedNumDelfiles number of del files expected in the mob family directory
   * @return the number of rows counted by the scan, or 0 if the directory does not exist
   */
  private int countMobCellsInMobFiles(int expectedNumDelfiles) throws IOException {
    Configuration copyOfConf = new Configuration(conf);
    copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    CacheConfig cacheConfig = new CacheConfig(copyOfConf);
    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
    List<HStoreFile> sfs = new ArrayList<>();
    int numDelfiles = 0;
    int size = 0;
    if (fs.exists(mobDirPath)) {
      for (FileStatus f : fs.listStatus(mobDirPath)) {
        HStoreFile sf = new HStoreFile(fs, f.getPath(), conf, cacheConfig, BloomType.NONE, true);
        sfs.add(sf);
        if (StoreFileInfo.isDelFile(sf.getPath())) {
          numDelfiles++;
        }
      }

      try {
        List<StoreFileScanner> scanners = StoreFileScanner.getScannersForStoreFiles(sfs, false,
            true, false, false, HConstants.LATEST_TIMESTAMP);
        long timeToPurgeDeletes =
            Math.max(conf.getLong("hbase.hstore.time.to.purge.deletes", 0), 0);
        long ttl = HStore.determineTTLFromFamily(hcd);
        ScanInfo scanInfo = new ScanInfo(copyOfConf, hcd, ttl, timeToPurgeDeletes,
            CellComparatorImpl.COMPARATOR);
        StoreScanner scanner = new StoreScanner(scanInfo, ScanType.COMPACT_DROP_DELETES, scanners);
        try {
          size += UTIL.countRows(scanner);
        } finally {
          scanner.close();
        }
      } finally {
        // release the store file readers once the scan is finished
        for (HStoreFile sf : sfs) {
          sf.closeStoreFile(true);
        }
      }
    }
    // assert the number of the existing del files
    assertEquals(expectedNumDelfiles, numDelfiles);
    return size;
  }
}