/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.cleaner;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.SnapshotType;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.snapshot.DisabledTableSnapshotHandler;
import org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.regionserver.CompactedHFilesDischarger;
import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteSnapshotRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * Test the master-related aspects of a snapshot
 */
@Category({MasterTests.class, MediumTests.class})
public class TestSnapshotFromMaster {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestSnapshotFromMaster.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotFromMaster.class);
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private static final int NUM_RS = 2;
  private static Path rootDir;
  private static FileSystem fs;
  private static HMaster master;

  // for hfile archiving test.
  private static Path archiveDir;
  private static final byte[] TEST_FAM = Bytes.toBytes("fam");
  private static final TableName TABLE_NAME = TableName.valueOf("test");
  // refresh the cache every 1/2 second
  private static final long cacheRefreshPeriod = 500;
  private static final int blockingStoreFiles = 12;

  /**
   * Set up the configuration and start the mini cluster.
   */
  @BeforeClass
  public static void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_RS);
    fs = UTIL.getDFSCluster().getFileSystem();
    master = UTIL.getMiniHBaseCluster().getMaster();
    rootDir = master.getMasterFileSystem().getRootDir();
    archiveDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
  }

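  /**
   * Tune the configuration so the test quickly produces several small store files, runs with no
   * extra cleaner plugins, and has snapshot support enabled. Each setting is explained inline.
   */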
  private static void setupConf(Configuration conf) {
    // disable the region server ui
    conf.setInt("hbase.regionserver.info.port", -1);
    // change the flush size to a small amount, regulating the number of store files
    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
    // make sure we get a compaction when doing a load, but keep some files around in the store
    conf.setInt("hbase.hstore.compaction.min", 2);
    conf.setInt("hbase.hstore.compactionThreshold", 5);
    // block writes if we get to 12 store files
    conf.setInt("hbase.hstore.blockingStoreFiles", blockingStoreFiles);
    // Ensure no extra cleaners are on by default (e.g. TimeToLiveHFileCleaner)
    conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, "");
    conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, "");
    // Enable snapshots
    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
    conf.setLong(SnapshotManager.HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS, 3 * 1000L);
    conf.setLong(SnapshotHFileCleaner.HFILE_CACHE_REFRESH_PERIOD_CONF_KEY, cacheRefreshPeriod);
    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
      ConstantSizeRegionSplitPolicy.class.getName());
    conf.setInt("hbase.hfile.compactions.cleaner.interval", 20 * 1000);
  }

  @Before
  public void setup() throws Exception {
    UTIL.createTable(TABLE_NAME, TEST_FAM);
    master.getSnapshotManager().setSnapshotHandlerForTesting(TABLE_NAME, null);
  }

  @After
  public void tearDown() throws Exception {
    UTIL.deleteTable(TABLE_NAME);
    SnapshotTestingUtils.deleteAllSnapshots(UTIL.getAdmin());
    SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
  }

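  /**
   * Shut down the mini cluster. Shutdown failures are ignored so they do not mask the result of
   * the test that just ran.
   */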
  @AfterClass
  public static void cleanupTest() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      // NOOP;
    }
  }

  /**
   * Test that the contract from the master for checking on a snapshot is valid.
   * <p>
   * <ol>
   * <li>If a snapshot fails with an error, we expect to get the source error.</li>
   * <li>If there is no snapshot name supplied, we should get an error.</li>
   * <li>If asking about a snapshot that hasn't occurred, you should get an error.</li>
   * </ol>
   */
  @Test
  public void testIsDoneContract() throws Exception {

    IsSnapshotDoneRequest.Builder builder = IsSnapshotDoneRequest.newBuilder();

    String snapshotName = "asyncExpectedFailureTest";

    // check that we get an exception when looking up a snapshot that hasn't happened
    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
      UnknownSnapshotException.class);

    // and that we get the same issue, even if we specify a name
    SnapshotDescription desc = SnapshotDescription.newBuilder()
      .setName(snapshotName).setTable(TABLE_NAME.getNameAsString()).build();
    builder.setSnapshot(desc);
    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
      UnknownSnapshotException.class);

    // set a mock handler to simulate a snapshot
    DisabledTableSnapshotHandler mockHandler = Mockito.mock(DisabledTableSnapshotHandler.class);
    Mockito.when(mockHandler.getException()).thenReturn(null);
    Mockito.when(mockHandler.getSnapshot()).thenReturn(desc);
    Mockito.when(mockHandler.isFinished()).thenReturn(Boolean.TRUE);
    Mockito.when(mockHandler.getCompletionTimestamp())
      .thenReturn(EnvironmentEdgeManager.currentTime());

    master.getSnapshotManager().setSnapshotHandlerForTesting(TABLE_NAME, mockHandler);

    // if we do a lookup without a snapshot name, we should fail - you should always know your name
    builder = IsSnapshotDoneRequest.newBuilder();
    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
      UnknownSnapshotException.class);

    // then do the lookup for the snapshot and make sure it is reported as done
    builder.setSnapshot(desc);
    IsSnapshotDoneResponse response =
      master.getMasterRpcServices().isSnapshotDone(null, builder.build());
    assertTrue("Snapshot didn't complete when it should have.", response.getDone());

    // now try the case where we are looking for a snapshot we didn't take
    builder.setSnapshot(SnapshotDescription.newBuilder().setName("Not A Snapshot").build());
    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
      UnknownSnapshotException.class);

    // then create a snapshot on the fs and make sure that we can find it when checking done
    snapshotName = "completed";
    desc = createSnapshot(snapshotName);

    builder.setSnapshot(desc);
    response = master.getMasterRpcServices().isSnapshotDone(null, builder.build());
    assertTrue("Completed, on-disk snapshot not found", response.getDone());
  }

  @Test
  public void testGetCompletedSnapshots() throws Exception {
    // first check when there are no snapshots
    GetCompletedSnapshotsRequest request = GetCompletedSnapshotsRequest.newBuilder().build();
    GetCompletedSnapshotsResponse response =
      master.getMasterRpcServices().getCompletedSnapshots(null, request);
    assertEquals("Found unexpected number of snapshots", 0, response.getSnapshotsCount());

    // write one snapshot to the fs
    String snapshotName = "completed";
    SnapshotDescription snapshot = createSnapshot(snapshotName);

    // check that we get one snapshot
    response = master.getMasterRpcServices().getCompletedSnapshots(null, request);
    assertEquals("Found unexpected number of snapshots", 1, response.getSnapshotsCount());
    List<SnapshotDescription> snapshots = response.getSnapshotsList();
    List<SnapshotDescription> expected = Lists.newArrayList(snapshot);
    assertEquals("Returned snapshots don't match created snapshots", expected, snapshots);

    // write a second snapshot
    snapshotName = "completed_two";
    snapshot = createSnapshot(snapshotName);
    expected.add(snapshot);

    // check that we get both snapshots
    response = master.getMasterRpcServices().getCompletedSnapshots(null, request);
    assertEquals("Found unexpected number of snapshots", 2, response.getSnapshotsCount());
    snapshots = response.getSnapshotsList();
    assertEquals("Returned snapshots don't match created snapshots", expected, snapshots);
  }

  @Test
  public void testDeleteSnapshot() throws Exception {

    String snapshotName = "completed";
    SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName(snapshotName).build();

    DeleteSnapshotRequest request = DeleteSnapshotRequest.newBuilder().setSnapshot(snapshot)
        .build();
    try {
      master.getMasterRpcServices().deleteSnapshot(null, request);
      fail("Master didn't throw exception when attempting to delete snapshot that doesn't exist");
    } catch (org.apache.hbase.thirdparty.com.google.protobuf.ServiceException e) {
      // Expected
    }

    // write one snapshot to the fs
    createSnapshot(snapshotName);

    // then delete the existing snapshot, which shouldn't cause an exception to be thrown
    master.getMasterRpcServices().deleteSnapshot(null, request);
  }

  /**
   * Test that the snapshot hfile archive cleaner works correctly. HFiles that are in snapshots
   * should be retained, while those that are not in a snapshot should be deleted.
   * @throws Exception on failure
   */
  @Test
  public void testSnapshotHFileArchiving() throws Exception {
    Admin admin = UTIL.getAdmin();
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // recreate the test table with compactions disabled; otherwise a compaction may happen before
    // the snapshot, the compaction after the snapshot would be a no-op, and the checks would fail
    UTIL.deleteTable(TABLE_NAME);
    TableDescriptor td = TableDescriptorBuilder.newBuilder(TABLE_NAME)
        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(TEST_FAM))
        .setCompactionEnabled(false)
        .build();
    UTIL.getAdmin().createTable(td);

    // load the table
    for (int i = 0; i < blockingStoreFiles / 2; i++) {
      UTIL.loadTable(UTIL.getConnection().getTable(TABLE_NAME), TEST_FAM);
      UTIL.flush(TABLE_NAME);
    }

    // disable the table so we can take a snapshot
    admin.disableTable(TABLE_NAME);

    // take a snapshot of the table
    String snapshotName = "snapshot";
    byte[] snapshotNameBytes = Bytes.toBytes(snapshotName);
    admin.snapshot(snapshotNameBytes, TABLE_NAME);

    LOG.info("After snapshot File-System state");
    FSUtils.logFileSystemState(fs, rootDir, LOG);

    // ensure we only have one snapshot
    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshotNameBytes, TABLE_NAME);

    td = TableDescriptorBuilder.newBuilder(td)
        .setCompactionEnabled(true)
        .build();
    // enable compactions now
    admin.modifyTable(td);

    // re-enable the table so we can compact the regions
    admin.enableTable(TABLE_NAME);

    // compact the files so we get some archived files for the table we just snapshotted
    List<HRegion> regions = UTIL.getHBaseCluster().getRegions(TABLE_NAME);
    for (HRegion region : regions) {
      region.waitForFlushesAndCompactions(); // enable can trigger a compaction, wait for it.
      region.compactStores(); // min is 2 so will compact and archive
    }
    List<RegionServerThread> regionServerThreads = UTIL.getMiniHBaseCluster()
        .getRegionServerThreads();
    HRegionServer hrs = null;
    for (RegionServerThread rs : regionServerThreads) {
      if (!rs.getRegionServer().getRegions(TABLE_NAME).isEmpty()) {
        hrs = rs.getRegionServer();
        break;
      }
    }
    CompactedHFilesDischarger cleaner = new CompactedHFilesDischarger(100, null, hrs, false);
    cleaner.chore();
    LOG.info("After compaction File-System state");
    FSUtils.logFileSystemState(fs, rootDir, LOG);

    // make sure the cleaner has run
    LOG.debug("Running hfile cleaners");
    ensureHFileCleanersRun();
    LOG.info("After cleaners File-System state: " + rootDir);
    FSUtils.logFileSystemState(fs, rootDir, LOG);

    // get the snapshot files for the table
    Path snapshotTable = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    Set<String> snapshotHFiles = SnapshotReferenceUtil.getHFileNames(
      UTIL.getConfiguration(), fs, snapshotTable);
    // check that the files in the archive contain the ones that we need for the snapshot
    LOG.debug("Have snapshot hfiles:");
    for (String fileName : snapshotHFiles) {
      LOG.debug(fileName);
    }
    // get the archived files for the table
    Collection<String> archives = getHFiles(archiveDir, fs, TABLE_NAME);

    // get the hfiles for the table
    Collection<String> hfiles = getHFiles(rootDir, fs, TABLE_NAME);

    // and make sure every snapshot hfile is present either in the table dir or the archive
    for (String fileName : snapshotHFiles) {
      boolean exist = archives.contains(fileName) || hfiles.contains(fileName);
      assertTrue("Archived hfiles " + archives
          + " and table hfiles " + hfiles + " are missing snapshot file: " + fileName, exist);
    }

    // delete the existing snapshot
    admin.deleteSnapshot(snapshotNameBytes);
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // make sure that we don't keep around the hfiles that aren't in a snapshot
    // make sure we wait long enough to refresh the snapshot hfile cache
    List<BaseHFileCleanerDelegate> delegates = UTIL.getMiniHBaseCluster().getMaster()
        .getHFileCleaner().cleanersChain;
    for (BaseHFileCleanerDelegate delegate : delegates) {
      if (delegate instanceof SnapshotHFileCleaner) {
        ((SnapshotHFileCleaner) delegate).getFileCacheForTesting().triggerCacheRefreshForTesting();
      }
    }
    // run the cleaner again
    LOG.debug("Running hfile cleaners");
    ensureHFileCleanersRun();
    LOG.info("After delete snapshot cleaners run File-System state");
    FSUtils.logFileSystemState(fs, rootDir, LOG);

    archives = getHFiles(archiveDir, fs, TABLE_NAME);
    assertEquals("Still have some hfiles in the archive, when their snapshot has been deleted.", 0,
      archives.size());
  }

  /**
   * @return all the HFiles for a given table in the specified dir
   * @throws IOException if the filesystem listing fails
   */
  private final Collection<String> getHFiles(Path dir, FileSystem fs, TableName tableName)
      throws IOException {
    Path tableDir = FSUtils.getTableDir(dir, tableName);
    return SnapshotTestingUtils.listHFileNames(fs, tableDir);
  }

  /**
   * Make sure the {@link HFileCleaner HFileCleaners} run at least once
   */
  private static void ensureHFileCleanersRun() {
    UTIL.getHBaseCluster().getMaster().getHFileCleaner().chore();
  }

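  /**
   * Use {@link SnapshotTestingUtils.SnapshotMock} to write a completed snapshot directly to the
   * filesystem, then return its description.
   */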
  private SnapshotDescription createSnapshot(final String snapshotName) throws IOException {
    SnapshotTestingUtils.SnapshotMock snapshotMock =
      new SnapshotTestingUtils.SnapshotMock(UTIL.getConfiguration(), fs, rootDir);
    SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder =
      snapshotMock.createSnapshotV2(snapshotName, "test", 0);
    builder.commit();
    return builder.getSnapshotDescription();
  }

  @Test
  public void testAsyncSnapshotWillNotBlockSnapshotHFileCleaner() throws Exception {
    // Write some data
    try (Table table = UTIL.getConnection().getTable(TABLE_NAME)) {
      for (int i = 0; i < 10; i++) {
        Put put =
          new Put(Bytes.toBytes(i)).addColumn(TEST_FAM, Bytes.toBytes("q"), Bytes.toBytes(i));
        table.put(put);
      }
    }
    String snapshotName = "testAsyncSnapshotWillNotBlockSnapshotHFileCleaner01";
    UTIL.getAdmin().snapshotAsync(new org.apache.hadoop.hbase.client.SnapshotDescription(
      snapshotName, TABLE_NAME, SnapshotType.FLUSH));
    Waiter.waitFor(UTIL.getConfiguration(), 10 * 1000L, 200L,
      () -> UTIL.getAdmin().listSnapshots(Pattern.compile(snapshotName)).size() == 1);
    assertTrue(master.getSnapshotManager().isTakingAnySnapshot());
    // wait past the snapshot sentinel cleanup timeout (3s, see setupConf) so the finished
    // snapshot handler is removed and the manager no longer reports an in-progress snapshot
    Thread.sleep(11 * 1000L);
    assertFalse(master.getSnapshotManager().isTakingAnySnapshot());
  }
}