001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.cleaner; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertTrue; 022import static org.junit.Assert.fail; 023 024import java.io.IOException; 025import java.util.Collection; 026import java.util.List; 027import java.util.Set; 028import java.util.regex.Pattern; 029 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.fs.FileSystem; 032import org.apache.hadoop.fs.Path; 033import org.apache.hadoop.hbase.HBaseClassTestRule; 034import org.apache.hadoop.hbase.HBaseTestingUtility; 035import org.apache.hadoop.hbase.HConstants; 036import org.apache.hadoop.hbase.TableName; 037import org.apache.hadoop.hbase.Waiter; 038import org.apache.hadoop.hbase.client.Admin; 039import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 040import org.apache.hadoop.hbase.client.Put; 041import org.apache.hadoop.hbase.client.SnapshotType; 042import org.apache.hadoop.hbase.client.Table; 043import org.apache.hadoop.hbase.client.TableDescriptor; 044import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 045import org.apache.hadoop.hbase.master.HMaster; 046import org.apache.hadoop.hbase.master.snapshot.DisabledTableSnapshotHandler; 047import org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner; 048import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; 049import org.apache.hadoop.hbase.regionserver.CompactedHFilesDischarger; 050import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; 051import org.apache.hadoop.hbase.regionserver.HRegion; 052import org.apache.hadoop.hbase.regionserver.HRegionServer; 053import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 054import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; 055import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; 056import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException; 057import org.apache.hadoop.hbase.testclassification.MasterTests; 058import org.apache.hadoop.hbase.testclassification.MediumTests; 059import org.apache.hadoop.hbase.util.Bytes; 060import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 061import org.apache.hadoop.hbase.util.FSUtils; 062import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 063import org.junit.After; 064import org.junit.AfterClass; 065import org.junit.Before; 066import org.junit.BeforeClass; 067import org.junit.ClassRule; 068import org.junit.Test; 069import org.junit.experimental.categories.Category; 070import org.mockito.Mockito; 071import org.slf4j.Logger; 072import org.slf4j.LoggerFactory; 073 074import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 075 076import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteSnapshotRequest; 077import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsRequest; 078import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsResponse; 079import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest; 080import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse; 081import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 082 083/** 084 * Test the master-related aspects of a snapshot 085 */ 086@Category({MasterTests.class, MediumTests.class}) 087public class TestSnapshotFromMaster { 088 089 @ClassRule 090 public static final HBaseClassTestRule CLASS_RULE = 091 HBaseClassTestRule.forClass(TestSnapshotFromMaster.class); 092 093 private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotFromMaster.class); 094 private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); 095 private static final int NUM_RS = 2; 096 private static Path rootDir; 097 private static FileSystem fs; 098 private static HMaster master; 099 100 // for hfile archiving test. 101 private static Path archiveDir; 102 private static final byte[] TEST_FAM = Bytes.toBytes("fam"); 103 private static final TableName TABLE_NAME = 104 TableName.valueOf("test"); 105 // refresh the cache every 1/2 second 106 private static final long cacheRefreshPeriod = 500; 107 private static final int blockingStoreFiles = 12; 108 109 /** 110 * Setup the config for the cluster 111 */ 112 @BeforeClass 113 public static void setupCluster() throws Exception { 114 setupConf(UTIL.getConfiguration()); 115 UTIL.startMiniCluster(NUM_RS); 116 fs = UTIL.getDFSCluster().getFileSystem(); 117 master = UTIL.getMiniHBaseCluster().getMaster(); 118 rootDir = master.getMasterFileSystem().getRootDir(); 119 archiveDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY); 120 } 121 122 private static void setupConf(Configuration conf) { 123 // disable the ui 124 conf.setInt("hbase.regionsever.info.port", -1); 125 // change the flush size to a small amount, regulating number of store files 126 conf.setInt("hbase.hregion.memstore.flush.size", 25000); 127 // so make sure we get a compaction when doing a load, but keep around some 128 // files in the store 129 conf.setInt("hbase.hstore.compaction.min", 2); 130 conf.setInt("hbase.hstore.compactionThreshold", 5); 131 // block writes if we get to 12 store files 132 conf.setInt("hbase.hstore.blockingStoreFiles", blockingStoreFiles); 133 // Ensure no extra cleaners on by default (e.g. TimeToLiveHFileCleaner) 134 conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, ""); 135 conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, ""); 136 // Enable snapshot 137 conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); 138 conf.setLong(SnapshotManager.HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS, 3 * 1000L); 139 conf.setLong(SnapshotHFileCleaner.HFILE_CACHE_REFRESH_PERIOD_CONF_KEY, cacheRefreshPeriod); 140 conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, 141 ConstantSizeRegionSplitPolicy.class.getName()); 142 conf.setInt("hbase.hfile.compactions.cleaner.interval", 20 * 1000); 143 } 144 145 @Before 146 public void setup() throws Exception { 147 UTIL.createTable(TABLE_NAME, TEST_FAM); 148 master.getSnapshotManager().setSnapshotHandlerForTesting(TABLE_NAME, null); 149 } 150 151 @After 152 public void tearDown() throws Exception { 153 UTIL.deleteTable(TABLE_NAME); 154 SnapshotTestingUtils.deleteAllSnapshots(UTIL.getAdmin()); 155 SnapshotTestingUtils.deleteArchiveDirectory(UTIL); 156 } 157 158 @AfterClass 159 public static void cleanupTest() throws Exception { 160 try { 161 UTIL.shutdownMiniCluster(); 162 } catch (Exception e) { 163 // NOOP; 164 } 165 } 166 167 /** 168 * Test that the contract from the master for checking on a snapshot are valid. 169 * <p> 170 * <ol> 171 * <li>If a snapshot fails with an error, we expect to get the source error.</li> 172 * <li>If there is no snapshot name supplied, we should get an error.</li> 173 * <li>If asking about a snapshot has hasn't occurred, you should get an error.</li> 174 * </ol> 175 */ 176 @Test 177 public void testIsDoneContract() throws Exception { 178 179 IsSnapshotDoneRequest.Builder builder = IsSnapshotDoneRequest.newBuilder(); 180 181 String snapshotName = "asyncExpectedFailureTest"; 182 183 // check that we get an exception when looking up snapshot where one hasn't happened 184 SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(), 185 UnknownSnapshotException.class); 186 187 // and that we get the same issue, even if we specify a name 188 SnapshotDescription desc = SnapshotDescription.newBuilder() 189 .setName(snapshotName).setTable(TABLE_NAME.getNameAsString()).build(); 190 builder.setSnapshot(desc); 191 SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(), 192 UnknownSnapshotException.class); 193 194 // set a mock handler to simulate a snapshot 195 DisabledTableSnapshotHandler mockHandler = Mockito.mock(DisabledTableSnapshotHandler.class); 196 Mockito.when(mockHandler.getException()).thenReturn(null); 197 Mockito.when(mockHandler.getSnapshot()).thenReturn(desc); 198 Mockito.when(mockHandler.isFinished()).thenReturn(Boolean.TRUE); 199 Mockito.when(mockHandler.getCompletionTimestamp()) 200 .thenReturn(EnvironmentEdgeManager.currentTime()); 201 202 master.getSnapshotManager() 203 .setSnapshotHandlerForTesting(TABLE_NAME, mockHandler); 204 205 // if we do a lookup without a snapshot name, we should fail - you should always know your name 206 builder = IsSnapshotDoneRequest.newBuilder(); 207 SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(), 208 UnknownSnapshotException.class); 209 210 // then do the lookup for the snapshot that it is done 211 builder.setSnapshot(desc); 212 IsSnapshotDoneResponse response = 213 master.getMasterRpcServices().isSnapshotDone(null, builder.build()); 214 assertTrue("Snapshot didn't complete when it should have.", response.getDone()); 215 216 // now try the case where we are looking for a snapshot we didn't take 217 builder.setSnapshot(SnapshotDescription.newBuilder().setName("Not A Snapshot").build()); 218 SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(), 219 UnknownSnapshotException.class); 220 221 // then create a snapshot to the fs and make sure that we can find it when checking done 222 snapshotName = "completed"; 223 desc = createSnapshot(snapshotName); 224 225 builder.setSnapshot(desc); 226 response = master.getMasterRpcServices().isSnapshotDone(null, builder.build()); 227 assertTrue("Completed, on-disk snapshot not found", response.getDone()); 228 } 229 230 @Test 231 public void testGetCompletedSnapshots() throws Exception { 232 // first check when there are no snapshots 233 GetCompletedSnapshotsRequest request = GetCompletedSnapshotsRequest.newBuilder().build(); 234 GetCompletedSnapshotsResponse response = 235 master.getMasterRpcServices().getCompletedSnapshots(null, request); 236 assertEquals("Found unexpected number of snapshots", 0, response.getSnapshotsCount()); 237 238 // write one snapshot to the fs 239 String snapshotName = "completed"; 240 SnapshotDescription snapshot = createSnapshot(snapshotName); 241 242 // check that we get one snapshot 243 response = master.getMasterRpcServices().getCompletedSnapshots(null, request); 244 assertEquals("Found unexpected number of snapshots", 1, response.getSnapshotsCount()); 245 List<SnapshotDescription> snapshots = response.getSnapshotsList(); 246 List<SnapshotDescription> expected = Lists.newArrayList(snapshot); 247 assertEquals("Returned snapshots don't match created snapshots", expected, snapshots); 248 249 // write a second snapshot 250 snapshotName = "completed_two"; 251 snapshot = createSnapshot(snapshotName); 252 expected.add(snapshot); 253 254 // check that we get one snapshot 255 response = master.getMasterRpcServices().getCompletedSnapshots(null, request); 256 assertEquals("Found unexpected number of snapshots", 2, response.getSnapshotsCount()); 257 snapshots = response.getSnapshotsList(); 258 assertEquals("Returned snapshots don't match created snapshots", expected, snapshots); 259 } 260 261 @Test 262 public void testDeleteSnapshot() throws Exception { 263 264 String snapshotName = "completed"; 265 SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName(snapshotName).build(); 266 267 DeleteSnapshotRequest request = DeleteSnapshotRequest.newBuilder().setSnapshot(snapshot) 268 .build(); 269 try { 270 master.getMasterRpcServices().deleteSnapshot(null, request); 271 fail("Master didn't throw exception when attempting to delete snapshot that doesn't exist"); 272 } catch (org.apache.hbase.thirdparty.com.google.protobuf.ServiceException e) { 273 // Expected 274 } 275 276 // write one snapshot to the fs 277 createSnapshot(snapshotName); 278 279 // then delete the existing snapshot,which shouldn't cause an exception to be thrown 280 master.getMasterRpcServices().deleteSnapshot(null, request); 281 } 282 283 /** 284 * Test that the snapshot hfile archive cleaner works correctly. HFiles that are in snapshots 285 * should be retained, while those that are not in a snapshot should be deleted. 286 * @throws Exception on failure 287 */ 288 @Test 289 public void testSnapshotHFileArchiving() throws Exception { 290 Admin admin = UTIL.getAdmin(); 291 // make sure we don't fail on listing snapshots 292 SnapshotTestingUtils.assertNoSnapshots(admin); 293 294 // recreate test table with disabled compactions; otherwise compaction may happen before 295 // snapshot, the call after snapshot will be a no-op and checks will fail 296 UTIL.deleteTable(TABLE_NAME); 297 TableDescriptor td = TableDescriptorBuilder.newBuilder(TABLE_NAME) 298 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(TEST_FAM)) 299 .setCompactionEnabled(false) 300 .build(); 301 UTIL.getAdmin().createTable(td); 302 303 // load the table 304 for (int i = 0; i < blockingStoreFiles / 2; i ++) { 305 UTIL.loadTable(UTIL.getConnection().getTable(TABLE_NAME), TEST_FAM); 306 UTIL.flush(TABLE_NAME); 307 } 308 309 // disable the table so we can take a snapshot 310 admin.disableTable(TABLE_NAME); 311 312 // take a snapshot of the table 313 String snapshotName = "snapshot"; 314 byte[] snapshotNameBytes = Bytes.toBytes(snapshotName); 315 admin.snapshot(snapshotNameBytes, TABLE_NAME); 316 317 LOG.info("After snapshot File-System state"); 318 FSUtils.logFileSystemState(fs, rootDir, LOG); 319 320 // ensure we only have one snapshot 321 SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshotNameBytes, TABLE_NAME); 322 323 td = TableDescriptorBuilder.newBuilder(td) 324 .setCompactionEnabled(true) 325 .build(); 326 // enable compactions now 327 admin.modifyTable(td); 328 329 // renable the table so we can compact the regions 330 admin.enableTable(TABLE_NAME); 331 332 // compact the files so we get some archived files for the table we just snapshotted 333 List<HRegion> regions = UTIL.getHBaseCluster().getRegions(TABLE_NAME); 334 for (HRegion region : regions) { 335 region.waitForFlushesAndCompactions(); // enable can trigger a compaction, wait for it. 336 region.compactStores(); // min is 2 so will compact and archive 337 } 338 List<RegionServerThread> regionServerThreads = UTIL.getMiniHBaseCluster() 339 .getRegionServerThreads(); 340 HRegionServer hrs = null; 341 for (RegionServerThread rs : regionServerThreads) { 342 if (!rs.getRegionServer().getRegions(TABLE_NAME).isEmpty()) { 343 hrs = rs.getRegionServer(); 344 break; 345 } 346 } 347 CompactedHFilesDischarger cleaner = new CompactedHFilesDischarger(100, null, hrs, false); 348 cleaner.chore(); 349 LOG.info("After compaction File-System state"); 350 FSUtils.logFileSystemState(fs, rootDir, LOG); 351 352 // make sure the cleaner has run 353 LOG.debug("Running hfile cleaners"); 354 ensureHFileCleanersRun(); 355 LOG.info("After cleaners File-System state: " + rootDir); 356 FSUtils.logFileSystemState(fs, rootDir, LOG); 357 358 // get the snapshot files for the table 359 Path snapshotTable = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); 360 Set<String> snapshotHFiles = SnapshotReferenceUtil.getHFileNames( 361 UTIL.getConfiguration(), fs, snapshotTable); 362 // check that the files in the archive contain the ones that we need for the snapshot 363 LOG.debug("Have snapshot hfiles:"); 364 for (String fileName : snapshotHFiles) { 365 LOG.debug(fileName); 366 } 367 // get the archived files for the table 368 Collection<String> archives = getHFiles(archiveDir, fs, TABLE_NAME); 369 370 // get the hfiles for the table 371 Collection<String> hfiles = getHFiles(rootDir, fs, TABLE_NAME); 372 373 // and make sure that there is a proper subset 374 for (String fileName : snapshotHFiles) { 375 boolean exist = archives.contains(fileName) || hfiles.contains(fileName); 376 assertTrue("Archived hfiles " + archives 377 + " and table hfiles " + hfiles + " is missing snapshot file:" + fileName, exist); 378 } 379 380 // delete the existing snapshot 381 admin.deleteSnapshot(snapshotNameBytes); 382 SnapshotTestingUtils.assertNoSnapshots(admin); 383 384 // make sure that we don't keep around the hfiles that aren't in a snapshot 385 // make sure we wait long enough to refresh the snapshot hfile 386 List<BaseHFileCleanerDelegate> delegates = UTIL.getMiniHBaseCluster().getMaster() 387 .getHFileCleaner().cleanersChain; 388 for (BaseHFileCleanerDelegate delegate: delegates) { 389 if (delegate instanceof SnapshotHFileCleaner) { 390 ((SnapshotHFileCleaner)delegate).getFileCacheForTesting().triggerCacheRefreshForTesting(); 391 } 392 } 393 // run the cleaner again 394 LOG.debug("Running hfile cleaners"); 395 ensureHFileCleanersRun(); 396 LOG.info("After delete snapshot cleaners run File-System state"); 397 FSUtils.logFileSystemState(fs, rootDir, LOG); 398 399 archives = getHFiles(archiveDir, fs, TABLE_NAME); 400 assertEquals("Still have some hfiles in the archive, when their snapshot has been deleted.", 0, 401 archives.size()); 402 } 403 404 /** 405 * @return all the HFiles for a given table in the specified dir 406 * @throws IOException on expected failure 407 */ 408 private final Collection<String> getHFiles(Path dir, FileSystem fs, TableName tableName) throws IOException { 409 Path tableDir = FSUtils.getTableDir(dir, tableName); 410 return SnapshotTestingUtils.listHFileNames(fs, tableDir); 411 } 412 413 /** 414 * Make sure the {@link HFileCleaner HFileCleaners} run at least once 415 */ 416 private static void ensureHFileCleanersRun() { 417 UTIL.getHBaseCluster().getMaster().getHFileCleaner().chore(); 418 } 419 420 private SnapshotDescription createSnapshot(final String snapshotName) throws IOException { 421 SnapshotTestingUtils.SnapshotMock snapshotMock = 422 new SnapshotTestingUtils.SnapshotMock(UTIL.getConfiguration(), fs, rootDir); 423 SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder = 424 snapshotMock.createSnapshotV2(snapshotName, "test", 0); 425 builder.commit(); 426 return builder.getSnapshotDescription(); 427 } 428 429 @Test 430 public void testAsyncSnapshotWillNotBlockSnapshotHFileCleaner() throws Exception { 431 // Write some data 432 Table table = UTIL.getConnection().getTable(TABLE_NAME); 433 for (int i = 0; i < 10; i++) { 434 Put put = new Put(Bytes.toBytes(i)).addColumn(TEST_FAM, Bytes.toBytes("q"), Bytes.toBytes(i)); 435 table.put(put); 436 } 437 String snapshotName = "testAsyncSnapshotWillNotBlockSnapshotHFileCleaner01"; 438 UTIL.getAdmin().snapshotAsync(new org.apache.hadoop.hbase.client.SnapshotDescription( 439 snapshotName, TABLE_NAME, SnapshotType.FLUSH)); 440 Waiter.waitFor(UTIL.getConfiguration(), 10 * 1000L, 200L, 441 () -> UTIL.getAdmin().listSnapshots(Pattern.compile(snapshotName)).size() == 1); 442 UTIL.waitFor(30000, () -> !master.getSnapshotManager().isTakingAnySnapshot()); 443 } 444}