/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.snapshot;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;

import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.SnapshotDescription;
import org.apache.hadoop.hbase.client.SnapshotType;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;

/**
 * Test creating/using/deleting snapshots from the client.
 * <p>
 * This is an end-to-end test for the snapshot utility.
 * <p>
 * TODO: This is essentially a clone of TestSnapshotFromClient. It is worth refactoring because
 * there will be a few more flavors of snapshots that need to run these tests.
 */
@Tag(RegionServerTests.TAG)
@Tag(LargeTests.TAG)
public class TestFlushSnapshotFromClient {

  private static final Logger LOG = LoggerFactory.getLogger(TestFlushSnapshotFromClient.class);

  /** How long {@link #waitRegionsAfterMerge(long)} polls before giving up, in milliseconds. */
  private static final long MERGE_WAIT_TIMEOUT_MS = 15000;
  /** Pause between two region-count checks while waiting for merges, in milliseconds. */
  private static final long MERGE_POLL_INTERVAL_MS = 100;
  /** Pause between two "is snapshot done" checks against the master, in milliseconds. */
  private static final long SNAPSHOT_POLL_INTERVAL_MS = 200;

  protected static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
  protected static final int NUM_RS = 2;
  protected static final byte[] TEST_FAM = Bytes.toBytes("fam");
  protected static final TableName TABLE_NAME = TableName.valueOf("test");
  protected final int DEFAULT_NUM_ROWS = 100;
  protected Admin admin = null;

  /**
   * Configures and starts the mini cluster, but only when the class under test is exactly
   * {@link TestFlushSnapshotFromClient}; for any other test class this method returns without
   * doing anything (presumably so subclasses can start the cluster with their own configuration).
   * @param testInfo JUnit-provided information about the test class being executed
   */
  @BeforeAll
  public static void setupCluster(TestInfo testInfo) throws Exception {
    if (testInfo.getTestClass().orElse(null) != TestFlushSnapshotFromClient.class) {
      return;
    }
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_RS);
  }

  /**
   * Applies the settings these tests rely on to the given configuration.
   * @param conf the configuration to modify in place
   */
  protected static void setupConf(Configuration conf) {
    // disable the ui
    conf.setInt("hbase.regionserver.info.port", -1);
    // change the flush size to a small amount, regulating number of store files
    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
    // so make sure we get a compaction when doing a load, but keep around some
    // files in the store
    conf.setInt("hbase.hstore.compaction.min", 10);
    conf.setInt("hbase.hstore.compactionThreshold", 10);
    // block writes if we get to 12 store files
    conf.setInt("hbase.hstore.blockingStoreFiles", 12);
    // Enable snapshot
    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
      ConstantSizeRegionSplitPolicy.class.getName());
  }

  /** Creates the test table and obtains a fresh {@link Admin} before each test. */
  @BeforeEach
  public void setup() throws Exception {
    createTable();
    this.admin = UTIL.getConnection().getAdmin();
  }

  /** Creates {@link #TABLE_NAME} with the single family {@link #TEST_FAM}. */
  protected void createTable() throws Exception {
    SnapshotTestingUtils.createTable(UTIL, TABLE_NAME, TEST_FAM);
  }

  /**
   * Deletes the test table, all snapshots and the archive directory after each test. The
   * {@link Admin} is closed even when deleting the table or the snapshots fails.
   */
  @AfterEach
  public void tearDown() throws Exception {
    try {
      UTIL.deleteTable(TABLE_NAME);
      SnapshotTestingUtils.deleteAllSnapshots(this.admin);
    } finally {
      this.admin.close();
    }
    SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
  }

  /** Shuts the mini cluster down; a failure to do so is logged rather than rethrown. */
  @AfterAll
  public static void cleanupTest() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      LOG.warn("failure shutting down cluster", e);
    }
  }

  /**
   * Test simple flush snapshotting a table that is online
   */
  @Test
  public void testFlushTableSnapshot() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // put some stuff in the table
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);

    LOG.debug("FS state before snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // take a snapshot of the enabled table
    String snapshotString = "offlineTableSnapshot";
    byte[] snapshot = Bytes.toBytes(snapshotString);
    admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.FLUSH);
    LOG.debug("Snapshot completed.");

    // make sure we have the snapshot
    List<SnapshotDescription> snapshots =
      SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);

    // make sure its a valid snapshot
    LOG.debug("FS state after snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
  }

  /**
   * Test snapshotting a table that is online without flushing
   */
  @Test
  public void testSkipFlushTableSnapshot() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // put some stuff in the table; the Table handle is only needed for the load
    try (Table table = UTIL.getConnection().getTable(TABLE_NAME)) {
      UTIL.loadTable(table, TEST_FAM);
    }
    UTIL.flush(TABLE_NAME);

    LOG.debug("FS state before snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // take a snapshot of the enabled table
    String snapshotName = "skipFlushTableSnapshot";
    admin.snapshot(snapshotName, TABLE_NAME, SnapshotType.SKIPFLUSH);
    LOG.debug("Snapshot completed.");

    // make sure we have the snapshot
    List<SnapshotDescription> snapshots =
      SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshotName, TABLE_NAME);

    // make sure its a valid snapshot
    LOG.debug("FS state after snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);

    // the snapshot must be gone once it has been deleted
    admin.deleteSnapshot(snapshotName);
    SnapshotTestingUtils.assertNoSnapshots(admin);
  }

  /**
   * Test flush snapshotting a table that is online, triggering the snapshot through
   * {@link Admin#execProcedure} instead of {@link Admin#snapshot}.
   */
  @Test
  public void testFlushTableSnapshotWithProcedure() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // put some stuff in the table
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);

    LOG.debug("FS state before snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // take a snapshot of the enabled table
    String snapshotString = "offlineTableSnapshot";
    byte[] snapshot = Bytes.toBytes(snapshotString);
    Map<String, String> props = new HashMap<>();
    props.put("table", TABLE_NAME.getNameAsString());
    admin.execProcedure(SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, snapshotString,
      props);

    LOG.debug("Snapshot completed.");

    // make sure we have the snapshot
    List<SnapshotDescription> snapshots =
      SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);

    // make sure its a valid snapshot
    LOG.debug("FS state after snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
  }

  /**
   * Test that snapshotting a table that does not exist fails with a
   * {@link SnapshotCreationException}.
   */
  @Test
  public void testSnapshotFailsOnNonExistantTable() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    TableName tableName = TableName.valueOf("_not_a_table");

    // make sure the table doesn't exist
    boolean exists;
    do {
      try {
        admin.getDescriptor(tableName);
        exists = true;
        LOG.error("Table:{} already exists, checking a new name", tableName);
        // append a character that is legal in table names so valueOf() accepts the new candidate
        tableName = TableName.valueOf(tableName.getNameAsString() + "_");
      } catch (TableNotFoundException e) {
        exists = false;
      }
    } while (exists);

    // snapshot the non-existant table
    try {
      admin.snapshot("fail", tableName, SnapshotType.FLUSH);
      fail("Snapshot succeeded even though there is not table.");
    } catch (SnapshotCreationException e) {
      LOG.info("Correctly failed to snapshot a non-existant table:{}", e.getMessage());
    }
  }

  /**
   * Helper method for testing async snapshot operations. Just waits for the given snapshot to
   * complete on the server by repeatedly checking the master.
   * @param master       the master running the snapshot
   * @param snapshot     the snapshot to check
   * @param timeoutNanos the overall time to keep polling, in nanoseconds, before giving up
   * @throws TimeoutException if the snapshot is not reported done within {@code timeoutNanos}
   */
  private static void waitForSnapshotToComplete(HMaster master,
    SnapshotProtos.SnapshotDescription snapshot, long timeoutNanos) throws Exception {
    final IsSnapshotDoneRequest request =
      IsSnapshotDoneRequest.newBuilder().setSnapshot(snapshot).build();
    long start = System.nanoTime();
    while (System.nanoTime() - start < timeoutNanos) {
      try {
        IsSnapshotDoneResponse done = master.getMasterRpcServices().isSnapshotDone(null, request);
        if (done.getDone()) {
          return;
        }
      } catch (ServiceException e) {
        // ignore UnknownSnapshotException, this is possible as for AsyncAdmin, the method will
        // return immediately after sending out the request, no matter whether the master has
        // processed the request or not.
        if (!(e.getCause() instanceof UnknownSnapshotException)) {
          throw e;
        }
      }

      Thread.sleep(SNAPSHOT_POLL_INTERVAL_MS);
    }
    throw new TimeoutException("Timeout waiting for snapshot " + snapshot + " to complete");
  }

  /**
   * Test taking a flush snapshot asynchronously and polling the master until it is done.
   */
  @Test
  public void testAsyncFlushSnapshot() throws Exception {
    String snapshotName = "asyncSnapshot";
    SnapshotProtos.SnapshotDescription snapshot = SnapshotProtos.SnapshotDescription.newBuilder()
      .setName(snapshotName).setTable(TABLE_NAME.getNameAsString())
      .setType(SnapshotProtos.SnapshotDescription.Type.FLUSH).build();

    // take the snapshot async
    admin.snapshotAsync(new SnapshotDescription(snapshotName, TABLE_NAME, SnapshotType.FLUSH));

    // constantly loop, looking for the snapshot to complete
    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
    waitForSnapshotToComplete(master, snapshot, TimeUnit.MINUTES.toNanos(1));
    LOG.info(" === Async Snapshot Completed ===");
    master.getMasterFileSystem().logFileSystemState(LOG);

    // make sure we get the snapshot
    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot);
  }

  /**
   * Test that a snapshot taken before regions are merged can still be cloned after the merge,
   * and that the source table and both clones hold the expected number of rows.
   */
  @Test
  public void testSnapshotStateAfterMerge() throws Exception {
    int numRows = DEFAULT_NUM_ROWS;
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // load the table so we have some data
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);

    // Take a snapshot
    String snapshotBeforeMergeName = "snapshotBeforeMerge";
    admin.snapshot(snapshotBeforeMergeName, TABLE_NAME, SnapshotType.FLUSH);

    // Clone the table
    TableName cloneBeforeMergeName = TableName.valueOf("cloneBeforeMerge");
    admin.cloneSnapshot(snapshotBeforeMergeName, cloneBeforeMergeName);
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneBeforeMergeName);

    // Merge two pairs of regions and verify that there are two regions less
    mergeTwoRegionPairsAndVerify();

    // Clone the table
    TableName cloneAfterMergeName = TableName.valueOf("cloneAfterMerge");
    admin.cloneSnapshot(snapshotBeforeMergeName, cloneAfterMergeName);
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneAfterMergeName);

    verifyRowCount(UTIL, TABLE_NAME, numRows);
    verifyRowCount(UTIL, cloneBeforeMergeName, numRows);
    verifyRowCount(UTIL, cloneAfterMergeName, numRows);

    // clean up the cloned tables
    UTIL.deleteTable(cloneAfterMergeName);
    UTIL.deleteTable(cloneBeforeMergeName);
  }

  /**
   * Test that a snapshot taken after regions were merged can be cloned, and that the source
   * table and the clone hold the expected number of rows.
   */
  @Test
  public void testTakeSnapshotAfterMerge() throws Exception {
    int numRows = DEFAULT_NUM_ROWS;
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // load the table so we have some data
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);

    // Merge two pairs of regions and verify that there are two regions less
    mergeTwoRegionPairsAndVerify();

    // Take a snapshot
    String snapshotName = "snapshotAfterMerge";
    SnapshotTestingUtils.snapshot(admin, snapshotName, TABLE_NAME, SnapshotType.FLUSH, 3);

    // Clone the table
    TableName cloneName = TableName.valueOf("cloneMerge");
    admin.cloneSnapshot(snapshotName, cloneName);
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneName);

    verifyRowCount(UTIL, TABLE_NAME, numRows);
    verifyRowCount(UTIL, cloneName, numRows);

    // clean up the cloned table
    UTIL.deleteTable(cloneName);
  }

  /**
   * Basic end-to-end test of simple-flush-based snapshots
   */
  @Test
  public void testFlushCreateListDestroy() throws Exception {
    LOG.debug("------- Starting Snapshot test -------------");
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // load the table so we have some data
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);

    String snapshotName = "flushSnapshotCreateListDestroy";
    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
    SnapshotTestingUtils.createSnapshotAndValidate(admin, TABLE_NAME, Bytes.toString(TEST_FAM),
      snapshotName, rootDir, fs, true);
  }

  /**
   * Returns the regions of {@link #TABLE_NAME} ordered by start key.
   */
  private List<RegionInfo> getRegionsSortedByStartKey() throws IOException {
    List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
    Collections.sort(regions,
      Comparator.comparing(RegionInfo::getStartKey, Bytes.BYTES_COMPARATOR));
    return regions;
  }

  /**
   * Merges the regions at sorted positions 1+2 and 4+5 of {@link #TABLE_NAME}, waits for the
   * merges to finish and asserts that the table ends up with two regions fewer than before.
   * Requires the table to have at least six regions.
   */
  private void mergeTwoRegionPairsAndVerify() throws Exception {
    List<RegionInfo> regions = getRegionsSortedByStartKey();
    // each of the two merges turns two regions into one
    int numRegionsAfterMerge = regions.size() - 2;
    admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(),
      regions.get(2).getEncodedNameAsBytes(), true);
    admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(),
      regions.get(5).getEncodedNameAsBytes(), true);

    waitRegionsAfterMerge(numRegionsAfterMerge);
    assertEquals(numRegionsAfterMerge, admin.getRegions(TABLE_NAME).size());
  }

  /**
   * Polls until {@link #TABLE_NAME} has the expected number of regions or the wait times out,
   * then waits for the table to be online. A timeout is not reported here; callers are expected
   * to assert the region count afterwards.
   * @param numRegionsAfterMerge the region count expected once the merges have completed
   */
  private void waitRegionsAfterMerge(final long numRegionsAfterMerge)
    throws IOException, InterruptedException {
    long startTime = EnvironmentEdgeManager.currentTime();
    while (admin.getRegions(TABLE_NAME).size() != numRegionsAfterMerge) {
      // This may be flaky... if after 15sec the merge is not complete give up
      // it will fail in the assertEquals(numRegionsAfterMerge).
      if ((EnvironmentEdgeManager.currentTime() - startTime) > MERGE_WAIT_TIMEOUT_MS) {
        break;
      }
      Thread.sleep(MERGE_POLL_INTERVAL_MS);
    }
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, TABLE_NAME);
  }

  /**
   * Verifies that the given table holds the expected number of rows; delegates to
   * {@link SnapshotTestingUtils#verifyRowCount}.
   * @param util         the testing utility used to reach the cluster
   * @param tableName    the table to check
   * @param expectedRows the number of rows the table is expected to contain
   */
  protected void verifyRowCount(final HBaseTestingUtil util, final TableName tableName,
    long expectedRows) throws IOException {
    SnapshotTestingUtils.verifyRowCount(util, tableName, expectedRows);
  }

  /**
   * Counts the rows of the given table; delegates to {@link HBaseTestingUtil#countRows}.
   * @param table    the table to scan
   * @param families the column families passed through to the row count
   * @return the number of rows counted
   */
  protected int countRows(final Table table, final byte[]... families) throws IOException {
    return UTIL.countRows(table, families);
  }
}