001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.snapshot; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertTrue; 022import static org.junit.Assert.fail; 023 024import java.io.IOException; 025import java.util.Collections; 026import java.util.Comparator; 027import java.util.HashMap; 028import java.util.List; 029import java.util.Map; 030import java.util.concurrent.CountDownLatch; 031import org.apache.hadoop.conf.Configuration; 032import org.apache.hadoop.fs.FileSystem; 033import org.apache.hadoop.fs.Path; 034import org.apache.hadoop.hbase.HBaseClassTestRule; 035import org.apache.hadoop.hbase.HBaseTestingUtility; 036import org.apache.hadoop.hbase.HConstants; 037import org.apache.hadoop.hbase.HRegionInfo; 038import org.apache.hadoop.hbase.TableName; 039import org.apache.hadoop.hbase.TableNotFoundException; 040import org.apache.hadoop.hbase.client.Admin; 041import org.apache.hadoop.hbase.client.SnapshotDescription; 042import org.apache.hadoop.hbase.client.SnapshotType; 043import org.apache.hadoop.hbase.client.Table; 044import org.apache.hadoop.hbase.master.HMaster; 045import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; 046import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; 047import org.apache.hadoop.hbase.testclassification.LargeTests; 048import org.apache.hadoop.hbase.testclassification.RegionServerTests; 049import org.apache.hadoop.hbase.util.Bytes; 050import org.junit.After; 051import org.junit.AfterClass; 052import org.junit.Before; 053import org.junit.BeforeClass; 054import org.junit.ClassRule; 055import org.junit.Test; 056import org.junit.experimental.categories.Category; 057import org.slf4j.Logger; 058import org.slf4j.LoggerFactory; 059 060import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 061import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; 062 063/** 064 * Test creating/using/deleting snapshots from the client 065 * <p> 066 * This is an end-to-end test for the snapshot utility 067 * 068 * TODO This is essentially a clone of TestSnapshotFromClient. This is worth refactoring this 069 * because there will be a few more flavors of snapshots that need to run these tests. 070 */ 071@Category({RegionServerTests.class, LargeTests.class}) 072public class TestFlushSnapshotFromClient { 073 074 @ClassRule 075 public static final HBaseClassTestRule CLASS_RULE = 076 HBaseClassTestRule.forClass(TestFlushSnapshotFromClient.class); 077 078 private static final Logger LOG = LoggerFactory.getLogger(TestFlushSnapshotFromClient.class); 079 080 protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); 081 protected static final int NUM_RS = 2; 082 protected static final byte[] TEST_FAM = Bytes.toBytes("fam"); 083 protected static final TableName TABLE_NAME = TableName.valueOf("test"); 084 protected final int DEFAULT_NUM_ROWS = 100; 085 protected Admin admin = null; 086 087 @BeforeClass 088 public static void setupCluster() throws Exception { 089 setupConf(UTIL.getConfiguration()); 090 UTIL.startMiniCluster(NUM_RS); 091 } 092 093 protected static void setupConf(Configuration conf) { 094 // disable the ui 095 conf.setInt("hbase.regionsever.info.port", -1); 096 // change the flush size to a small amount, regulating number of store files 097 conf.setInt("hbase.hregion.memstore.flush.size", 25000); 098 // so make sure we get a compaction when doing a load, but keep around some 099 // files in the store 100 conf.setInt("hbase.hstore.compaction.min", 10); 101 conf.setInt("hbase.hstore.compactionThreshold", 10); 102 // block writes if we get to 12 store files 103 conf.setInt("hbase.hstore.blockingStoreFiles", 12); 104 // Enable snapshot 105 conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); 106 conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, 107 ConstantSizeRegionSplitPolicy.class.getName()); 108 } 109 110 @Before 111 public void setup() throws Exception { 112 createTable(); 113 this.admin = UTIL.getConnection().getAdmin(); 114 } 115 116 protected void createTable() throws Exception { 117 SnapshotTestingUtils.createTable(UTIL, TABLE_NAME, TEST_FAM); 118 } 119 120 @After 121 public void tearDown() throws Exception { 122 UTIL.deleteTable(TABLE_NAME); 123 SnapshotTestingUtils.deleteAllSnapshots(this.admin); 124 this.admin.close(); 125 SnapshotTestingUtils.deleteArchiveDirectory(UTIL); 126 } 127 128 @AfterClass 129 public static void cleanupTest() throws Exception { 130 try { 131 UTIL.shutdownMiniCluster(); 132 } catch (Exception e) { 133 LOG.warn("failure shutting down cluster", e); 134 } 135 } 136 137 /** 138 * Test simple flush snapshotting a table that is online 139 * @throws Exception 140 */ 141 @Test 142 public void testFlushTableSnapshot() throws Exception { 143 // make sure we don't fail on listing snapshots 144 SnapshotTestingUtils.assertNoSnapshots(admin); 145 146 // put some stuff in the table 147 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); 148 149 LOG.debug("FS state before snapshot:"); 150 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 151 152 // take a snapshot of the enabled table 153 String snapshotString = "offlineTableSnapshot"; 154 byte[] snapshot = Bytes.toBytes(snapshotString); 155 admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.FLUSH); 156 LOG.debug("Snapshot completed."); 157 158 // make sure we have the snapshot 159 List<SnapshotDescription> snapshots = 160 SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME); 161 162 // make sure its a valid snapshot 163 LOG.debug("FS state after snapshot:"); 164 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 165 166 SnapshotTestingUtils.confirmSnapshotValid(UTIL, 167 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM); 168 } 169 170 /** 171 * Test snapshotting a table that is online without flushing 172 * @throws Exception 173 */ 174 @Test 175 public void testSkipFlushTableSnapshot() throws Exception { 176 // make sure we don't fail on listing snapshots 177 SnapshotTestingUtils.assertNoSnapshots(admin); 178 179 // put some stuff in the table 180 Table table = UTIL.getConnection().getTable(TABLE_NAME); 181 UTIL.loadTable(table, TEST_FAM); 182 UTIL.flush(TABLE_NAME); 183 184 LOG.debug("FS state before snapshot:"); 185 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 186 187 // take a snapshot of the enabled table 188 String snapshotString = "skipFlushTableSnapshot"; 189 byte[] snapshot = Bytes.toBytes(snapshotString); 190 admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.SKIPFLUSH); 191 LOG.debug("Snapshot completed."); 192 193 // make sure we have the snapshot 194 List<SnapshotDescription> snapshots = 195 SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME); 196 197 // make sure its a valid snapshot 198 LOG.debug("FS state after snapshot:"); 199 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 200 201 SnapshotTestingUtils.confirmSnapshotValid(UTIL, 202 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM); 203 204 admin.deleteSnapshot(snapshot); 205 snapshots = admin.listSnapshots(); 206 SnapshotTestingUtils.assertNoSnapshots(admin); 207 } 208 209 210 /** 211 * Test simple flush snapshotting a table that is online 212 * @throws Exception 213 */ 214 @Test 215 public void testFlushTableSnapshotWithProcedure() throws Exception { 216 // make sure we don't fail on listing snapshots 217 SnapshotTestingUtils.assertNoSnapshots(admin); 218 219 // put some stuff in the table 220 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); 221 222 LOG.debug("FS state before snapshot:"); 223 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 224 225 // take a snapshot of the enabled table 226 String snapshotString = "offlineTableSnapshot"; 227 byte[] snapshot = Bytes.toBytes(snapshotString); 228 Map<String, String> props = new HashMap<>(); 229 props.put("table", TABLE_NAME.getNameAsString()); 230 admin.execProcedure(SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, 231 snapshotString, props); 232 233 234 LOG.debug("Snapshot completed."); 235 236 // make sure we have the snapshot 237 List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, 238 snapshot, TABLE_NAME); 239 240 // make sure its a valid snapshot 241 LOG.debug("FS state after snapshot:"); 242 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 243 244 SnapshotTestingUtils.confirmSnapshotValid(UTIL, 245 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM); 246 } 247 248 @Test 249 public void testSnapshotFailsOnNonExistantTable() throws Exception { 250 // make sure we don't fail on listing snapshots 251 SnapshotTestingUtils.assertNoSnapshots(admin); 252 TableName tableName = TableName.valueOf("_not_a_table"); 253 254 // make sure the table doesn't exist 255 boolean fail = false; 256 do { 257 try { 258 admin.getTableDescriptor(tableName); 259 fail = true; 260 LOG.error("Table:" + tableName + " already exists, checking a new name"); 261 tableName = TableName.valueOf(tableName+"!"); 262 } catch (TableNotFoundException e) { 263 fail = false; 264 } 265 } while (fail); 266 267 // snapshot the non-existant table 268 try { 269 admin.snapshot("fail", tableName, SnapshotType.FLUSH); 270 fail("Snapshot succeeded even though there is not table."); 271 } catch (SnapshotCreationException e) { 272 LOG.info("Correctly failed to snapshot a non-existant table:" + e.getMessage()); 273 } 274 } 275 276 @Test 277 public void testAsyncFlushSnapshot() throws Exception { 278 SnapshotProtos.SnapshotDescription snapshot = SnapshotProtos.SnapshotDescription.newBuilder() 279 .setName("asyncSnapshot").setTable(TABLE_NAME.getNameAsString()) 280 .setType(SnapshotProtos.SnapshotDescription.Type.FLUSH).build(); 281 282 // take the snapshot async 283 admin.takeSnapshotAsync( 284 new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH)); 285 286 // constantly loop, looking for the snapshot to complete 287 HMaster master = UTIL.getMiniHBaseCluster().getMaster(); 288 SnapshotTestingUtils.waitForSnapshotToComplete(master, snapshot, 200); 289 LOG.info(" === Async Snapshot Completed ==="); 290 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 291 292 // make sure we get the snapshot 293 SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot); 294 } 295 296 @Test 297 public void testSnapshotStateAfterMerge() throws Exception { 298 int numRows = DEFAULT_NUM_ROWS; 299 // make sure we don't fail on listing snapshots 300 SnapshotTestingUtils.assertNoSnapshots(admin); 301 // load the table so we have some data 302 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM); 303 304 // Take a snapshot 305 String snapshotBeforeMergeName = "snapshotBeforeMerge"; 306 admin.snapshot(snapshotBeforeMergeName, TABLE_NAME, SnapshotType.FLUSH); 307 308 // Clone the table 309 TableName cloneBeforeMergeName = TableName.valueOf("cloneBeforeMerge"); 310 admin.cloneSnapshot(snapshotBeforeMergeName, cloneBeforeMergeName); 311 SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneBeforeMergeName); 312 313 // Merge two regions 314 List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME); 315 Collections.sort(regions, new Comparator<HRegionInfo>() { 316 @Override 317 public int compare(HRegionInfo r1, HRegionInfo r2) { 318 return Bytes.compareTo(r1.getStartKey(), r2.getStartKey()); 319 } 320 }); 321 322 int numRegions = admin.getTableRegions(TABLE_NAME).size(); 323 int numRegionsAfterMerge = numRegions - 2; 324 admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(), 325 regions.get(2).getEncodedNameAsBytes(), true); 326 admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(), 327 regions.get(5).getEncodedNameAsBytes(), true); 328 329 // Verify that there's one region less 330 waitRegionsAfterMerge(numRegionsAfterMerge); 331 assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size()); 332 333 // Clone the table 334 TableName cloneAfterMergeName = TableName.valueOf("cloneAfterMerge"); 335 admin.cloneSnapshot(snapshotBeforeMergeName, cloneAfterMergeName); 336 SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneAfterMergeName); 337 338 verifyRowCount(UTIL, TABLE_NAME, numRows); 339 verifyRowCount(UTIL, cloneBeforeMergeName, numRows); 340 verifyRowCount(UTIL, cloneAfterMergeName, numRows); 341 342 // test that we can delete the snapshot 343 UTIL.deleteTable(cloneAfterMergeName); 344 UTIL.deleteTable(cloneBeforeMergeName); 345 } 346 347 @Test 348 public void testTakeSnapshotAfterMerge() throws Exception { 349 int numRows = DEFAULT_NUM_ROWS; 350 // make sure we don't fail on listing snapshots 351 SnapshotTestingUtils.assertNoSnapshots(admin); 352 // load the table so we have some data 353 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM); 354 355 // Merge two regions 356 List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME); 357 Collections.sort(regions, new Comparator<HRegionInfo>() { 358 @Override 359 public int compare(HRegionInfo r1, HRegionInfo r2) { 360 return Bytes.compareTo(r1.getStartKey(), r2.getStartKey()); 361 } 362 }); 363 364 int numRegions = admin.getTableRegions(TABLE_NAME).size(); 365 int numRegionsAfterMerge = numRegions - 2; 366 admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(), 367 regions.get(2).getEncodedNameAsBytes(), true); 368 admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(), 369 regions.get(5).getEncodedNameAsBytes(), true); 370 371 waitRegionsAfterMerge(numRegionsAfterMerge); 372 assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size()); 373 374 // Take a snapshot 375 String snapshotName = "snapshotAfterMerge"; 376 SnapshotTestingUtils.snapshot(admin, snapshotName, TABLE_NAME, SnapshotType.FLUSH, 3); 377 378 // Clone the table 379 TableName cloneName = TableName.valueOf("cloneMerge"); 380 admin.cloneSnapshot(snapshotName, cloneName); 381 SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneName); 382 383 verifyRowCount(UTIL, TABLE_NAME, numRows); 384 verifyRowCount(UTIL, cloneName, numRows); 385 386 // test that we can delete the snapshot 387 UTIL.deleteTable(cloneName); 388 } 389 390 /** 391 * Basic end-to-end test of simple-flush-based snapshots 392 */ 393 @Test 394 public void testFlushCreateListDestroy() throws Exception { 395 LOG.debug("------- Starting Snapshot test -------------"); 396 // make sure we don't fail on listing snapshots 397 SnapshotTestingUtils.assertNoSnapshots(admin); 398 // load the table so we have some data 399 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); 400 401 String snapshotName = "flushSnapshotCreateListDestroy"; 402 FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem(); 403 Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir(); 404 SnapshotTestingUtils.createSnapshotAndValidate(admin, TABLE_NAME, Bytes.toString(TEST_FAM), 405 snapshotName, rootDir, fs, true); 406 } 407 408 /** 409 * Demonstrate that we reject snapshot requests if there is a snapshot already running on the 410 * same table currently running and that concurrent snapshots on different tables can both 411 * succeed concurretly. 412 */ 413 @Test 414 public void testConcurrentSnapshottingAttempts() throws IOException, InterruptedException { 415 final TableName TABLE2_NAME = TableName.valueOf(TABLE_NAME + "2"); 416 417 int ssNum = 20; 418 // make sure we don't fail on listing snapshots 419 SnapshotTestingUtils.assertNoSnapshots(admin); 420 // create second testing table 421 SnapshotTestingUtils.createTable(UTIL, TABLE2_NAME, TEST_FAM); 422 // load the table so we have some data 423 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); 424 SnapshotTestingUtils.loadData(UTIL, TABLE2_NAME, DEFAULT_NUM_ROWS, TEST_FAM); 425 426 final CountDownLatch toBeSubmitted = new CountDownLatch(ssNum); 427 // We'll have one of these per thread 428 class SSRunnable implements Runnable { 429 SnapshotDescription ss; 430 SSRunnable(SnapshotDescription ss) { 431 this.ss = ss; 432 } 433 434 @Override 435 public void run() { 436 try { 437 LOG.info("Submitting snapshot request: " + ClientSnapshotDescriptionUtils 438 .toString(ProtobufUtil.createHBaseProtosSnapshotDesc(ss))); 439 admin.takeSnapshotAsync(ss); 440 } catch (Exception e) { 441 LOG.info("Exception during snapshot request: " + ClientSnapshotDescriptionUtils.toString( 442 ProtobufUtil.createHBaseProtosSnapshotDesc(ss)) 443 + ". This is ok, we expect some", e); 444 } 445 LOG.info("Submitted snapshot request: " + ClientSnapshotDescriptionUtils 446 .toString(ProtobufUtil.createHBaseProtosSnapshotDesc(ss))); 447 toBeSubmitted.countDown(); 448 } 449 }; 450 451 // build descriptions 452 SnapshotDescription[] descs = new SnapshotDescription[ssNum]; 453 for (int i = 0; i < ssNum; i++) { 454 if(i % 2 ==0) { 455 descs[i] = new SnapshotDescription("ss" + i, TABLE_NAME, SnapshotType.FLUSH); 456 } else { 457 descs[i] = new SnapshotDescription("ss" + i, TABLE2_NAME, SnapshotType.FLUSH); 458 } 459 } 460 461 // kick each off its own thread 462 for (int i=0 ; i < ssNum; i++) { 463 new Thread(new SSRunnable(descs[i])).start(); 464 } 465 466 // wait until all have been submitted 467 toBeSubmitted.await(); 468 469 // loop until all are done. 470 while (true) { 471 int doneCount = 0; 472 for (SnapshotDescription ss : descs) { 473 try { 474 if (admin.isSnapshotFinished(ss)) { 475 doneCount++; 476 } 477 } catch (Exception e) { 478 LOG.warn("Got an exception when checking for snapshot " + ss.getName(), e); 479 doneCount++; 480 } 481 } 482 if (doneCount == descs.length) { 483 break; 484 } 485 Thread.sleep(100); 486 } 487 488 // dump for debugging 489 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 490 491 List<SnapshotDescription> taken = admin.listSnapshots(); 492 int takenSize = taken.size(); 493 LOG.info("Taken " + takenSize + " snapshots: " + taken); 494 assertTrue("We expect at least 1 request to be rejected because of we concurrently" + 495 " issued many requests", takenSize < ssNum && takenSize > 0); 496 497 // Verify that there's at least one snapshot per table 498 int t1SnapshotsCount = 0; 499 int t2SnapshotsCount = 0; 500 for (SnapshotDescription ss : taken) { 501 if (ss.getTableName().equals(TABLE_NAME)) { 502 t1SnapshotsCount++; 503 } else if (ss.getTableName().equals(TABLE2_NAME)) { 504 t2SnapshotsCount++; 505 } 506 } 507 assertTrue("We expect at least 1 snapshot of table1 ", t1SnapshotsCount > 0); 508 assertTrue("We expect at least 1 snapshot of table2 ", t2SnapshotsCount > 0); 509 510 UTIL.deleteTable(TABLE2_NAME); 511 } 512 513 private void waitRegionsAfterMerge(final long numRegionsAfterMerge) 514 throws IOException, InterruptedException { 515 // Verify that there's one region less 516 long startTime = System.currentTimeMillis(); 517 while (admin.getTableRegions(TABLE_NAME).size() != numRegionsAfterMerge) { 518 // This may be flaky... if after 15sec the merge is not complete give up 519 // it will fail in the assertEquals(numRegionsAfterMerge). 520 if ((System.currentTimeMillis() - startTime) > 15000) 521 break; 522 Thread.sleep(100); 523 } 524 SnapshotTestingUtils.waitForTableToBeOnline(UTIL, TABLE_NAME); 525 } 526 527 528 protected void verifyRowCount(final HBaseTestingUtility util, final TableName tableName, 529 long expectedRows) throws IOException { 530 SnapshotTestingUtils.verifyRowCount(util, tableName, expectedRows); 531 } 532 533 protected int countRows(final Table table, final byte[]... families) throws IOException { 534 return UTIL.countRows(table, families); 535 } 536}