001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.snapshot; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.fail; 022 023import java.io.IOException; 024import java.util.Collections; 025import java.util.Comparator; 026import java.util.HashMap; 027import java.util.List; 028import java.util.Map; 029import java.util.concurrent.TimeUnit; 030import java.util.concurrent.TimeoutException; 031import org.apache.hadoop.conf.Configuration; 032import org.apache.hadoop.fs.FileSystem; 033import org.apache.hadoop.fs.Path; 034import org.apache.hadoop.hbase.HBaseClassTestRule; 035import org.apache.hadoop.hbase.HBaseTestingUtility; 036import org.apache.hadoop.hbase.HConstants; 037import org.apache.hadoop.hbase.HRegionInfo; 038import org.apache.hadoop.hbase.TableName; 039import org.apache.hadoop.hbase.TableNotFoundException; 040import org.apache.hadoop.hbase.client.Admin; 041import org.apache.hadoop.hbase.client.SnapshotDescription; 042import org.apache.hadoop.hbase.client.SnapshotType; 043import org.apache.hadoop.hbase.client.Table; 044import org.apache.hadoop.hbase.master.HMaster; 045import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; 046import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; 047import org.apache.hadoop.hbase.testclassification.LargeTests; 048import org.apache.hadoop.hbase.testclassification.RegionServerTests; 049import org.apache.hadoop.hbase.util.Bytes; 050import org.junit.After; 051import org.junit.AfterClass; 052import org.junit.Before; 053import org.junit.BeforeClass; 054import org.junit.ClassRule; 055import org.junit.Test; 056import org.junit.experimental.categories.Category; 057import org.slf4j.Logger; 058import org.slf4j.LoggerFactory; 059 060import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 061import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; 062 063/** 064 * Test creating/using/deleting snapshots from the client 065 * <p> 066 * This is an end-to-end test for the snapshot utility 067 * 068 * TODO This is essentially a clone of TestSnapshotFromClient. This is worth refactoring this 069 * because there will be a few more flavors of snapshots that need to run these tests. 070 */ 071@Category({RegionServerTests.class, LargeTests.class}) 072public class TestFlushSnapshotFromClient { 073 074 @ClassRule 075 public static final HBaseClassTestRule CLASS_RULE = 076 HBaseClassTestRule.forClass(TestFlushSnapshotFromClient.class); 077 078 private static final Logger LOG = LoggerFactory.getLogger(TestFlushSnapshotFromClient.class); 079 080 protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); 081 protected static final int NUM_RS = 2; 082 protected static final byte[] TEST_FAM = Bytes.toBytes("fam"); 083 protected static final TableName TABLE_NAME = TableName.valueOf("test"); 084 protected final int DEFAULT_NUM_ROWS = 100; 085 protected Admin admin = null; 086 087 @BeforeClass 088 public static void setupCluster() throws Exception { 089 setupConf(UTIL.getConfiguration()); 090 UTIL.startMiniCluster(NUM_RS); 091 } 092 093 protected static void setupConf(Configuration conf) { 094 // disable the ui 095 conf.setInt("hbase.regionsever.info.port", -1); 096 // change the flush size to a small amount, regulating number of store files 097 conf.setInt("hbase.hregion.memstore.flush.size", 25000); 098 // so make sure we get a compaction when doing a load, but keep around some 099 // files in the store 100 conf.setInt("hbase.hstore.compaction.min", 10); 101 conf.setInt("hbase.hstore.compactionThreshold", 10); 102 // block writes if we get to 12 store files 103 conf.setInt("hbase.hstore.blockingStoreFiles", 12); 104 // Enable snapshot 105 conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); 106 conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, 107 ConstantSizeRegionSplitPolicy.class.getName()); 108 } 109 110 @Before 111 public void setup() throws Exception { 112 createTable(); 113 this.admin = UTIL.getConnection().getAdmin(); 114 } 115 116 protected void createTable() throws Exception { 117 SnapshotTestingUtils.createTable(UTIL, TABLE_NAME, TEST_FAM); 118 } 119 120 @After 121 public void tearDown() throws Exception { 122 UTIL.deleteTable(TABLE_NAME); 123 SnapshotTestingUtils.deleteAllSnapshots(this.admin); 124 this.admin.close(); 125 SnapshotTestingUtils.deleteArchiveDirectory(UTIL); 126 } 127 128 @AfterClass 129 public static void cleanupTest() throws Exception { 130 try { 131 UTIL.shutdownMiniCluster(); 132 } catch (Exception e) { 133 LOG.warn("failure shutting down cluster", e); 134 } 135 } 136 137 /** 138 * Test simple flush snapshotting a table that is online 139 */ 140 @Test 141 public void testFlushTableSnapshot() throws Exception { 142 // make sure we don't fail on listing snapshots 143 SnapshotTestingUtils.assertNoSnapshots(admin); 144 145 // put some stuff in the table 146 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); 147 148 LOG.debug("FS state before snapshot:"); 149 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 150 151 // take a snapshot of the enabled table 152 String snapshotString = "offlineTableSnapshot"; 153 byte[] snapshot = Bytes.toBytes(snapshotString); 154 admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.FLUSH); 155 LOG.debug("Snapshot completed."); 156 157 // make sure we have the snapshot 158 List<SnapshotDescription> snapshots = 159 SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME); 160 161 // make sure its a valid snapshot 162 LOG.debug("FS state after snapshot:"); 163 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 164 165 SnapshotTestingUtils.confirmSnapshotValid(UTIL, 166 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM); 167 } 168 169 /** 170 * Test snapshotting a table that is online without flushing 171 */ 172 @Test 173 public void testSkipFlushTableSnapshot() throws Exception { 174 // make sure we don't fail on listing snapshots 175 SnapshotTestingUtils.assertNoSnapshots(admin); 176 177 // put some stuff in the table 178 Table table = UTIL.getConnection().getTable(TABLE_NAME); 179 UTIL.loadTable(table, TEST_FAM); 180 UTIL.flush(TABLE_NAME); 181 182 LOG.debug("FS state before snapshot:"); 183 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 184 185 // take a snapshot of the enabled table 186 String snapshotString = "skipFlushTableSnapshot"; 187 byte[] snapshot = Bytes.toBytes(snapshotString); 188 admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.SKIPFLUSH); 189 LOG.debug("Snapshot completed."); 190 191 // make sure we have the snapshot 192 List<SnapshotDescription> snapshots = 193 SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME); 194 195 // make sure its a valid snapshot 196 LOG.debug("FS state after snapshot:"); 197 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 198 199 SnapshotTestingUtils.confirmSnapshotValid(UTIL, 200 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM); 201 202 admin.deleteSnapshot(snapshot); 203 snapshots = admin.listSnapshots(); 204 SnapshotTestingUtils.assertNoSnapshots(admin); 205 } 206 207 208 /** 209 * Test simple flush snapshotting a table that is online 210 */ 211 @Test 212 public void testFlushTableSnapshotWithProcedure() throws Exception { 213 // make sure we don't fail on listing snapshots 214 SnapshotTestingUtils.assertNoSnapshots(admin); 215 216 // put some stuff in the table 217 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); 218 219 LOG.debug("FS state before snapshot:"); 220 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 221 222 // take a snapshot of the enabled table 223 String snapshotString = "offlineTableSnapshot"; 224 byte[] snapshot = Bytes.toBytes(snapshotString); 225 Map<String, String> props = new HashMap<>(); 226 props.put("table", TABLE_NAME.getNameAsString()); 227 admin.execProcedure(SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, 228 snapshotString, props); 229 230 231 LOG.debug("Snapshot completed."); 232 233 // make sure we have the snapshot 234 List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, 235 snapshot, TABLE_NAME); 236 237 // make sure its a valid snapshot 238 LOG.debug("FS state after snapshot:"); 239 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 240 241 SnapshotTestingUtils.confirmSnapshotValid(UTIL, 242 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM); 243 } 244 245 @Test 246 public void testSnapshotFailsOnNonExistantTable() throws Exception { 247 // make sure we don't fail on listing snapshots 248 SnapshotTestingUtils.assertNoSnapshots(admin); 249 TableName tableName = TableName.valueOf("_not_a_table"); 250 251 // make sure the table doesn't exist 252 boolean fail = false; 253 do { 254 try { 255 admin.getTableDescriptor(tableName); 256 fail = true; 257 LOG.error("Table:" + tableName + " already exists, checking a new name"); 258 tableName = TableName.valueOf(tableName+"!"); 259 } catch (TableNotFoundException e) { 260 fail = false; 261 } 262 } while (fail); 263 264 // snapshot the non-existant table 265 try { 266 admin.snapshot("fail", tableName, SnapshotType.FLUSH); 267 fail("Snapshot succeeded even though there is not table."); 268 } catch (SnapshotCreationException e) { 269 LOG.info("Correctly failed to snapshot a non-existant table:" + e.getMessage()); 270 } 271 } 272 273 @Test 274 public void testAsyncFlushSnapshot() throws Exception { 275 SnapshotProtos.SnapshotDescription snapshot = SnapshotProtos.SnapshotDescription.newBuilder() 276 .setName("asyncSnapshot").setTable(TABLE_NAME.getNameAsString()) 277 .setType(SnapshotProtos.SnapshotDescription.Type.FLUSH).build(); 278 279 // take the snapshot async 280 admin.takeSnapshotAsync( 281 new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH)); 282 283 // constantly loop, looking for the snapshot to complete 284 HMaster master = UTIL.getMiniHBaseCluster().getMaster(); 285 SnapshotTestingUtils.waitForSnapshotToComplete(master, snapshot, 200); 286 LOG.info(" === Async Snapshot Completed ==="); 287 UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); 288 289 // make sure we get the snapshot 290 SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot); 291 } 292 293 @Test 294 public void testSnapshotStateAfterMerge() throws Exception { 295 int numRows = DEFAULT_NUM_ROWS; 296 // make sure we don't fail on listing snapshots 297 SnapshotTestingUtils.assertNoSnapshots(admin); 298 // load the table so we have some data 299 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM); 300 301 // Take a snapshot 302 String snapshotBeforeMergeName = "snapshotBeforeMerge"; 303 admin.snapshot(snapshotBeforeMergeName, TABLE_NAME, SnapshotType.FLUSH); 304 305 // Clone the table 306 TableName cloneBeforeMergeName = TableName.valueOf("cloneBeforeMerge"); 307 admin.cloneSnapshot(snapshotBeforeMergeName, cloneBeforeMergeName); 308 SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneBeforeMergeName); 309 310 // Merge two regions 311 List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME); 312 Collections.sort(regions, new Comparator<HRegionInfo>() { 313 @Override 314 public int compare(HRegionInfo r1, HRegionInfo r2) { 315 return Bytes.compareTo(r1.getStartKey(), r2.getStartKey()); 316 } 317 }); 318 319 int numRegions = admin.getTableRegions(TABLE_NAME).size(); 320 int numRegionsAfterMerge = numRegions - 2; 321 admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(), 322 regions.get(2).getEncodedNameAsBytes(), true); 323 admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(), 324 regions.get(5).getEncodedNameAsBytes(), true); 325 326 // Verify that there's one region less 327 waitRegionsAfterMerge(numRegionsAfterMerge); 328 assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size()); 329 330 // Clone the table 331 TableName cloneAfterMergeName = TableName.valueOf("cloneAfterMerge"); 332 admin.cloneSnapshot(snapshotBeforeMergeName, cloneAfterMergeName); 333 SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneAfterMergeName); 334 335 verifyRowCount(UTIL, TABLE_NAME, numRows); 336 verifyRowCount(UTIL, cloneBeforeMergeName, numRows); 337 verifyRowCount(UTIL, cloneAfterMergeName, numRows); 338 339 // test that we can delete the snapshot 340 UTIL.deleteTable(cloneAfterMergeName); 341 UTIL.deleteTable(cloneBeforeMergeName); 342 } 343 344 @Test 345 public void testTakeSnapshotAfterMerge() throws Exception { 346 int numRows = DEFAULT_NUM_ROWS; 347 // make sure we don't fail on listing snapshots 348 SnapshotTestingUtils.assertNoSnapshots(admin); 349 // load the table so we have some data 350 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM); 351 352 // Merge two regions 353 List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME); 354 Collections.sort(regions, new Comparator<HRegionInfo>() { 355 @Override 356 public int compare(HRegionInfo r1, HRegionInfo r2) { 357 return Bytes.compareTo(r1.getStartKey(), r2.getStartKey()); 358 } 359 }); 360 361 int numRegions = admin.getTableRegions(TABLE_NAME).size(); 362 int numRegionsAfterMerge = numRegions - 2; 363 admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(), 364 regions.get(2).getEncodedNameAsBytes(), true); 365 admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(), 366 regions.get(5).getEncodedNameAsBytes(), true); 367 368 waitRegionsAfterMerge(numRegionsAfterMerge); 369 assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size()); 370 371 // Take a snapshot 372 String snapshotName = "snapshotAfterMerge"; 373 SnapshotTestingUtils.snapshot(admin, snapshotName, TABLE_NAME, SnapshotType.FLUSH, 3); 374 375 // Clone the table 376 TableName cloneName = TableName.valueOf("cloneMerge"); 377 admin.cloneSnapshot(snapshotName, cloneName); 378 SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneName); 379 380 verifyRowCount(UTIL, TABLE_NAME, numRows); 381 verifyRowCount(UTIL, cloneName, numRows); 382 383 // test that we can delete the snapshot 384 UTIL.deleteTable(cloneName); 385 } 386 387 /** 388 * Basic end-to-end test of simple-flush-based snapshots 389 */ 390 @Test 391 public void testFlushCreateListDestroy() throws Exception { 392 LOG.debug("------- Starting Snapshot test -------------"); 393 // make sure we don't fail on listing snapshots 394 SnapshotTestingUtils.assertNoSnapshots(admin); 395 // load the table so we have some data 396 SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); 397 398 String snapshotName = "flushSnapshotCreateListDestroy"; 399 FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem(); 400 Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir(); 401 SnapshotTestingUtils.createSnapshotAndValidate(admin, TABLE_NAME, Bytes.toString(TEST_FAM), 402 snapshotName, rootDir, fs, true); 403 } 404 405 private void waitRegionsAfterMerge(final long numRegionsAfterMerge) 406 throws IOException, InterruptedException { 407 // Verify that there's one region less 408 long startTime = System.currentTimeMillis(); 409 while (admin.getTableRegions(TABLE_NAME).size() != numRegionsAfterMerge) { 410 // This may be flaky... if after 15sec the merge is not complete give up 411 // it will fail in the assertEquals(numRegionsAfterMerge). 412 if ((System.currentTimeMillis() - startTime) > 15000) 413 break; 414 Thread.sleep(100); 415 } 416 SnapshotTestingUtils.waitForTableToBeOnline(UTIL, TABLE_NAME); 417 } 418 419 420 protected void verifyRowCount(final HBaseTestingUtility util, final TableName tableName, 421 long expectedRows) throws IOException { 422 SnapshotTestingUtils.verifyRowCount(util, tableName, expectedRows); 423 } 424 425 protected int countRows(final Table table, final byte[]... families) throws IOException { 426 return UTIL.countRows(table, families); 427 } 428}