001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.snapshot;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022import static org.junit.Assert.fail;
023
024import java.io.IOException;
025import java.util.Collections;
026import java.util.Comparator;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030import java.util.concurrent.CountDownLatch;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.fs.FileSystem;
033import org.apache.hadoop.fs.Path;
034import org.apache.hadoop.hbase.HBaseClassTestRule;
035import org.apache.hadoop.hbase.HBaseTestingUtility;
036import org.apache.hadoop.hbase.HConstants;
037import org.apache.hadoop.hbase.HRegionInfo;
038import org.apache.hadoop.hbase.TableName;
039import org.apache.hadoop.hbase.TableNotFoundException;
040import org.apache.hadoop.hbase.client.Admin;
041import org.apache.hadoop.hbase.client.SnapshotDescription;
042import org.apache.hadoop.hbase.client.SnapshotType;
043import org.apache.hadoop.hbase.client.Table;
044import org.apache.hadoop.hbase.master.HMaster;
045import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
046import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
047import org.apache.hadoop.hbase.testclassification.LargeTests;
048import org.apache.hadoop.hbase.testclassification.RegionServerTests;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.junit.After;
051import org.junit.AfterClass;
052import org.junit.Before;
053import org.junit.BeforeClass;
054import org.junit.ClassRule;
055import org.junit.Test;
056import org.junit.experimental.categories.Category;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
061import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
062
063/**
064 * Test creating/using/deleting snapshots from the client
065 * <p>
066 * This is an end-to-end test for the snapshot utility
067 *
068 * TODO This is essentially a clone of TestSnapshotFromClient.  This is worth refactoring this
069 * because there will be a few more flavors of snapshots that need to run these tests.
070 */
071@Category({RegionServerTests.class, LargeTests.class})
072public class TestFlushSnapshotFromClient {
073
074  @ClassRule
075  public static final HBaseClassTestRule CLASS_RULE =
076      HBaseClassTestRule.forClass(TestFlushSnapshotFromClient.class);
077
078  private static final Logger LOG = LoggerFactory.getLogger(TestFlushSnapshotFromClient.class);
079
080  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
081  protected static final int NUM_RS = 2;
082  protected static final byte[] TEST_FAM = Bytes.toBytes("fam");
083  protected static final TableName TABLE_NAME = TableName.valueOf("test");
084  protected final int DEFAULT_NUM_ROWS = 100;
085  protected Admin admin = null;
086
087  @BeforeClass
088  public static void setupCluster() throws Exception {
089    setupConf(UTIL.getConfiguration());
090    UTIL.startMiniCluster(NUM_RS);
091  }
092
093  protected static void setupConf(Configuration conf) {
094    // disable the ui
095    conf.setInt("hbase.regionsever.info.port", -1);
096    // change the flush size to a small amount, regulating number of store files
097    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
098    // so make sure we get a compaction when doing a load, but keep around some
099    // files in the store
100    conf.setInt("hbase.hstore.compaction.min", 10);
101    conf.setInt("hbase.hstore.compactionThreshold", 10);
102    // block writes if we get to 12 store files
103    conf.setInt("hbase.hstore.blockingStoreFiles", 12);
104    // Enable snapshot
105    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
106    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
107        ConstantSizeRegionSplitPolicy.class.getName());
108  }
109
110  @Before
111  public void setup() throws Exception {
112    createTable();
113    this.admin = UTIL.getConnection().getAdmin();
114  }
115
116  protected void createTable() throws Exception {
117    SnapshotTestingUtils.createTable(UTIL, TABLE_NAME, TEST_FAM);
118  }
119
120  @After
121  public void tearDown() throws Exception {
122    UTIL.deleteTable(TABLE_NAME);
123    SnapshotTestingUtils.deleteAllSnapshots(this.admin);
124    this.admin.close();
125    SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
126  }
127
128  @AfterClass
129  public static void cleanupTest() throws Exception {
130    try {
131      UTIL.shutdownMiniCluster();
132    } catch (Exception e) {
133      LOG.warn("failure shutting down cluster", e);
134    }
135  }
136
137  /**
138   * Test simple flush snapshotting a table that is online
139   * @throws Exception
140   */
141  @Test
142  public void testFlushTableSnapshot() throws Exception {
143    // make sure we don't fail on listing snapshots
144    SnapshotTestingUtils.assertNoSnapshots(admin);
145
146    // put some stuff in the table
147    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
148
149    LOG.debug("FS state before snapshot:");
150    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
151
152    // take a snapshot of the enabled table
153    String snapshotString = "offlineTableSnapshot";
154    byte[] snapshot = Bytes.toBytes(snapshotString);
155    admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.FLUSH);
156    LOG.debug("Snapshot completed.");
157
158    // make sure we have the snapshot
159    List<SnapshotDescription> snapshots =
160        SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);
161
162    // make sure its a valid snapshot
163    LOG.debug("FS state after snapshot:");
164    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
165
166    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
167      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
168  }
169
170   /**
171   * Test snapshotting a table that is online without flushing
172   * @throws Exception
173   */
174  @Test
175  public void testSkipFlushTableSnapshot() throws Exception {
176    // make sure we don't fail on listing snapshots
177    SnapshotTestingUtils.assertNoSnapshots(admin);
178
179    // put some stuff in the table
180    Table table = UTIL.getConnection().getTable(TABLE_NAME);
181    UTIL.loadTable(table, TEST_FAM);
182    UTIL.flush(TABLE_NAME);
183
184    LOG.debug("FS state before snapshot:");
185    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
186
187    // take a snapshot of the enabled table
188    String snapshotString = "skipFlushTableSnapshot";
189    byte[] snapshot = Bytes.toBytes(snapshotString);
190    admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.SKIPFLUSH);
191    LOG.debug("Snapshot completed.");
192
193    // make sure we have the snapshot
194    List<SnapshotDescription> snapshots =
195        SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);
196
197    // make sure its a valid snapshot
198    LOG.debug("FS state after snapshot:");
199    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
200
201    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
202      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
203
204    admin.deleteSnapshot(snapshot);
205    snapshots = admin.listSnapshots();
206    SnapshotTestingUtils.assertNoSnapshots(admin);
207  }
208
209
210  /**
211   * Test simple flush snapshotting a table that is online
212   * @throws Exception
213   */
214  @Test
215  public void testFlushTableSnapshotWithProcedure() throws Exception {
216    // make sure we don't fail on listing snapshots
217    SnapshotTestingUtils.assertNoSnapshots(admin);
218
219    // put some stuff in the table
220    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
221
222    LOG.debug("FS state before snapshot:");
223    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
224
225    // take a snapshot of the enabled table
226    String snapshotString = "offlineTableSnapshot";
227    byte[] snapshot = Bytes.toBytes(snapshotString);
228    Map<String, String> props = new HashMap<>();
229    props.put("table", TABLE_NAME.getNameAsString());
230    admin.execProcedure(SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION,
231        snapshotString, props);
232
233
234    LOG.debug("Snapshot completed.");
235
236    // make sure we have the snapshot
237    List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
238      snapshot, TABLE_NAME);
239
240    // make sure its a valid snapshot
241    LOG.debug("FS state after snapshot:");
242    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
243
244    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
245      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
246  }
247
248  @Test
249  public void testSnapshotFailsOnNonExistantTable() throws Exception {
250    // make sure we don't fail on listing snapshots
251    SnapshotTestingUtils.assertNoSnapshots(admin);
252    TableName tableName = TableName.valueOf("_not_a_table");
253
254    // make sure the table doesn't exist
255    boolean fail = false;
256    do {
257    try {
258      admin.getTableDescriptor(tableName);
259      fail = true;
260      LOG.error("Table:" + tableName + " already exists, checking a new name");
261      tableName = TableName.valueOf(tableName+"!");
262    } catch (TableNotFoundException e) {
263      fail = false;
264      }
265    } while (fail);
266
267    // snapshot the non-existant table
268    try {
269      admin.snapshot("fail", tableName, SnapshotType.FLUSH);
270      fail("Snapshot succeeded even though there is not table.");
271    } catch (SnapshotCreationException e) {
272      LOG.info("Correctly failed to snapshot a non-existant table:" + e.getMessage());
273    }
274  }
275
276  @Test
277  public void testAsyncFlushSnapshot() throws Exception {
278    SnapshotProtos.SnapshotDescription snapshot = SnapshotProtos.SnapshotDescription.newBuilder()
279        .setName("asyncSnapshot").setTable(TABLE_NAME.getNameAsString())
280        .setType(SnapshotProtos.SnapshotDescription.Type.FLUSH).build();
281
282    // take the snapshot async
283    admin.takeSnapshotAsync(
284      new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH));
285
286    // constantly loop, looking for the snapshot to complete
287    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
288    SnapshotTestingUtils.waitForSnapshotToComplete(master, snapshot, 200);
289    LOG.info(" === Async Snapshot Completed ===");
290    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
291
292    // make sure we get the snapshot
293    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot);
294  }
295
296  @Test
297  public void testSnapshotStateAfterMerge() throws Exception {
298    int numRows = DEFAULT_NUM_ROWS;
299    // make sure we don't fail on listing snapshots
300    SnapshotTestingUtils.assertNoSnapshots(admin);
301    // load the table so we have some data
302    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);
303
304    // Take a snapshot
305    String snapshotBeforeMergeName = "snapshotBeforeMerge";
306    admin.snapshot(snapshotBeforeMergeName, TABLE_NAME, SnapshotType.FLUSH);
307
308    // Clone the table
309    TableName cloneBeforeMergeName = TableName.valueOf("cloneBeforeMerge");
310    admin.cloneSnapshot(snapshotBeforeMergeName, cloneBeforeMergeName);
311    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneBeforeMergeName);
312
313    // Merge two regions
314    List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME);
315    Collections.sort(regions, new Comparator<HRegionInfo>() {
316      @Override
317      public int compare(HRegionInfo r1, HRegionInfo r2) {
318        return Bytes.compareTo(r1.getStartKey(), r2.getStartKey());
319      }
320    });
321
322    int numRegions = admin.getTableRegions(TABLE_NAME).size();
323    int numRegionsAfterMerge = numRegions - 2;
324    admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(),
325        regions.get(2).getEncodedNameAsBytes(), true);
326    admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(),
327        regions.get(5).getEncodedNameAsBytes(), true);
328
329    // Verify that there's one region less
330    waitRegionsAfterMerge(numRegionsAfterMerge);
331    assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size());
332
333    // Clone the table
334    TableName cloneAfterMergeName = TableName.valueOf("cloneAfterMerge");
335    admin.cloneSnapshot(snapshotBeforeMergeName, cloneAfterMergeName);
336    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneAfterMergeName);
337
338    verifyRowCount(UTIL, TABLE_NAME, numRows);
339    verifyRowCount(UTIL, cloneBeforeMergeName, numRows);
340    verifyRowCount(UTIL, cloneAfterMergeName, numRows);
341
342    // test that we can delete the snapshot
343    UTIL.deleteTable(cloneAfterMergeName);
344    UTIL.deleteTable(cloneBeforeMergeName);
345  }
346
347  @Test
348  public void testTakeSnapshotAfterMerge() throws Exception {
349    int numRows = DEFAULT_NUM_ROWS;
350    // make sure we don't fail on listing snapshots
351    SnapshotTestingUtils.assertNoSnapshots(admin);
352    // load the table so we have some data
353    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);
354
355    // Merge two regions
356    List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME);
357    Collections.sort(regions, new Comparator<HRegionInfo>() {
358      @Override
359      public int compare(HRegionInfo r1, HRegionInfo r2) {
360        return Bytes.compareTo(r1.getStartKey(), r2.getStartKey());
361      }
362    });
363
364    int numRegions = admin.getTableRegions(TABLE_NAME).size();
365    int numRegionsAfterMerge = numRegions - 2;
366    admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(),
367        regions.get(2).getEncodedNameAsBytes(), true);
368    admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(),
369        regions.get(5).getEncodedNameAsBytes(), true);
370
371    waitRegionsAfterMerge(numRegionsAfterMerge);
372    assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size());
373
374    // Take a snapshot
375    String snapshotName = "snapshotAfterMerge";
376    SnapshotTestingUtils.snapshot(admin, snapshotName, TABLE_NAME, SnapshotType.FLUSH, 3);
377
378    // Clone the table
379    TableName cloneName = TableName.valueOf("cloneMerge");
380    admin.cloneSnapshot(snapshotName, cloneName);
381    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneName);
382
383    verifyRowCount(UTIL, TABLE_NAME, numRows);
384    verifyRowCount(UTIL, cloneName, numRows);
385
386    // test that we can delete the snapshot
387    UTIL.deleteTable(cloneName);
388  }
389
390  /**
391   * Basic end-to-end test of simple-flush-based snapshots
392   */
393  @Test
394  public void testFlushCreateListDestroy() throws Exception {
395    LOG.debug("------- Starting Snapshot test -------------");
396    // make sure we don't fail on listing snapshots
397    SnapshotTestingUtils.assertNoSnapshots(admin);
398    // load the table so we have some data
399    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
400
401    String snapshotName = "flushSnapshotCreateListDestroy";
402    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
403    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
404    SnapshotTestingUtils.createSnapshotAndValidate(admin, TABLE_NAME, Bytes.toString(TEST_FAM),
405      snapshotName, rootDir, fs, true);
406  }
407
408  /**
409   * Demonstrate that we reject snapshot requests if there is a snapshot already running on the
410   * same table currently running and that concurrent snapshots on different tables can both
411   * succeed concurretly.
412   */
413  @Test
414  public void testConcurrentSnapshottingAttempts() throws IOException, InterruptedException {
415    final TableName TABLE2_NAME = TableName.valueOf(TABLE_NAME + "2");
416
417    int ssNum = 20;
418    // make sure we don't fail on listing snapshots
419    SnapshotTestingUtils.assertNoSnapshots(admin);
420    // create second testing table
421    SnapshotTestingUtils.createTable(UTIL, TABLE2_NAME, TEST_FAM);
422    // load the table so we have some data
423    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
424    SnapshotTestingUtils.loadData(UTIL, TABLE2_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
425
426    final CountDownLatch toBeSubmitted = new CountDownLatch(ssNum);
427    // We'll have one of these per thread
428    class SSRunnable implements Runnable {
429      SnapshotDescription ss;
430      SSRunnable(SnapshotDescription ss) {
431        this.ss = ss;
432      }
433
434      @Override
435      public void run() {
436        try {
437          LOG.info("Submitting snapshot request: " + ClientSnapshotDescriptionUtils
438              .toString(ProtobufUtil.createHBaseProtosSnapshotDesc(ss)));
439          admin.takeSnapshotAsync(ss);
440        } catch (Exception e) {
441          LOG.info("Exception during snapshot request: " + ClientSnapshotDescriptionUtils.toString(
442            ProtobufUtil.createHBaseProtosSnapshotDesc(ss))
443              + ".  This is ok, we expect some", e);
444        }
445        LOG.info("Submitted snapshot request: " + ClientSnapshotDescriptionUtils
446            .toString(ProtobufUtil.createHBaseProtosSnapshotDesc(ss)));
447        toBeSubmitted.countDown();
448      }
449    };
450
451    // build descriptions
452    SnapshotDescription[] descs = new SnapshotDescription[ssNum];
453    for (int i = 0; i < ssNum; i++) {
454      if(i % 2 ==0) {
455        descs[i] = new SnapshotDescription("ss" + i, TABLE_NAME, SnapshotType.FLUSH);
456      } else {
457        descs[i] = new SnapshotDescription("ss" + i, TABLE2_NAME, SnapshotType.FLUSH);
458      }
459    }
460
461    // kick each off its own thread
462    for (int i=0 ; i < ssNum; i++) {
463      new Thread(new SSRunnable(descs[i])).start();
464    }
465
466    // wait until all have been submitted
467    toBeSubmitted.await();
468
469    // loop until all are done.
470    while (true) {
471      int doneCount = 0;
472      for (SnapshotDescription ss : descs) {
473        try {
474          if (admin.isSnapshotFinished(ss)) {
475            doneCount++;
476          }
477        } catch (Exception e) {
478          LOG.warn("Got an exception when checking for snapshot " + ss.getName(), e);
479          doneCount++;
480        }
481      }
482      if (doneCount == descs.length) {
483        break;
484      }
485      Thread.sleep(100);
486    }
487
488    // dump for debugging
489    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
490
491    List<SnapshotDescription> taken = admin.listSnapshots();
492    int takenSize = taken.size();
493    LOG.info("Taken " + takenSize + " snapshots:  " + taken);
494    assertTrue("We expect at least 1 request to be rejected because of we concurrently" +
495        " issued many requests", takenSize < ssNum && takenSize > 0);
496
497    // Verify that there's at least one snapshot per table
498    int t1SnapshotsCount = 0;
499    int t2SnapshotsCount = 0;
500    for (SnapshotDescription ss : taken) {
501      if (ss.getTableName().equals(TABLE_NAME)) {
502        t1SnapshotsCount++;
503      } else if (ss.getTableName().equals(TABLE2_NAME)) {
504        t2SnapshotsCount++;
505      }
506    }
507    assertTrue("We expect at least 1 snapshot of table1 ", t1SnapshotsCount > 0);
508    assertTrue("We expect at least 1 snapshot of table2 ", t2SnapshotsCount > 0);
509
510    UTIL.deleteTable(TABLE2_NAME);
511  }
512
513  private void waitRegionsAfterMerge(final long numRegionsAfterMerge)
514      throws IOException, InterruptedException {
515    // Verify that there's one region less
516    long startTime = System.currentTimeMillis();
517    while (admin.getTableRegions(TABLE_NAME).size() != numRegionsAfterMerge) {
518      // This may be flaky... if after 15sec the merge is not complete give up
519      // it will fail in the assertEquals(numRegionsAfterMerge).
520      if ((System.currentTimeMillis() - startTime) > 15000)
521        break;
522      Thread.sleep(100);
523    }
524    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, TABLE_NAME);
525  }
526
527
528  protected void verifyRowCount(final HBaseTestingUtility util, final TableName tableName,
529      long expectedRows) throws IOException {
530    SnapshotTestingUtils.verifyRowCount(util, tableName, expectedRows);
531  }
532
533  protected int countRows(final Table table, final byte[]... families) throws IOException {
534    return UTIL.countRows(table, families);
535  }
536}