001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.snapshot;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.fail;
022
023import java.io.IOException;
024import java.util.Collections;
025import java.util.Comparator;
026import java.util.HashMap;
027import java.util.List;
028import java.util.Map;
029import java.util.concurrent.TimeUnit;
030import java.util.concurrent.TimeoutException;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.fs.FileSystem;
033import org.apache.hadoop.fs.Path;
034import org.apache.hadoop.hbase.HBaseClassTestRule;
035import org.apache.hadoop.hbase.HBaseTestingUtil;
036import org.apache.hadoop.hbase.HConstants;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.TableNotFoundException;
039import org.apache.hadoop.hbase.client.Admin;
040import org.apache.hadoop.hbase.client.RegionInfo;
041import org.apache.hadoop.hbase.client.SnapshotDescription;
042import org.apache.hadoop.hbase.client.SnapshotType;
043import org.apache.hadoop.hbase.client.Table;
044import org.apache.hadoop.hbase.master.HMaster;
045import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
046import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
047import org.apache.hadoop.hbase.testclassification.LargeTests;
048import org.apache.hadoop.hbase.testclassification.RegionServerTests;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
051import org.junit.After;
052import org.junit.AfterClass;
053import org.junit.Before;
054import org.junit.BeforeClass;
055import org.junit.ClassRule;
056import org.junit.Test;
057import org.junit.experimental.categories.Category;
058import org.slf4j.Logger;
059import org.slf4j.LoggerFactory;
060
061import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
062
063import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
064import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
065import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
066import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
067
068/**
069 * Test creating/using/deleting snapshots from the client
070 * <p>
071 * This is an end-to-end test for the snapshot utility TODO This is essentially a clone of
072 * TestSnapshotFromClient. This is worth refactoring this because there will be a few more flavors
073 * of snapshots that need to run these tests.
074 */
075@Category({ RegionServerTests.class, LargeTests.class })
076public class TestFlushSnapshotFromClient {
077
078  @ClassRule
079  public static final HBaseClassTestRule CLASS_RULE =
080    HBaseClassTestRule.forClass(TestFlushSnapshotFromClient.class);
081
082  private static final Logger LOG = LoggerFactory.getLogger(TestFlushSnapshotFromClient.class);
083
084  protected static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
085  protected static final int NUM_RS = 2;
086  protected static final byte[] TEST_FAM = Bytes.toBytes("fam");
087  protected static final TableName TABLE_NAME = TableName.valueOf("test");
088  protected final int DEFAULT_NUM_ROWS = 100;
089  protected Admin admin = null;
090
091  @BeforeClass
092  public static void setupCluster() throws Exception {
093    setupConf(UTIL.getConfiguration());
094    UTIL.startMiniCluster(NUM_RS);
095  }
096
097  protected static void setupConf(Configuration conf) {
098    // disable the ui
099    conf.setInt("hbase.regionsever.info.port", -1);
100    // change the flush size to a small amount, regulating number of store files
101    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
102    // so make sure we get a compaction when doing a load, but keep around some
103    // files in the store
104    conf.setInt("hbase.hstore.compaction.min", 10);
105    conf.setInt("hbase.hstore.compactionThreshold", 10);
106    // block writes if we get to 12 store files
107    conf.setInt("hbase.hstore.blockingStoreFiles", 12);
108    // Enable snapshot
109    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
110    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
111      ConstantSizeRegionSplitPolicy.class.getName());
112  }
113
114  @Before
115  public void setup() throws Exception {
116    createTable();
117    this.admin = UTIL.getConnection().getAdmin();
118  }
119
120  protected void createTable() throws Exception {
121    SnapshotTestingUtils.createTable(UTIL, TABLE_NAME, TEST_FAM);
122  }
123
124  @After
125  public void tearDown() throws Exception {
126    UTIL.deleteTable(TABLE_NAME);
127    SnapshotTestingUtils.deleteAllSnapshots(this.admin);
128    this.admin.close();
129    SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
130  }
131
132  @AfterClass
133  public static void cleanupTest() throws Exception {
134    try {
135      UTIL.shutdownMiniCluster();
136    } catch (Exception e) {
137      LOG.warn("failure shutting down cluster", e);
138    }
139  }
140
141  /**
142   * Test simple flush snapshotting a table that is online
143   */
144  @Test
145  public void testFlushTableSnapshot() throws Exception {
146    // make sure we don't fail on listing snapshots
147    SnapshotTestingUtils.assertNoSnapshots(admin);
148
149    // put some stuff in the table
150    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
151
152    LOG.debug("FS state before snapshot:");
153    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
154
155    // take a snapshot of the enabled table
156    String snapshotString = "offlineTableSnapshot";
157    byte[] snapshot = Bytes.toBytes(snapshotString);
158    admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.FLUSH);
159    LOG.debug("Snapshot completed.");
160
161    // make sure we have the snapshot
162    List<SnapshotDescription> snapshots =
163      SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);
164
165    // make sure its a valid snapshot
166    LOG.debug("FS state after snapshot:");
167    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
168
169    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
170      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
171  }
172
173  /**
174   * Test snapshotting a table that is online without flushing
175   */
176  @Test
177  public void testSkipFlushTableSnapshot() throws Exception {
178    // make sure we don't fail on listing snapshots
179    SnapshotTestingUtils.assertNoSnapshots(admin);
180
181    // put some stuff in the table
182    Table table = UTIL.getConnection().getTable(TABLE_NAME);
183    UTIL.loadTable(table, TEST_FAM);
184    UTIL.flush(TABLE_NAME);
185
186    LOG.debug("FS state before snapshot:");
187    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
188
189    // take a snapshot of the enabled table
190    String snapshotString = "skipFlushTableSnapshot";
191    String snapshot = snapshotString;
192    admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.SKIPFLUSH);
193    LOG.debug("Snapshot completed.");
194
195    // make sure we have the snapshot
196    List<SnapshotDescription> snapshots =
197      SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);
198
199    // make sure its a valid snapshot
200    LOG.debug("FS state after snapshot:");
201    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
202
203    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
204      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
205
206    admin.deleteSnapshot(snapshot);
207    snapshots = admin.listSnapshots();
208    SnapshotTestingUtils.assertNoSnapshots(admin);
209  }
210
211  /**
212   * Test simple flush snapshotting a table that is online
213   */
214  @Test
215  public void testFlushTableSnapshotWithProcedure() throws Exception {
216    // make sure we don't fail on listing snapshots
217    SnapshotTestingUtils.assertNoSnapshots(admin);
218
219    // put some stuff in the table
220    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
221
222    LOG.debug("FS state before snapshot:");
223    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
224
225    // take a snapshot of the enabled table
226    String snapshotString = "offlineTableSnapshot";
227    byte[] snapshot = Bytes.toBytes(snapshotString);
228    Map<String, String> props = new HashMap<>();
229    props.put("table", TABLE_NAME.getNameAsString());
230    admin.execProcedure(SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, snapshotString,
231      props);
232
233    LOG.debug("Snapshot completed.");
234
235    // make sure we have the snapshot
236    List<SnapshotDescription> snapshots =
237      SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);
238
239    // make sure its a valid snapshot
240    LOG.debug("FS state after snapshot:");
241    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
242
243    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
244      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
245  }
246
247  @Test
248  public void testSnapshotFailsOnNonExistantTable() throws Exception {
249    // make sure we don't fail on listing snapshots
250    SnapshotTestingUtils.assertNoSnapshots(admin);
251    TableName tableName = TableName.valueOf("_not_a_table");
252
253    // make sure the table doesn't exist
254    boolean fail = false;
255    do {
256      try {
257        admin.getDescriptor(tableName);
258        fail = true;
259        LOG.error("Table:" + tableName + " already exists, checking a new name");
260        tableName = TableName.valueOf(tableName + "!");
261      } catch (TableNotFoundException e) {
262        fail = false;
263      }
264    } while (fail);
265
266    // snapshot the non-existant table
267    try {
268      admin.snapshot("fail", tableName, SnapshotType.FLUSH);
269      fail("Snapshot succeeded even though there is not table.");
270    } catch (SnapshotCreationException e) {
271      LOG.info("Correctly failed to snapshot a non-existant table:" + e.getMessage());
272    }
273  }
274
275  /**
276   * Helper method for testing async snapshot operations. Just waits for the given snapshot to
277   * complete on the server by repeatedly checking the master.
278   * @param master       the master running the snapshot
279   * @param snapshot     the snapshot to check
280   * @param timeoutNanos the timeout in nano between checks to see if the snapshot is done
281   */
282  private static void waitForSnapshotToComplete(HMaster master,
283    SnapshotProtos.SnapshotDescription snapshot, long timeoutNanos) throws Exception {
284    final IsSnapshotDoneRequest request =
285      IsSnapshotDoneRequest.newBuilder().setSnapshot(snapshot).build();
286    long start = System.nanoTime();
287    while (System.nanoTime() - start < timeoutNanos) {
288      try {
289        IsSnapshotDoneResponse done = master.getMasterRpcServices().isSnapshotDone(null, request);
290        if (done.getDone()) {
291          return;
292        }
293      } catch (ServiceException e) {
294        // ignore UnknownSnapshotException, this is possible as for AsyncAdmin, the method will
295        // return immediately after sending out the request, no matter whether the master has
296        // processed the request or not.
297        if (!(e.getCause() instanceof UnknownSnapshotException)) {
298          throw e;
299        }
300      }
301
302      Thread.sleep(200);
303    }
304    throw new TimeoutException("Timeout waiting for snapshot " + snapshot + " to complete");
305  }
306
307  @Test
308  public void testAsyncFlushSnapshot() throws Exception {
309    SnapshotProtos.SnapshotDescription snapshot = SnapshotProtos.SnapshotDescription.newBuilder()
310      .setName("asyncSnapshot").setTable(TABLE_NAME.getNameAsString())
311      .setType(SnapshotProtos.SnapshotDescription.Type.FLUSH).build();
312
313    // take the snapshot async
314    admin.snapshotAsync(new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH));
315
316    // constantly loop, looking for the snapshot to complete
317    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
318    waitForSnapshotToComplete(master, snapshot, TimeUnit.MINUTES.toNanos(1));
319    LOG.info(" === Async Snapshot Completed ===");
320    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
321
322    // make sure we get the snapshot
323    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot);
324  }
325
326  @Test
327  public void testSnapshotStateAfterMerge() throws Exception {
328    int numRows = DEFAULT_NUM_ROWS;
329    // make sure we don't fail on listing snapshots
330    SnapshotTestingUtils.assertNoSnapshots(admin);
331    // load the table so we have some data
332    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);
333
334    // Take a snapshot
335    String snapshotBeforeMergeName = "snapshotBeforeMerge";
336    admin.snapshot(snapshotBeforeMergeName, TABLE_NAME, SnapshotType.FLUSH);
337
338    // Clone the table
339    TableName cloneBeforeMergeName = TableName.valueOf("cloneBeforeMerge");
340    admin.cloneSnapshot(snapshotBeforeMergeName, cloneBeforeMergeName);
341    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneBeforeMergeName);
342
343    // Merge two regions
344    List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
345    Collections.sort(regions, new Comparator<RegionInfo>() {
346      @Override
347      public int compare(RegionInfo r1, RegionInfo r2) {
348        return Bytes.compareTo(r1.getStartKey(), r2.getStartKey());
349      }
350    });
351
352    int numRegions = admin.getRegions(TABLE_NAME).size();
353    int numRegionsAfterMerge = numRegions - 2;
354    admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(),
355      regions.get(2).getEncodedNameAsBytes(), true);
356    admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(),
357      regions.get(5).getEncodedNameAsBytes(), true);
358
359    // Verify that there's one region less
360    waitRegionsAfterMerge(numRegionsAfterMerge);
361    assertEquals(numRegionsAfterMerge, admin.getRegions(TABLE_NAME).size());
362
363    // Clone the table
364    TableName cloneAfterMergeName = TableName.valueOf("cloneAfterMerge");
365    admin.cloneSnapshot(snapshotBeforeMergeName, cloneAfterMergeName);
366    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneAfterMergeName);
367
368    verifyRowCount(UTIL, TABLE_NAME, numRows);
369    verifyRowCount(UTIL, cloneBeforeMergeName, numRows);
370    verifyRowCount(UTIL, cloneAfterMergeName, numRows);
371
372    // test that we can delete the snapshot
373    UTIL.deleteTable(cloneAfterMergeName);
374    UTIL.deleteTable(cloneBeforeMergeName);
375  }
376
377  @Test
378  public void testTakeSnapshotAfterMerge() throws Exception {
379    int numRows = DEFAULT_NUM_ROWS;
380    // make sure we don't fail on listing snapshots
381    SnapshotTestingUtils.assertNoSnapshots(admin);
382    // load the table so we have some data
383    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);
384
385    // Merge two regions
386    List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
387    Collections.sort(regions, new Comparator<RegionInfo>() {
388      @Override
389      public int compare(RegionInfo r1, RegionInfo r2) {
390        return Bytes.compareTo(r1.getStartKey(), r2.getStartKey());
391      }
392    });
393
394    int numRegions = admin.getRegions(TABLE_NAME).size();
395    int numRegionsAfterMerge = numRegions - 2;
396    admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(),
397      regions.get(2).getEncodedNameAsBytes(), true);
398    admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(),
399      regions.get(5).getEncodedNameAsBytes(), true);
400
401    waitRegionsAfterMerge(numRegionsAfterMerge);
402    assertEquals(numRegionsAfterMerge, admin.getRegions(TABLE_NAME).size());
403
404    // Take a snapshot
405    String snapshotName = "snapshotAfterMerge";
406    SnapshotTestingUtils.snapshot(admin, snapshotName, TABLE_NAME, SnapshotType.FLUSH, 3);
407
408    // Clone the table
409    TableName cloneName = TableName.valueOf("cloneMerge");
410    admin.cloneSnapshot(snapshotName, cloneName);
411    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneName);
412
413    verifyRowCount(UTIL, TABLE_NAME, numRows);
414    verifyRowCount(UTIL, cloneName, numRows);
415
416    // test that we can delete the snapshot
417    UTIL.deleteTable(cloneName);
418  }
419
420  /**
421   * Basic end-to-end test of simple-flush-based snapshots
422   */
423  @Test
424  public void testFlushCreateListDestroy() throws Exception {
425    LOG.debug("------- Starting Snapshot test -------------");
426    // make sure we don't fail on listing snapshots
427    SnapshotTestingUtils.assertNoSnapshots(admin);
428    // load the table so we have some data
429    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
430
431    String snapshotName = "flushSnapshotCreateListDestroy";
432    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
433    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
434    SnapshotTestingUtils.createSnapshotAndValidate(admin, TABLE_NAME, Bytes.toString(TEST_FAM),
435      snapshotName, rootDir, fs, true);
436  }
437
438  private void waitRegionsAfterMerge(final long numRegionsAfterMerge)
439    throws IOException, InterruptedException {
440    // Verify that there's one region less
441    long startTime = EnvironmentEdgeManager.currentTime();
442    while (admin.getRegions(TABLE_NAME).size() != numRegionsAfterMerge) {
443      // This may be flaky... if after 15sec the merge is not complete give up
444      // it will fail in the assertEquals(numRegionsAfterMerge).
445      if ((EnvironmentEdgeManager.currentTime() - startTime) > 15000) {
446        break;
447      }
448      Thread.sleep(100);
449    }
450    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, TABLE_NAME);
451  }
452
453  protected void verifyRowCount(final HBaseTestingUtil util, final TableName tableName,
454    long expectedRows) throws IOException {
455    SnapshotTestingUtils.verifyRowCount(util, tableName, expectedRows);
456  }
457
458  protected int countRows(final Table table, final byte[]... families) throws IOException {
459    return UTIL.countRows(table, families);
460  }
461}