001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.snapshot;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.fail;
022
023import java.io.IOException;
024import java.util.Collections;
025import java.util.Comparator;
026import java.util.HashMap;
027import java.util.List;
028import java.util.Map;
029import java.util.concurrent.TimeUnit;
030import java.util.concurrent.TimeoutException;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.fs.FileSystem;
033import org.apache.hadoop.fs.Path;
034import org.apache.hadoop.hbase.HBaseClassTestRule;
035import org.apache.hadoop.hbase.HBaseTestingUtility;
036import org.apache.hadoop.hbase.HConstants;
037import org.apache.hadoop.hbase.HRegionInfo;
038import org.apache.hadoop.hbase.TableName;
039import org.apache.hadoop.hbase.TableNotFoundException;
040import org.apache.hadoop.hbase.client.Admin;
041import org.apache.hadoop.hbase.client.SnapshotDescription;
042import org.apache.hadoop.hbase.client.SnapshotType;
043import org.apache.hadoop.hbase.client.Table;
044import org.apache.hadoop.hbase.master.HMaster;
045import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
046import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
047import org.apache.hadoop.hbase.testclassification.LargeTests;
048import org.apache.hadoop.hbase.testclassification.RegionServerTests;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.junit.After;
051import org.junit.AfterClass;
052import org.junit.Before;
053import org.junit.BeforeClass;
054import org.junit.ClassRule;
055import org.junit.Test;
056import org.junit.experimental.categories.Category;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
061import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
062
063/**
064 * Test creating/using/deleting snapshots from the client
065 * <p>
066 * This is an end-to-end test for the snapshot utility
067 *
 * TODO: This is essentially a clone of TestSnapshotFromClient. It is worth refactoring,
 * because there will be a few more flavors of snapshots that need to run these tests.
070 */
071@Category({RegionServerTests.class, LargeTests.class})
072public class TestFlushSnapshotFromClient {
073
074  @ClassRule
075  public static final HBaseClassTestRule CLASS_RULE =
076      HBaseClassTestRule.forClass(TestFlushSnapshotFromClient.class);
077
078  private static final Logger LOG = LoggerFactory.getLogger(TestFlushSnapshotFromClient.class);
079
080  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
081  protected static final int NUM_RS = 2;
082  protected static final byte[] TEST_FAM = Bytes.toBytes("fam");
083  protected static final TableName TABLE_NAME = TableName.valueOf("test");
084  protected final int DEFAULT_NUM_ROWS = 100;
085  protected Admin admin = null;
086
087  @BeforeClass
088  public static void setupCluster() throws Exception {
089    setupConf(UTIL.getConfiguration());
090    UTIL.startMiniCluster(NUM_RS);
091  }
092
093  protected static void setupConf(Configuration conf) {
094    // disable the ui
095    conf.setInt("hbase.regionsever.info.port", -1);
096    // change the flush size to a small amount, regulating number of store files
097    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
098    // so make sure we get a compaction when doing a load, but keep around some
099    // files in the store
100    conf.setInt("hbase.hstore.compaction.min", 10);
101    conf.setInt("hbase.hstore.compactionThreshold", 10);
102    // block writes if we get to 12 store files
103    conf.setInt("hbase.hstore.blockingStoreFiles", 12);
104    // Enable snapshot
105    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
106    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
107        ConstantSizeRegionSplitPolicy.class.getName());
108  }
109
110  @Before
111  public void setup() throws Exception {
112    createTable();
113    this.admin = UTIL.getConnection().getAdmin();
114  }
115
116  protected void createTable() throws Exception {
117    SnapshotTestingUtils.createTable(UTIL, TABLE_NAME, TEST_FAM);
118  }
119
120  @After
121  public void tearDown() throws Exception {
122    UTIL.deleteTable(TABLE_NAME);
123    SnapshotTestingUtils.deleteAllSnapshots(this.admin);
124    this.admin.close();
125    SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
126  }
127
128  @AfterClass
129  public static void cleanupTest() throws Exception {
130    try {
131      UTIL.shutdownMiniCluster();
132    } catch (Exception e) {
133      LOG.warn("failure shutting down cluster", e);
134    }
135  }
136
137  /**
138   * Test simple flush snapshotting a table that is online
139   */
140  @Test
141  public void testFlushTableSnapshot() throws Exception {
142    // make sure we don't fail on listing snapshots
143    SnapshotTestingUtils.assertNoSnapshots(admin);
144
145    // put some stuff in the table
146    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
147
148    LOG.debug("FS state before snapshot:");
149    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
150
151    // take a snapshot of the enabled table
152    String snapshotString = "offlineTableSnapshot";
153    byte[] snapshot = Bytes.toBytes(snapshotString);
154    admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.FLUSH);
155    LOG.debug("Snapshot completed.");
156
157    // make sure we have the snapshot
158    List<SnapshotDescription> snapshots =
159        SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);
160
161    // make sure its a valid snapshot
162    LOG.debug("FS state after snapshot:");
163    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
164
165    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
166      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
167  }
168
169   /**
170   * Test snapshotting a table that is online without flushing
171   */
172  @Test
173  public void testSkipFlushTableSnapshot() throws Exception {
174    // make sure we don't fail on listing snapshots
175    SnapshotTestingUtils.assertNoSnapshots(admin);
176
177    // put some stuff in the table
178    Table table = UTIL.getConnection().getTable(TABLE_NAME);
179    UTIL.loadTable(table, TEST_FAM);
180    UTIL.flush(TABLE_NAME);
181
182    LOG.debug("FS state before snapshot:");
183    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
184
185    // take a snapshot of the enabled table
186    String snapshotString = "skipFlushTableSnapshot";
187    byte[] snapshot = Bytes.toBytes(snapshotString);
188    admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.SKIPFLUSH);
189    LOG.debug("Snapshot completed.");
190
191    // make sure we have the snapshot
192    List<SnapshotDescription> snapshots =
193        SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);
194
195    // make sure its a valid snapshot
196    LOG.debug("FS state after snapshot:");
197    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
198
199    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
200      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
201
202    admin.deleteSnapshot(snapshot);
203    snapshots = admin.listSnapshots();
204    SnapshotTestingUtils.assertNoSnapshots(admin);
205  }
206
207
208  /**
209   * Test simple flush snapshotting a table that is online
210   */
211  @Test
212  public void testFlushTableSnapshotWithProcedure() throws Exception {
213    // make sure we don't fail on listing snapshots
214    SnapshotTestingUtils.assertNoSnapshots(admin);
215
216    // put some stuff in the table
217    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
218
219    LOG.debug("FS state before snapshot:");
220    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
221
222    // take a snapshot of the enabled table
223    String snapshotString = "offlineTableSnapshot";
224    byte[] snapshot = Bytes.toBytes(snapshotString);
225    Map<String, String> props = new HashMap<>();
226    props.put("table", TABLE_NAME.getNameAsString());
227    admin.execProcedure(SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION,
228        snapshotString, props);
229
230
231    LOG.debug("Snapshot completed.");
232
233    // make sure we have the snapshot
234    List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
235      snapshot, TABLE_NAME);
236
237    // make sure its a valid snapshot
238    LOG.debug("FS state after snapshot:");
239    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
240
241    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
242      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
243  }
244
245  @Test
246  public void testSnapshotFailsOnNonExistantTable() throws Exception {
247    // make sure we don't fail on listing snapshots
248    SnapshotTestingUtils.assertNoSnapshots(admin);
249    TableName tableName = TableName.valueOf("_not_a_table");
250
251    // make sure the table doesn't exist
252    boolean fail = false;
253    do {
254    try {
255      admin.getTableDescriptor(tableName);
256      fail = true;
257      LOG.error("Table:" + tableName + " already exists, checking a new name");
258      tableName = TableName.valueOf(tableName+"!");
259    } catch (TableNotFoundException e) {
260      fail = false;
261      }
262    } while (fail);
263
264    // snapshot the non-existant table
265    try {
266      admin.snapshot("fail", tableName, SnapshotType.FLUSH);
267      fail("Snapshot succeeded even though there is not table.");
268    } catch (SnapshotCreationException e) {
269      LOG.info("Correctly failed to snapshot a non-existant table:" + e.getMessage());
270    }
271  }
272
273  @Test
274  public void testAsyncFlushSnapshot() throws Exception {
275    SnapshotProtos.SnapshotDescription snapshot = SnapshotProtos.SnapshotDescription.newBuilder()
276        .setName("asyncSnapshot").setTable(TABLE_NAME.getNameAsString())
277        .setType(SnapshotProtos.SnapshotDescription.Type.FLUSH).build();
278
279    // take the snapshot async
280    admin.takeSnapshotAsync(
281      new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH));
282
283    // constantly loop, looking for the snapshot to complete
284    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
285    SnapshotTestingUtils.waitForSnapshotToComplete(master, snapshot, 200);
286    LOG.info(" === Async Snapshot Completed ===");
287    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
288
289    // make sure we get the snapshot
290    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot);
291  }
292
293  @Test
294  public void testSnapshotStateAfterMerge() throws Exception {
295    int numRows = DEFAULT_NUM_ROWS;
296    // make sure we don't fail on listing snapshots
297    SnapshotTestingUtils.assertNoSnapshots(admin);
298    // load the table so we have some data
299    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);
300
301    // Take a snapshot
302    String snapshotBeforeMergeName = "snapshotBeforeMerge";
303    admin.snapshot(snapshotBeforeMergeName, TABLE_NAME, SnapshotType.FLUSH);
304
305    // Clone the table
306    TableName cloneBeforeMergeName = TableName.valueOf("cloneBeforeMerge");
307    admin.cloneSnapshot(snapshotBeforeMergeName, cloneBeforeMergeName);
308    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneBeforeMergeName);
309
310    // Merge two regions
311    List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME);
312    Collections.sort(regions, new Comparator<HRegionInfo>() {
313      @Override
314      public int compare(HRegionInfo r1, HRegionInfo r2) {
315        return Bytes.compareTo(r1.getStartKey(), r2.getStartKey());
316      }
317    });
318
319    int numRegions = admin.getTableRegions(TABLE_NAME).size();
320    int numRegionsAfterMerge = numRegions - 2;
321    admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(),
322        regions.get(2).getEncodedNameAsBytes(), true);
323    admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(),
324        regions.get(5).getEncodedNameAsBytes(), true);
325
326    // Verify that there's one region less
327    waitRegionsAfterMerge(numRegionsAfterMerge);
328    assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size());
329
330    // Clone the table
331    TableName cloneAfterMergeName = TableName.valueOf("cloneAfterMerge");
332    admin.cloneSnapshot(snapshotBeforeMergeName, cloneAfterMergeName);
333    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneAfterMergeName);
334
335    verifyRowCount(UTIL, TABLE_NAME, numRows);
336    verifyRowCount(UTIL, cloneBeforeMergeName, numRows);
337    verifyRowCount(UTIL, cloneAfterMergeName, numRows);
338
339    // test that we can delete the snapshot
340    UTIL.deleteTable(cloneAfterMergeName);
341    UTIL.deleteTable(cloneBeforeMergeName);
342  }
343
344  @Test
345  public void testTakeSnapshotAfterMerge() throws Exception {
346    int numRows = DEFAULT_NUM_ROWS;
347    // make sure we don't fail on listing snapshots
348    SnapshotTestingUtils.assertNoSnapshots(admin);
349    // load the table so we have some data
350    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);
351
352    // Merge two regions
353    List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME);
354    Collections.sort(regions, new Comparator<HRegionInfo>() {
355      @Override
356      public int compare(HRegionInfo r1, HRegionInfo r2) {
357        return Bytes.compareTo(r1.getStartKey(), r2.getStartKey());
358      }
359    });
360
361    int numRegions = admin.getTableRegions(TABLE_NAME).size();
362    int numRegionsAfterMerge = numRegions - 2;
363    admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(),
364        regions.get(2).getEncodedNameAsBytes(), true);
365    admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(),
366        regions.get(5).getEncodedNameAsBytes(), true);
367
368    waitRegionsAfterMerge(numRegionsAfterMerge);
369    assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size());
370
371    // Take a snapshot
372    String snapshotName = "snapshotAfterMerge";
373    SnapshotTestingUtils.snapshot(admin, snapshotName, TABLE_NAME, SnapshotType.FLUSH, 3);
374
375    // Clone the table
376    TableName cloneName = TableName.valueOf("cloneMerge");
377    admin.cloneSnapshot(snapshotName, cloneName);
378    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneName);
379
380    verifyRowCount(UTIL, TABLE_NAME, numRows);
381    verifyRowCount(UTIL, cloneName, numRows);
382
383    // test that we can delete the snapshot
384    UTIL.deleteTable(cloneName);
385  }
386
387  /**
388   * Basic end-to-end test of simple-flush-based snapshots
389   */
390  @Test
391  public void testFlushCreateListDestroy() throws Exception {
392    LOG.debug("------- Starting Snapshot test -------------");
393    // make sure we don't fail on listing snapshots
394    SnapshotTestingUtils.assertNoSnapshots(admin);
395    // load the table so we have some data
396    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
397
398    String snapshotName = "flushSnapshotCreateListDestroy";
399    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
400    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
401    SnapshotTestingUtils.createSnapshotAndValidate(admin, TABLE_NAME, Bytes.toString(TEST_FAM),
402      snapshotName, rootDir, fs, true);
403  }
404
405  private void waitRegionsAfterMerge(final long numRegionsAfterMerge)
406      throws IOException, InterruptedException {
407    // Verify that there's one region less
408    long startTime = System.currentTimeMillis();
409    while (admin.getTableRegions(TABLE_NAME).size() != numRegionsAfterMerge) {
410      // This may be flaky... if after 15sec the merge is not complete give up
411      // it will fail in the assertEquals(numRegionsAfterMerge).
412      if ((System.currentTimeMillis() - startTime) > 15000)
413        break;
414      Thread.sleep(100);
415    }
416    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, TABLE_NAME);
417  }
418
419
420  protected void verifyRowCount(final HBaseTestingUtility util, final TableName tableName,
421      long expectedRows) throws IOException {
422    SnapshotTestingUtils.verifyRowCount(util, tableName, expectedRows);
423  }
424
425  protected int countRows(final Table table, final byte[]... families) throws IOException {
426    return UTIL.countRows(table, families);
427  }
428}