001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.cleaner;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023import static org.junit.Assert.fail;
024
025import java.io.IOException;
026import java.util.Collection;
027import java.util.List;
028import java.util.Set;
029import java.util.regex.Pattern;
030
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.fs.FileSystem;
033import org.apache.hadoop.fs.Path;
034import org.apache.hadoop.hbase.HBaseClassTestRule;
035import org.apache.hadoop.hbase.HBaseTestingUtility;
036import org.apache.hadoop.hbase.HConstants;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.Waiter;
039import org.apache.hadoop.hbase.client.Admin;
040import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
041import org.apache.hadoop.hbase.client.Put;
042import org.apache.hadoop.hbase.client.SnapshotType;
043import org.apache.hadoop.hbase.client.Table;
044import org.apache.hadoop.hbase.client.TableDescriptor;
045import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
046import org.apache.hadoop.hbase.master.HMaster;
047import org.apache.hadoop.hbase.master.snapshot.DisabledTableSnapshotHandler;
048import org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner;
049import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
050import org.apache.hadoop.hbase.regionserver.CompactedHFilesDischarger;
051import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
052import org.apache.hadoop.hbase.regionserver.HRegion;
053import org.apache.hadoop.hbase.regionserver.HRegionServer;
054import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
055import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
056import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
057import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
058import org.apache.hadoop.hbase.testclassification.MasterTests;
059import org.apache.hadoop.hbase.testclassification.MediumTests;
060import org.apache.hadoop.hbase.util.Bytes;
061import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
062import org.apache.hadoop.hbase.util.FSUtils;
063import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
064import org.junit.After;
065import org.junit.AfterClass;
066import org.junit.Before;
067import org.junit.BeforeClass;
068import org.junit.ClassRule;
069import org.junit.Test;
070import org.junit.experimental.categories.Category;
071import org.mockito.Mockito;
072import org.slf4j.Logger;
073import org.slf4j.LoggerFactory;
074
075import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
076
077import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteSnapshotRequest;
078import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsRequest;
079import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsResponse;
080import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
081import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
082import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
083
084/**
085 * Test the master-related aspects of a snapshot
086 */
087@Category({MasterTests.class, MediumTests.class})
088public class TestSnapshotFromMaster {
089
  // Class-level JUnit rule obtained from HBaseClassTestRule for this test class.
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestSnapshotFromMaster.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotFromMaster.class);
  // Shared testing utility; setupCluster() starts its mini cluster and cleanupTest() stops it.
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  // Value passed to UTIL.startMiniCluster() in setupCluster().
  private static final int NUM_RS = 2;
  // The following three handles are assigned once in setupCluster().
  private static Path rootDir;
  private static FileSystem fs;
  private static HMaster master;

  // for hfile archiving test.
  // archiveDir is HConstants.HFILE_ARCHIVE_DIRECTORY resolved under rootDir (see setupCluster()).
  private static Path archiveDir;
  // Column family and name of the table that setup() creates before every test.
  private static final byte[] TEST_FAM = Bytes.toBytes("fam");
  private static final TableName TABLE_NAME =
      TableName.valueOf("test");
  // refresh the cache every 1/2 second
  // (written to SnapshotHFileCleaner.HFILE_CACHE_REFRESH_PERIOD_CONF_KEY in setupConf())
  private static final long cacheRefreshPeriod = 500;
  // Written to "hbase.hstore.blockingStoreFiles" in setupConf(); the archiving test loads and
  // flushes the table half this many times.
  private static final int blockingStoreFiles = 12;
109
110  /**
111   * Setup the config for the cluster
112   */
113  @BeforeClass
114  public static void setupCluster() throws Exception {
115    setupConf(UTIL.getConfiguration());
116    UTIL.startMiniCluster(NUM_RS);
117    fs = UTIL.getDFSCluster().getFileSystem();
118    master = UTIL.getMiniHBaseCluster().getMaster();
119    rootDir = master.getMasterFileSystem().getRootDir();
120    archiveDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
121  }
122
123  private static void setupConf(Configuration conf) {
124    // disable the ui
125    conf.setInt("hbase.regionsever.info.port", -1);
126    // change the flush size to a small amount, regulating number of store files
127    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
128    // so make sure we get a compaction when doing a load, but keep around some
129    // files in the store
130    conf.setInt("hbase.hstore.compaction.min", 2);
131    conf.setInt("hbase.hstore.compactionThreshold", 5);
132    // block writes if we get to 12 store files
133    conf.setInt("hbase.hstore.blockingStoreFiles", blockingStoreFiles);
134    // Ensure no extra cleaners on by default (e.g. TimeToLiveHFileCleaner)
135    conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, "");
136    conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, "");
137    // Enable snapshot
138    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
139    conf.setLong(SnapshotManager.HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS, 3 * 1000L);
140    conf.setLong(SnapshotHFileCleaner.HFILE_CACHE_REFRESH_PERIOD_CONF_KEY, cacheRefreshPeriod);
141    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
142      ConstantSizeRegionSplitPolicy.class.getName());
143    conf.setInt("hbase.hfile.compactions.cleaner.interval", 20 * 1000);
144  }
145
146  @Before
147  public void setup() throws Exception {
148    UTIL.createTable(TABLE_NAME, TEST_FAM);
149    master.getSnapshotManager().setSnapshotHandlerForTesting(TABLE_NAME, null);
150  }
151
152  @After
153  public void tearDown() throws Exception {
154    UTIL.deleteTable(TABLE_NAME);
155    SnapshotTestingUtils.deleteAllSnapshots(UTIL.getAdmin());
156    SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
157  }
158
159  @AfterClass
160  public static void cleanupTest() throws Exception {
161    try {
162      UTIL.shutdownMiniCluster();
163    } catch (Exception e) {
164      // NOOP;
165    }
166  }
167
168  /**
169   * Test that the contract from the master for checking on a snapshot are valid.
170   * <p>
171   * <ol>
172   * <li>If a snapshot fails with an error, we expect to get the source error.</li>
173   * <li>If there is no snapshot name supplied, we should get an error.</li>
174   * <li>If asking about a snapshot has hasn't occurred, you should get an error.</li>
175   * </ol>
176   */
177  @Test
178  public void testIsDoneContract() throws Exception {
179
180    IsSnapshotDoneRequest.Builder builder = IsSnapshotDoneRequest.newBuilder();
181
182    String snapshotName = "asyncExpectedFailureTest";
183
184    // check that we get an exception when looking up snapshot where one hasn't happened
185    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
186      UnknownSnapshotException.class);
187
188    // and that we get the same issue, even if we specify a name
189    SnapshotDescription desc = SnapshotDescription.newBuilder()
190      .setName(snapshotName).setTable(TABLE_NAME.getNameAsString()).build();
191    builder.setSnapshot(desc);
192    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
193      UnknownSnapshotException.class);
194
195    // set a mock handler to simulate a snapshot
196    DisabledTableSnapshotHandler mockHandler = Mockito.mock(DisabledTableSnapshotHandler.class);
197    Mockito.when(mockHandler.getException()).thenReturn(null);
198    Mockito.when(mockHandler.getSnapshot()).thenReturn(desc);
199    Mockito.when(mockHandler.isFinished()).thenReturn(Boolean.TRUE);
200    Mockito.when(mockHandler.getCompletionTimestamp())
201      .thenReturn(EnvironmentEdgeManager.currentTime());
202
203    master.getSnapshotManager()
204        .setSnapshotHandlerForTesting(TABLE_NAME, mockHandler);
205
206    // if we do a lookup without a snapshot name, we should fail - you should always know your name
207    builder = IsSnapshotDoneRequest.newBuilder();
208    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
209      UnknownSnapshotException.class);
210
211    // then do the lookup for the snapshot that it is done
212    builder.setSnapshot(desc);
213    IsSnapshotDoneResponse response =
214      master.getMasterRpcServices().isSnapshotDone(null, builder.build());
215    assertTrue("Snapshot didn't complete when it should have.", response.getDone());
216
217    // now try the case where we are looking for a snapshot we didn't take
218    builder.setSnapshot(SnapshotDescription.newBuilder().setName("Not A Snapshot").build());
219    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
220      UnknownSnapshotException.class);
221
222    // then create a snapshot to the fs and make sure that we can find it when checking done
223    snapshotName = "completed";
224    desc = createSnapshot(snapshotName);
225
226    builder.setSnapshot(desc);
227    response = master.getMasterRpcServices().isSnapshotDone(null, builder.build());
228    assertTrue("Completed, on-disk snapshot not found", response.getDone());
229  }
230
231  @Test
232  public void testGetCompletedSnapshots() throws Exception {
233    // first check when there are no snapshots
234    GetCompletedSnapshotsRequest request = GetCompletedSnapshotsRequest.newBuilder().build();
235    GetCompletedSnapshotsResponse response =
236      master.getMasterRpcServices().getCompletedSnapshots(null, request);
237    assertEquals("Found unexpected number of snapshots", 0, response.getSnapshotsCount());
238
239    // write one snapshot to the fs
240    String snapshotName = "completed";
241    SnapshotDescription snapshot = createSnapshot(snapshotName);
242
243    // check that we get one snapshot
244    response = master.getMasterRpcServices().getCompletedSnapshots(null, request);
245    assertEquals("Found unexpected number of snapshots", 1, response.getSnapshotsCount());
246    List<SnapshotDescription> snapshots = response.getSnapshotsList();
247    List<SnapshotDescription> expected = Lists.newArrayList(snapshot);
248    assertEquals("Returned snapshots don't match created snapshots", expected, snapshots);
249
250    // write a second snapshot
251    snapshotName = "completed_two";
252    snapshot = createSnapshot(snapshotName);
253    expected.add(snapshot);
254
255    // check that we get one snapshot
256    response = master.getMasterRpcServices().getCompletedSnapshots(null, request);
257    assertEquals("Found unexpected number of snapshots", 2, response.getSnapshotsCount());
258    snapshots = response.getSnapshotsList();
259    assertEquals("Returned snapshots don't match created snapshots", expected, snapshots);
260  }
261
262  @Test
263  public void testDeleteSnapshot() throws Exception {
264
265    String snapshotName = "completed";
266    SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName(snapshotName).build();
267
268    DeleteSnapshotRequest request = DeleteSnapshotRequest.newBuilder().setSnapshot(snapshot)
269        .build();
270    try {
271      master.getMasterRpcServices().deleteSnapshot(null, request);
272      fail("Master didn't throw exception when attempting to delete snapshot that doesn't exist");
273    } catch (org.apache.hbase.thirdparty.com.google.protobuf.ServiceException e) {
274      // Expected
275    }
276
277    // write one snapshot to the fs
278    createSnapshot(snapshotName);
279
280    // then delete the existing snapshot,which shouldn't cause an exception to be thrown
281    master.getMasterRpcServices().deleteSnapshot(null, request);
282  }
283
284  /**
285   * Test that the snapshot hfile archive cleaner works correctly. HFiles that are in snapshots
286   * should be retained, while those that are not in a snapshot should be deleted.
287   * @throws Exception on failure
288   */
289  @Test
290  public void testSnapshotHFileArchiving() throws Exception {
291    Admin admin = UTIL.getAdmin();
292    // make sure we don't fail on listing snapshots
293    SnapshotTestingUtils.assertNoSnapshots(admin);
294
295    // recreate test table with disabled compactions; otherwise compaction may happen before
296    // snapshot, the call after snapshot will be a no-op and checks will fail
297    UTIL.deleteTable(TABLE_NAME);
298    TableDescriptor td = TableDescriptorBuilder.newBuilder(TABLE_NAME)
299            .setColumnFamily(ColumnFamilyDescriptorBuilder.of(TEST_FAM))
300            .setCompactionEnabled(false)
301            .build();
302    UTIL.getAdmin().createTable(td);
303
304    // load the table
305    for (int i = 0; i < blockingStoreFiles / 2; i ++) {
306      UTIL.loadTable(UTIL.getConnection().getTable(TABLE_NAME), TEST_FAM);
307      UTIL.flush(TABLE_NAME);
308    }
309
310    // disable the table so we can take a snapshot
311    admin.disableTable(TABLE_NAME);
312
313    // take a snapshot of the table
314    String snapshotName = "snapshot";
315    byte[] snapshotNameBytes = Bytes.toBytes(snapshotName);
316    admin.snapshot(snapshotNameBytes, TABLE_NAME);
317
318    LOG.info("After snapshot File-System state");
319    FSUtils.logFileSystemState(fs, rootDir, LOG);
320
321    // ensure we only have one snapshot
322    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshotNameBytes, TABLE_NAME);
323
324    td = TableDescriptorBuilder.newBuilder(td)
325            .setCompactionEnabled(true)
326            .build();
327    // enable compactions now
328    admin.modifyTable(td);
329
330    // renable the table so we can compact the regions
331    admin.enableTable(TABLE_NAME);
332
333    // compact the files so we get some archived files for the table we just snapshotted
334    List<HRegion> regions = UTIL.getHBaseCluster().getRegions(TABLE_NAME);
335    for (HRegion region : regions) {
336      region.waitForFlushesAndCompactions(); // enable can trigger a compaction, wait for it.
337      region.compactStores(); // min is 2 so will compact and archive
338    }
339    List<RegionServerThread> regionServerThreads = UTIL.getMiniHBaseCluster()
340        .getRegionServerThreads();
341    HRegionServer hrs = null;
342    for (RegionServerThread rs : regionServerThreads) {
343      if (!rs.getRegionServer().getRegions(TABLE_NAME).isEmpty()) {
344        hrs = rs.getRegionServer();
345        break;
346      }
347    }
348    CompactedHFilesDischarger cleaner = new CompactedHFilesDischarger(100, null, hrs, false);
349    cleaner.chore();
350    LOG.info("After compaction File-System state");
351    FSUtils.logFileSystemState(fs, rootDir, LOG);
352
353    // make sure the cleaner has run
354    LOG.debug("Running hfile cleaners");
355    ensureHFileCleanersRun();
356    LOG.info("After cleaners File-System state: " + rootDir);
357    FSUtils.logFileSystemState(fs, rootDir, LOG);
358
359    // get the snapshot files for the table
360    Path snapshotTable = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
361    Set<String> snapshotHFiles = SnapshotReferenceUtil.getHFileNames(
362        UTIL.getConfiguration(), fs, snapshotTable);
363    // check that the files in the archive contain the ones that we need for the snapshot
364    LOG.debug("Have snapshot hfiles:");
365    for (String fileName : snapshotHFiles) {
366      LOG.debug(fileName);
367    }
368    // get the archived files for the table
369    Collection<String> archives = getHFiles(archiveDir, fs, TABLE_NAME);
370
371    // get the hfiles for the table
372    Collection<String> hfiles = getHFiles(rootDir, fs, TABLE_NAME);
373
374    // and make sure that there is a proper subset
375    for (String fileName : snapshotHFiles) {
376      boolean exist = archives.contains(fileName) || hfiles.contains(fileName);
377      assertTrue("Archived hfiles " + archives
378        + " and table hfiles " + hfiles + " is missing snapshot file:" + fileName, exist);
379    }
380
381    // delete the existing snapshot
382    admin.deleteSnapshot(snapshotNameBytes);
383    SnapshotTestingUtils.assertNoSnapshots(admin);
384
385    // make sure that we don't keep around the hfiles that aren't in a snapshot
386    // make sure we wait long enough to refresh the snapshot hfile
387    List<BaseHFileCleanerDelegate> delegates = UTIL.getMiniHBaseCluster().getMaster()
388        .getHFileCleaner().cleanersChain;
389    for (BaseHFileCleanerDelegate delegate: delegates) {
390      if (delegate instanceof SnapshotHFileCleaner) {
391        ((SnapshotHFileCleaner)delegate).getFileCacheForTesting().triggerCacheRefreshForTesting();
392      }
393    }
394    // run the cleaner again
395    LOG.debug("Running hfile cleaners");
396    ensureHFileCleanersRun();
397    LOG.info("After delete snapshot cleaners run File-System state");
398    FSUtils.logFileSystemState(fs, rootDir, LOG);
399
400    archives = getHFiles(archiveDir, fs, TABLE_NAME);
401    assertEquals("Still have some hfiles in the archive, when their snapshot has been deleted.", 0,
402      archives.size());
403  }
404
405  /**
406   * @return all the HFiles for a given table in the specified dir
407   * @throws IOException on expected failure
408   */
409  private final Collection<String> getHFiles(Path dir, FileSystem fs, TableName tableName) throws IOException {
410    Path tableDir = FSUtils.getTableDir(dir, tableName);
411    return SnapshotTestingUtils.listHFileNames(fs, tableDir);
412  }
413
414  /**
415   * Make sure the {@link HFileCleaner HFileCleaners} run at least once
416   */
417  private static void ensureHFileCleanersRun() {
418    UTIL.getHBaseCluster().getMaster().getHFileCleaner().chore();
419  }
420
421  private SnapshotDescription createSnapshot(final String snapshotName) throws IOException {
422    SnapshotTestingUtils.SnapshotMock snapshotMock =
423      new SnapshotTestingUtils.SnapshotMock(UTIL.getConfiguration(), fs, rootDir);
424    SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder =
425      snapshotMock.createSnapshotV2(snapshotName, "test", 0);
426    builder.commit();
427    return builder.getSnapshotDescription();
428  }
429
430  @Test
431  public void testAsyncSnapshotWillNotBlockSnapshotHFileCleaner() throws Exception {
432    // Write some data
433    Table table = UTIL.getConnection().getTable(TABLE_NAME);
434    for (int i = 0; i < 10; i++) {
435      Put put = new Put(Bytes.toBytes(i)).addColumn(TEST_FAM, Bytes.toBytes("q"), Bytes.toBytes(i));
436      table.put(put);
437    }
438    String snapshotName = "testAsyncSnapshotWillNotBlockSnapshotHFileCleaner01";
439    UTIL.getAdmin().snapshotAsync(new org.apache.hadoop.hbase.client.SnapshotDescription(
440        snapshotName, TABLE_NAME, SnapshotType.FLUSH));
441    Waiter.waitFor(UTIL.getConfiguration(), 10 * 1000L, 200L,
442      () -> UTIL.getAdmin().listSnapshots(Pattern.compile(snapshotName)).size() == 1);
443    assertTrue(master.getSnapshotManager().isTakingAnySnapshot());
444    Thread.sleep(11 * 1000L);
445    assertFalse(master.getSnapshotManager().isTakingAnySnapshot());
446  }
447}