/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.cleaner;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.SnapshotType;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.snapshot.DisabledTableSnapshotHandler;
import org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.regionserver.CompactedHFilesDischarger;
import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteSnapshotRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * Test the master-related aspects of a snapshot: the snapshot-done contract, listing completed
 * snapshots, snapshot deletion, and retention of snapshot-referenced hfiles by the cleaners.
 */
@Category({MasterTests.class, MediumTests.class})
public class TestSnapshotFromMaster {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestSnapshotFromMaster.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotFromMaster.class);
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private static final int NUM_RS = 2;
  private static Path rootDir;
  private static FileSystem fs;
  private static HMaster master;

  // for the hfile archiving test.
  private static Path archiveDir;
  private static final byte[] TEST_FAM = Bytes.toBytes("fam");
  private static final TableName TABLE_NAME = TableName.valueOf("test");
  // refresh the cache every 1/2 second
  private static final long cacheRefreshPeriod = 500;
  private static final int blockingStoreFiles = 12;
  /**
   * Set up the configuration and start the mini cluster
   */
  @BeforeClass
  public static void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_RS);
    fs = UTIL.getDFSCluster().getFileSystem();
    master = UTIL.getMiniHBaseCluster().getMaster();
    rootDir = master.getMasterFileSystem().getRootDir();
    archiveDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
  }

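  /**
   * Tune the cluster so hfile archiving is observable: small flushes, eager compactions, no
   * extra cleaner plugins, snapshots enabled, and a fast refresh of the snapshot hfile cache.
   */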
  private static void setupConf(Configuration conf) {
    // disable the ui
    conf.setInt("hbase.regionserver.info.port", -1);
    // change the flush size to a small amount, regulating the number of store files
    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
    // lower the compaction thresholds so a load triggers compactions, but keep some
    // files around in the store
    conf.setInt("hbase.hstore.compaction.min", 2);
    conf.setInt("hbase.hstore.compactionThreshold", 5);
    // block writes if we get to 12 store files
    conf.setInt("hbase.hstore.blockingStoreFiles", blockingStoreFiles);
    // Ensure no extra cleaners are on by default (e.g. TimeToLiveHFileCleaner)
    conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, "");
    conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, "");
    // Enable snapshots
    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
    conf.setLong(SnapshotManager.HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS, 3 * 1000L);
    conf.setLong(SnapshotHFileCleaner.HFILE_CACHE_REFRESH_PERIOD_CONF_KEY, cacheRefreshPeriod);
    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
      ConstantSizeRegionSplitPolicy.class.getName());
    conf.setInt("hbase.hfile.compactions.cleaner.interval", 20 * 1000);
  }

  @Before
  public void setup() throws Exception {
    UTIL.createTable(TABLE_NAME, TEST_FAM);
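    // clear any snapshot handler left over from a previous test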
    master.getSnapshotManager().setSnapshotHandlerForTesting(TABLE_NAME, null);
  }

  @After
  public void tearDown() throws Exception {
    UTIL.deleteTable(TABLE_NAME);
    SnapshotTestingUtils.deleteAllSnapshots(UTIL.getAdmin());
    SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
  }

  @AfterClass
  public static void cleanupTest() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      LOG.warn("failure shutting down cluster", e);
    }
  }

  /**
   * Test that the contract from the master for checking on a snapshot is valid.
   * <p>
   * <ol>
   * <li>If a snapshot fails with an error, we expect to get the source error.</li>
   * <li>If there is no snapshot name supplied, we should get an error.</li>
   * <li>If asking about a snapshot that hasn't occurred, you should get an error.</li>
   * </ol>
   */
  @Test
  public void testIsDoneContract() throws Exception {

    IsSnapshotDoneRequest.Builder builder = IsSnapshotDoneRequest.newBuilder();

    String snapshotName = "asyncExpectedFailureTest";

    // check that we get an exception when looking up a snapshot that hasn't happened
    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
      UnknownSnapshotException.class);

    // and that we get the same issue, even if we specify a name
    SnapshotDescription desc = SnapshotDescription.newBuilder()
      .setName(snapshotName).setTable(TABLE_NAME.getNameAsString()).build();
    builder.setSnapshot(desc);
    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
      UnknownSnapshotException.class);

    // set a mock handler to simulate a completed snapshot with no error
    DisabledTableSnapshotHandler mockHandler = Mockito.mock(DisabledTableSnapshotHandler.class);
    Mockito.when(mockHandler.getException()).thenReturn(null);
    Mockito.when(mockHandler.getSnapshot()).thenReturn(desc);
    Mockito.when(mockHandler.isFinished()).thenReturn(Boolean.TRUE);
    Mockito.when(mockHandler.getCompletionTimestamp())
      .thenReturn(EnvironmentEdgeManager.currentTime());

    master.getSnapshotManager()
        .setSnapshotHandlerForTesting(TABLE_NAME, mockHandler);

    // if we do a lookup without a snapshot name, we should fail - you should always know your name
    builder = IsSnapshotDoneRequest.newBuilder();
    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
      UnknownSnapshotException.class);

    // then do the lookup for the snapshot; it should report done
    builder.setSnapshot(desc);
    IsSnapshotDoneResponse response =
      master.getMasterRpcServices().isSnapshotDone(null, builder.build());
    assertTrue("Snapshot didn't complete when it should have.", response.getDone());

    // now try the case where we are looking for a snapshot we didn't take
    builder.setSnapshot(SnapshotDescription.newBuilder().setName("Not A Snapshot").build());
    SnapshotTestingUtils.expectSnapshotDoneException(master, builder.build(),
      UnknownSnapshotException.class);

    // then write a snapshot to the fs and make sure that we can find it when checking done
    snapshotName = "completed";
    desc = createSnapshot(snapshotName);

    builder.setSnapshot(desc);
    response = master.getMasterRpcServices().isSnapshotDone(null, builder.build());
    assertTrue("Completed, on-disk snapshot not found", response.getDone());
  }

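  /**
   * Verify that getCompletedSnapshots returns exactly the snapshots present on the filesystem:
   * none at first, then each snapshot as it is written.
   */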
  @Test
  public void testGetCompletedSnapshots() throws Exception {
    // first check when there are no snapshots
    GetCompletedSnapshotsRequest request = GetCompletedSnapshotsRequest.newBuilder().build();
    GetCompletedSnapshotsResponse response =
      master.getMasterRpcServices().getCompletedSnapshots(null, request);
    assertEquals("Found unexpected number of snapshots", 0, response.getSnapshotsCount());

    // write one snapshot to the fs
    String snapshotName = "completed";
    SnapshotDescription snapshot = createSnapshot(snapshotName);

    // check that we get one snapshot
    response = master.getMasterRpcServices().getCompletedSnapshots(null, request);
    assertEquals("Found unexpected number of snapshots", 1, response.getSnapshotsCount());
    List<SnapshotDescription> snapshots = response.getSnapshotsList();
    List<SnapshotDescription> expected = Lists.newArrayList(snapshot);
    assertEquals("Returned snapshots don't match created snapshots", expected, snapshots);

    // write a second snapshot
    snapshotName = "completed_two";
    snapshot = createSnapshot(snapshotName);
    expected.add(snapshot);

    // check that we get two snapshots
    response = master.getMasterRpcServices().getCompletedSnapshots(null, request);
    assertEquals("Found unexpected number of snapshots", 2, response.getSnapshotsCount());
    snapshots = response.getSnapshotsList();
    assertEquals("Returned snapshots don't match created snapshots", expected, snapshots);
  }

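  /**
   * Verify that deleting a nonexistent snapshot fails with a ServiceException, while deleting a
   * snapshot that exists on the filesystem succeeds.
   */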
  @Test
  public void testDeleteSnapshot() throws Exception {

    String snapshotName = "completed";
    SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName(snapshotName).build();

    DeleteSnapshotRequest request = DeleteSnapshotRequest.newBuilder().setSnapshot(snapshot)
        .build();
    try {
      master.getMasterRpcServices().deleteSnapshot(null, request);
      fail("Master didn't throw exception when attempting to delete snapshot that doesn't exist");
    } catch (org.apache.hbase.thirdparty.com.google.protobuf.ServiceException e) {
      // Expected
    }

    // write one snapshot to the fs
    createSnapshot(snapshotName);

    // then delete the existing snapshot, which shouldn't cause an exception to be thrown
    master.getMasterRpcServices().deleteSnapshot(null, request);
  }

  /**
   * Test that the snapshot hfile archive cleaner works correctly. HFiles that are in snapshots
   * should be retained, while those that are not in a snapshot should be deleted.
   * @throws Exception on failure
   */
  @Test
  public void testSnapshotHFileArchiving() throws Exception {
    Admin admin = UTIL.getAdmin();
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // recreate the test table with compactions disabled; otherwise a compaction may happen
    // before the snapshot, making the post-snapshot compaction a no-op and failing the checks
    UTIL.deleteTable(TABLE_NAME);
    TableDescriptor td = TableDescriptorBuilder.newBuilder(TABLE_NAME)
            .setColumnFamily(ColumnFamilyDescriptorBuilder.of(TEST_FAM))
            .setCompactionEnabled(false)
            .build();
    UTIL.getAdmin().createTable(td);

    // load the table
    for (int i = 0; i < blockingStoreFiles / 2; i++) {
      UTIL.loadTable(UTIL.getConnection().getTable(TABLE_NAME), TEST_FAM);
      UTIL.flush(TABLE_NAME);
    }

    // disable the table so we can take a snapshot
    admin.disableTable(TABLE_NAME);

    // take a snapshot of the table
    String snapshotName = "snapshot";
    byte[] snapshotNameBytes = Bytes.toBytes(snapshotName);
    admin.snapshot(snapshotNameBytes, TABLE_NAME);

    LOG.info("After snapshot File-System state");
    FSUtils.logFileSystemState(fs, rootDir, LOG);

    // ensure we only have one snapshot
    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshotNameBytes, TABLE_NAME);

    td = TableDescriptorBuilder.newBuilder(td)
            .setCompactionEnabled(true)
            .build();
    // enable compactions now
    admin.modifyTable(td);

    // re-enable the table so we can compact the regions
    admin.enableTable(TABLE_NAME);

    // compact the files so we get some archived files for the table we just snapshotted
    List<HRegion> regions = UTIL.getHBaseCluster().getRegions(TABLE_NAME);
    for (HRegion region : regions) {
      region.waitForFlushesAndCompactions(); // enable can trigger a compaction, wait for it.
      region.compactStores(); // min is 2 so will compact and archive
    }
    List<RegionServerThread> regionServerThreads = UTIL.getMiniHBaseCluster()
        .getRegionServerThreads();
    HRegionServer hrs = null;
    for (RegionServerThread rs : regionServerThreads) {
      if (!rs.getRegionServer().getRegions(TABLE_NAME).isEmpty()) {
        hrs = rs.getRegionServer();
        break;
      }
    }
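    // run the discharger chore once so compacted-away store files are moved to the archive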
    CompactedHFilesDischarger cleaner = new CompactedHFilesDischarger(100, null, hrs, false);
    cleaner.chore();
    LOG.info("After compaction File-System state");
    FSUtils.logFileSystemState(fs, rootDir, LOG);

    // make sure the cleaner has run
    LOG.debug("Running hfile cleaners");
    ensureHFileCleanersRun();
    LOG.info("After cleaners File-System state: " + rootDir);
    FSUtils.logFileSystemState(fs, rootDir, LOG);

    // get the snapshot files for the table
    Path snapshotTable = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    Set<String> snapshotHFiles = SnapshotReferenceUtil.getHFileNames(
        UTIL.getConfiguration(), fs, snapshotTable);
    // check that the files in the archive contain the ones that we need for the snapshot
    LOG.debug("Have snapshot hfiles:");
    for (String fileName : snapshotHFiles) {
      LOG.debug(fileName);
    }
    // get the archived files for the table
    Collection<String> archives = getHFiles(archiveDir, fs, TABLE_NAME);

    // get the hfiles for the table
    Collection<String> hfiles = getHFiles(rootDir, fs, TABLE_NAME);

    // and make sure that every snapshot hfile is still in the archive or the table dir
    for (String fileName : snapshotHFiles) {
      boolean exist = archives.contains(fileName) || hfiles.contains(fileName);
      assertTrue("Archived hfiles " + archives
        + " and table hfiles " + hfiles + " are missing snapshot file: " + fileName, exist);
    }

    // delete the existing snapshot
    admin.deleteSnapshot(snapshotNameBytes);
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // make sure that we don't keep around the hfiles that aren't in a snapshot
    // make sure we wait long enough to refresh the snapshot hfile cache
    List<BaseHFileCleanerDelegate> delegates = UTIL.getMiniHBaseCluster().getMaster()
        .getHFileCleaner().cleanersChain;
    for (BaseHFileCleanerDelegate delegate: delegates) {
      if (delegate instanceof SnapshotHFileCleaner) {
        ((SnapshotHFileCleaner)delegate).getFileCacheForTesting().triggerCacheRefreshForTesting();
      }
    }
    // run the cleaner again
    LOG.debug("Running hfile cleaners");
    ensureHFileCleanersRun();
    LOG.info("After delete snapshot cleaners run File-System state");
    FSUtils.logFileSystemState(fs, rootDir, LOG);

    archives = getHFiles(archiveDir, fs, TABLE_NAME);
    assertEquals("Still have some hfiles in the archive, when their snapshot has been deleted.", 0,
      archives.size());
  }

  /**
   * @return all the HFiles for a given table in the specified dir
   * @throws IOException if the hfiles in the dir cannot be listed
   */
  private Collection<String> getHFiles(Path dir, FileSystem fs, TableName tableName)
      throws IOException {
    Path tableDir = FSUtils.getTableDir(dir, tableName);
    return SnapshotTestingUtils.listHFileNames(fs, tableDir);
  }

  /**
   * Make sure the {@link HFileCleaner HFileCleaners} run at least once
   */
  private static void ensureHFileCleanersRun() {
    UTIL.getHBaseCluster().getMaster().getHFileCleaner().chore();
  }

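  /**
   * Write a mock v2 snapshot layout directly to the filesystem, bypassing the snapshot RPC, and
   * return its description.
   */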
  private SnapshotDescription createSnapshot(final String snapshotName) throws IOException {
    SnapshotTestingUtils.SnapshotMock snapshotMock =
      new SnapshotTestingUtils.SnapshotMock(UTIL.getConfiguration(), fs, rootDir);
    SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder =
      snapshotMock.createSnapshotV2(snapshotName, "test", 0);
    builder.commit();
    return builder.getSnapshotDescription();
  }

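  /**
   * Take a flush snapshot asynchronously and verify both that it completes and that the
   * snapshot manager drops its sentinel afterwards, so an async snapshot cannot leave the
   * SnapshotHFileCleaner blocked by a lingering in-progress snapshot.
   */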
  @Test
  public void testAsyncSnapshotWillNotBlockSnapshotHFileCleaner() throws Exception {
    // Write some data
    Table table = UTIL.getConnection().getTable(TABLE_NAME);
    for (int i = 0; i < 10; i++) {
      Put put = new Put(Bytes.toBytes(i)).addColumn(TEST_FAM, Bytes.toBytes("q"), Bytes.toBytes(i));
      table.put(put);
    }
    String snapshotName = "testAsyncSnapshotWillNotBlockSnapshotHFileCleaner01";
    UTIL.getAdmin().snapshotAsync(new org.apache.hadoop.hbase.client.SnapshotDescription(
        snapshotName, TABLE_NAME, SnapshotType.FLUSH));
    Waiter.waitFor(UTIL.getConfiguration(), 10 * 1000L, 200L,
      () -> UTIL.getAdmin().listSnapshots(Pattern.compile(snapshotName)).size() == 1);
    UTIL.waitFor(30000, () -> !master.getSnapshotManager().isTakingAnySnapshot());
  }
}