001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertFalse;
022import static org.junit.jupiter.api.Assertions.assertNotNull;
023import static org.junit.jupiter.api.Assertions.assertTrue;
024
025import java.io.IOException;
026import java.util.Optional;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.FileSystem;
029import org.apache.hadoop.fs.Path;
030import org.apache.hadoop.hbase.HBaseTestingUtil;
031import org.apache.hadoop.hbase.ServerName;
032import org.apache.hadoop.hbase.TableName;
033import org.apache.hadoop.hbase.client.RegionInfo;
034import org.apache.hadoop.hbase.client.SnapshotDescription;
035import org.apache.hadoop.hbase.client.SnapshotType;
036import org.apache.hadoop.hbase.client.Table;
037import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
038import org.apache.hadoop.hbase.master.HMaster;
039import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
040import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
041import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher;
042import org.apache.hadoop.hbase.regionserver.HRegion;
043import org.apache.hadoop.hbase.regionserver.HStoreFile;
044import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
045import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
046import org.apache.hadoop.hbase.testclassification.LargeTests;
047import org.apache.hadoop.hbase.testclassification.RegionServerTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.CommonFSUtils;
050import org.apache.hadoop.hbase.util.RegionSplitter;
051import org.junit.jupiter.api.AfterEach;
052import org.junit.jupiter.api.BeforeEach;
053import org.junit.jupiter.api.Tag;
054import org.junit.jupiter.api.Test;
055import org.slf4j.Logger;
056import org.slf4j.LoggerFactory;
057
058import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
059import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
060
061@Tag(RegionServerTests.TAG)
062@Tag(LargeTests.TAG)
063public class TestSnapshotVerifyProcedure {
064  private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotVerifyProcedure.class);
065
066  private HBaseTestingUtil TEST_UTIL;
067  private final TableName tableName = TableName.valueOf("TestRSSnapshotVerifier");
068  private final byte[] cf = Bytes.toBytes("cf");
069  private final SnapshotDescription snapshot =
070    new SnapshotDescription("test-snapshot", tableName, SnapshotType.FLUSH);
071  private SnapshotProtos.SnapshotDescription snapshotProto =
072    ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot);
073
074  @BeforeEach
075  public void setup() throws Exception {
076    TEST_UTIL = new HBaseTestingUtil();
077    Configuration conf = TEST_UTIL.getConfiguration();
078    // delay procedure dispatch
079    conf.setInt(RemoteProcedureDispatcher.DISPATCH_DELAY_CONF_KEY, 10000);
080    conf.setInt(RemoteProcedureDispatcher.DISPATCH_MAX_QUEUE_SIZE_CONF_KEY, 128);
081    TEST_UTIL.startMiniCluster(3);
082    final byte[][] splitKeys = new RegionSplitter.HexStringSplit().split(10);
083    Table table = TEST_UTIL.createTable(tableName, cf, splitKeys);
084    TEST_UTIL.loadTable(table, cf, false);
085    TEST_UTIL.getAdmin().flush(tableName);
086
087    // prepare unverified snapshot
088    snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto, conf);
089    Path rootDir = CommonFSUtils.getRootDir(conf);
090    Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshotProto, rootDir, conf);
091    FileSystem workingDirFs = workingDir.getFileSystem(conf);
092    if (!workingDirFs.exists(workingDir)) {
093      workingDirFs.mkdirs(workingDir);
094    }
095    ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(snapshot.getName());
096    SnapshotManifest manifest =
097      SnapshotManifest.create(conf, workingDirFs, workingDir, snapshotProto, monitor);
098    manifest.addTableDescriptor(
099      TEST_UTIL.getHBaseCluster().getMaster().getTableDescriptors().get(tableName));
100    SnapshotDescriptionUtils.writeSnapshotInfo(snapshotProto, workingDir, workingDirFs);
101    TEST_UTIL.getHBaseCluster().getRegions(tableName).forEach(r -> {
102      try {
103        r.addRegionToSnapshot(snapshotProto, monitor);
104      } catch (IOException e) {
105        LOG.warn("Failed snapshot region {}", r.getRegionInfo());
106      }
107    });
108    manifest.consolidate();
109  }
110
111  @Test
112  public void testSimpleVerify() throws Exception {
113    Optional<HRegion> regionOpt = TEST_UTIL.getHBaseCluster().getRegions(tableName).stream()
114      .filter(r -> !r.getStore(cf).getStorefiles().isEmpty()).findFirst();
115    assertTrue(regionOpt.isPresent());
116    HRegion region = regionOpt.get();
117    SnapshotVerifyProcedure p1 = new SnapshotVerifyProcedure(snapshotProto, region.getRegionInfo());
118    ProcedureExecutor<MasterProcedureEnv> procExec =
119      TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
120    long procId = procExec.submitProcedure(p1);
121    ProcedureTestingUtility.waitProcedure(procExec, procId);
122    assertTrue(p1.isSuccess());
123
124    // delete store file to trigger a CorruptedSnapshotException
125    for (HStoreFile file : region.getStore(cf).getStorefiles()) {
126      TEST_UTIL.getDFSCluster().getFileSystem().delete(file.getPath(), true);
127      LOG.info("delete store file {}", file.getPath());
128    }
129    SnapshotVerifyProcedure p2 = new SnapshotVerifyProcedure(snapshotProto, region.getRegionInfo());
130    long newProcId = procExec.submitProcedure(p2);
131    ProcedureTestingUtility.waitProcedure(procExec, newProcId);
132    assertTrue(p2.isSuccess());
133  }
134
135  @Test
136  public void testRestartMaster() throws Exception {
137    RegionInfo region = TEST_UTIL.getHBaseCluster().getRegions(tableName).get(0).getRegionInfo();
138    SnapshotVerifyProcedure svp = new SnapshotVerifyProcedure(snapshotProto, region);
139    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
140    long procId = master.getMasterProcedureExecutor().submitProcedure(svp);
141    TEST_UTIL.waitFor(10000, () -> svp.getServerName() != null);
142    ServerName worker = svp.getServerName();
143    int availableWorker = master.getSnapshotManager().getAvailableWorker(worker);
144
145    // restart master
146    TEST_UTIL.getHBaseCluster().killMaster(master.getServerName());
147    TEST_UTIL.getHBaseCluster().waitForMasterToStop(master.getServerName(), 30000);
148    TEST_UTIL.getHBaseCluster().startMaster();
149    TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster();
150
151    // restore used worker
152    master = TEST_UTIL.getHBaseCluster().getMaster();
153    SnapshotVerifyProcedure svp2 =
154      master.getMasterProcedureExecutor().getProcedure(SnapshotVerifyProcedure.class, procId);
155    assertNotNull(svp2);
156    assertFalse(svp2.isFinished());
157    assertNotNull(svp2.getServerName());
158    assertEquals(worker, svp.getServerName());
159    assertEquals((int) master.getSnapshotManager().getAvailableWorker(worker), availableWorker);
160
161    // release worker
162    ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), svp2);
163    assertEquals((int) master.getSnapshotManager().getAvailableWorker(worker), availableWorker + 1);
164  }
165
166  @AfterEach
167  public void teardown() throws Exception {
168    TEST_UTIL.shutdownMiniCluster();
169  }
170}