001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import static org.junit.jupiter.api.Assertions.assertEquals; 021import static org.junit.jupiter.api.Assertions.assertFalse; 022import static org.junit.jupiter.api.Assertions.assertNotNull; 023import static org.junit.jupiter.api.Assertions.assertTrue; 024 025import java.io.IOException; 026import java.util.Optional; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.FileSystem; 029import org.apache.hadoop.fs.Path; 030import org.apache.hadoop.hbase.HBaseTestingUtil; 031import org.apache.hadoop.hbase.ServerName; 032import org.apache.hadoop.hbase.TableName; 033import org.apache.hadoop.hbase.client.RegionInfo; 034import org.apache.hadoop.hbase.client.SnapshotDescription; 035import org.apache.hadoop.hbase.client.SnapshotType; 036import org.apache.hadoop.hbase.client.Table; 037import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; 038import org.apache.hadoop.hbase.master.HMaster; 039import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 040import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 041import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher; 042import org.apache.hadoop.hbase.regionserver.HRegion; 043import org.apache.hadoop.hbase.regionserver.HStoreFile; 044import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 045import org.apache.hadoop.hbase.snapshot.SnapshotManifest; 046import org.apache.hadoop.hbase.testclassification.LargeTests; 047import org.apache.hadoop.hbase.testclassification.RegionServerTests; 048import org.apache.hadoop.hbase.util.Bytes; 049import org.apache.hadoop.hbase.util.CommonFSUtils; 050import org.apache.hadoop.hbase.util.RegionSplitter; 051import org.junit.jupiter.api.AfterEach; 052import org.junit.jupiter.api.BeforeEach; 053import org.junit.jupiter.api.Tag; 054import org.junit.jupiter.api.Test; 055import org.slf4j.Logger; 056import org.slf4j.LoggerFactory; 057 058import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 059import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; 060 061@Tag(RegionServerTests.TAG) 062@Tag(LargeTests.TAG) 063public class TestSnapshotVerifyProcedure { 064 private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotVerifyProcedure.class); 065 066 private HBaseTestingUtil TEST_UTIL; 067 private final TableName tableName = TableName.valueOf("TestRSSnapshotVerifier"); 068 private final byte[] cf = Bytes.toBytes("cf"); 069 private final SnapshotDescription snapshot = 070 new SnapshotDescription("test-snapshot", tableName, SnapshotType.FLUSH); 071 private SnapshotProtos.SnapshotDescription snapshotProto = 072 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot); 073 074 @BeforeEach 075 public void setup() throws Exception { 076 TEST_UTIL = new HBaseTestingUtil(); 077 Configuration conf = TEST_UTIL.getConfiguration(); 078 // delay procedure dispatch 079 conf.setInt(RemoteProcedureDispatcher.DISPATCH_DELAY_CONF_KEY, 10000); 080 conf.setInt(RemoteProcedureDispatcher.DISPATCH_MAX_QUEUE_SIZE_CONF_KEY, 128); 081 TEST_UTIL.startMiniCluster(3); 082 final byte[][] splitKeys = new RegionSplitter.HexStringSplit().split(10); 083 Table table = TEST_UTIL.createTable(tableName, cf, splitKeys); 084 TEST_UTIL.loadTable(table, cf, false); 085 TEST_UTIL.getAdmin().flush(tableName); 086 087 // prepare unverified snapshot 088 snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto, conf); 089 Path rootDir = CommonFSUtils.getRootDir(conf); 090 Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshotProto, rootDir, conf); 091 FileSystem workingDirFs = workingDir.getFileSystem(conf); 092 if (!workingDirFs.exists(workingDir)) { 093 workingDirFs.mkdirs(workingDir); 094 } 095 ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(snapshot.getName()); 096 SnapshotManifest manifest = 097 SnapshotManifest.create(conf, workingDirFs, workingDir, snapshotProto, monitor); 098 manifest.addTableDescriptor( 099 TEST_UTIL.getHBaseCluster().getMaster().getTableDescriptors().get(tableName)); 100 SnapshotDescriptionUtils.writeSnapshotInfo(snapshotProto, workingDir, workingDirFs); 101 TEST_UTIL.getHBaseCluster().getRegions(tableName).forEach(r -> { 102 try { 103 r.addRegionToSnapshot(snapshotProto, monitor); 104 } catch (IOException e) { 105 LOG.warn("Failed snapshot region {}", r.getRegionInfo()); 106 } 107 }); 108 manifest.consolidate(); 109 } 110 111 @Test 112 public void testSimpleVerify() throws Exception { 113 Optional<HRegion> regionOpt = TEST_UTIL.getHBaseCluster().getRegions(tableName).stream() 114 .filter(r -> !r.getStore(cf).getStorefiles().isEmpty()).findFirst(); 115 assertTrue(regionOpt.isPresent()); 116 HRegion region = regionOpt.get(); 117 SnapshotVerifyProcedure p1 = new SnapshotVerifyProcedure(snapshotProto, region.getRegionInfo()); 118 ProcedureExecutor<MasterProcedureEnv> procExec = 119 TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); 120 long procId = procExec.submitProcedure(p1); 121 ProcedureTestingUtility.waitProcedure(procExec, procId); 122 assertTrue(p1.isSuccess()); 123 124 // delete store file to trigger a CorruptedSnapshotException 125 for (HStoreFile file : region.getStore(cf).getStorefiles()) { 126 TEST_UTIL.getDFSCluster().getFileSystem().delete(file.getPath(), true); 127 LOG.info("delete store file {}", file.getPath()); 128 } 129 SnapshotVerifyProcedure p2 = new SnapshotVerifyProcedure(snapshotProto, region.getRegionInfo()); 130 long newProcId = procExec.submitProcedure(p2); 131 ProcedureTestingUtility.waitProcedure(procExec, newProcId); 132 assertTrue(p2.isSuccess()); 133 } 134 135 @Test 136 public void testRestartMaster() throws Exception { 137 RegionInfo region = TEST_UTIL.getHBaseCluster().getRegions(tableName).get(0).getRegionInfo(); 138 SnapshotVerifyProcedure svp = new SnapshotVerifyProcedure(snapshotProto, region); 139 HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); 140 long procId = master.getMasterProcedureExecutor().submitProcedure(svp); 141 TEST_UTIL.waitFor(10000, () -> svp.getServerName() != null); 142 ServerName worker = svp.getServerName(); 143 int availableWorker = master.getSnapshotManager().getAvailableWorker(worker); 144 145 // restart master 146 TEST_UTIL.getHBaseCluster().killMaster(master.getServerName()); 147 TEST_UTIL.getHBaseCluster().waitForMasterToStop(master.getServerName(), 30000); 148 TEST_UTIL.getHBaseCluster().startMaster(); 149 TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(); 150 151 // restore used worker 152 master = TEST_UTIL.getHBaseCluster().getMaster(); 153 SnapshotVerifyProcedure svp2 = 154 master.getMasterProcedureExecutor().getProcedure(SnapshotVerifyProcedure.class, procId); 155 assertNotNull(svp2); 156 assertFalse(svp2.isFinished()); 157 assertNotNull(svp2.getServerName()); 158 assertEquals(worker, svp.getServerName()); 159 assertEquals((int) master.getSnapshotManager().getAvailableWorker(worker), availableWorker); 160 161 // release worker 162 ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), svp2); 163 assertEquals((int) master.getSnapshotManager().getAvailableWorker(worker), availableWorker + 1); 164 } 165 166 @AfterEach 167 public void teardown() throws Exception { 168 TEST_UTIL.shutdownMiniCluster(); 169 } 170}