001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import static org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil.insertData; 021import static org.junit.jupiter.api.Assertions.assertEquals; 022import static org.junit.jupiter.api.Assertions.assertTrue; 023 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.hbase.HBaseIOException; 026import org.apache.hadoop.hbase.HConstants; 027import org.apache.hadoop.hbase.TableName; 028import org.apache.hadoop.hbase.client.RegionInfo; 029import org.apache.hadoop.hbase.client.SnapshotDescription; 030import org.apache.hadoop.hbase.client.TableDescriptor; 031import org.apache.hadoop.hbase.procedure2.Procedure; 032import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 033import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 034import org.apache.hadoop.hbase.testclassification.LargeTests; 035import org.apache.hadoop.hbase.testclassification.MasterTests; 036import org.apache.hadoop.hbase.util.Bytes; 037import org.junit.jupiter.api.AfterAll; 038import org.junit.jupiter.api.AfterEach; 039import org.junit.jupiter.api.BeforeAll; 040import org.junit.jupiter.api.BeforeEach; 041import org.junit.jupiter.api.Tag; 042import org.junit.jupiter.api.Test; 043import org.junit.jupiter.api.TestInfo; 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046 047import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateRegionState; 048 049@Tag(MasterTests.TAG) 050@Tag(LargeTests.TAG) 051public class TestTruncateRegionProcedureWithRecovery extends TestTableDDLProcedureBase { 052 private static final Logger LOG = 053 LoggerFactory.getLogger(TestTruncateRegionProcedureWithRecovery.class); 054 private String testMethodName; 055 056 @BeforeEach 057 public void setTestMethod(TestInfo testInfo) { 058 testMethodName = testInfo.getTestMethod().get().getName(); 059 } 060 061 protected static void setupConf(Configuration conf) { 062 conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); 063 conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0); 064 conf.setBoolean(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY, true); 065 conf.setInt("hbase.client.sync.wait.timeout.msec", 60000); 066 } 067 068 @BeforeAll 069 public static void setupCluster() throws Exception { 070 setupConf(UTIL.getConfiguration()); 071 UTIL.startMiniCluster(3); 072 } 073 074 @AfterAll 075 public static void cleanupTest() throws Exception { 076 TestTableDDLProcedureBase.cleanupTest(); 077 } 078 079 @BeforeEach 080 public void setup() throws Exception { 081 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false); 082 083 // Turn off balancer, so it doesn't cut in and mess up our placements. 084 UTIL.getAdmin().balancerSwitch(false, true); 085 // Turn off the meta scanner, so it doesn't remove, parent on us. 086 UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); 087 } 088 089 @AfterEach 090 public void tearDown() throws Exception { 091 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false); 092 for (TableDescriptor htd : UTIL.getAdmin().listTableDescriptors()) { 093 UTIL.deleteTable(htd.getTableName()); 094 } 095 } 096 097 @Test 098 public void testRecoverySnapshotRollback() throws Exception { 099 final TableName tableName = TableName.valueOf(testMethodName); 100 final String[] families = new String[] { "f1", "f2" }; 101 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 102 103 // Create table with split keys 104 final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") }; 105 MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families); 106 107 // Insert data 108 insertData(UTIL, tableName, 2, 20, families); 109 insertData(UTIL, tableName, 2, 31, families); 110 insertData(UTIL, tableName, 2, 61, families); 111 112 // Get a region to truncate 113 MasterProcedureEnv environment = procExec.getEnvironment(); 114 RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream() 115 .filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString())) 116 .min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get(); 117 118 // Create a procedure that might fail. Use a simple approach that creates a custom procedure 119 // that fails after snapshot. 120 // Submit the failing procedure 121 long procId = 122 procExec.submitProcedure(new FailingTruncateRegionProcedure(environment, regionToTruncate)); 123 124 // Wait for procedure to complete (should fail) 125 ProcedureTestingUtility.waitProcedure(procExec, procId); 126 Procedure<MasterProcedureEnv> result = procExec.getResult(procId); 127 assertTrue(result.isFailed(), "Procedure should have failed"); 128 129 // Verify no recovery snapshots remain after rollback 130 boolean snapshotFound = false; 131 for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) { 132 if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) { 133 snapshotFound = true; 134 break; 135 } 136 } 137 assertTrue(!snapshotFound, "Recovery snapshot should have been cleaned up during rollback"); 138 } 139 140 @Test 141 public void testRecoverySnapshotAndRestore() throws Exception { 142 final TableName tableName = TableName.valueOf(testMethodName); 143 final TableName restoredTableName = TableName.valueOf(testMethodName + "_restored"); 144 final String[] families = new String[] { "f1", "f2" }; 145 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 146 147 // Create table with split keys 148 final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") }; 149 MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families); 150 151 // Insert data 152 insertData(UTIL, tableName, 2, 20, families); 153 insertData(UTIL, tableName, 2, 31, families); 154 insertData(UTIL, tableName, 2, 61, families); 155 int initialRowCount = UTIL.countRows(tableName); 156 157 // Get a region to truncate 158 MasterProcedureEnv environment = procExec.getEnvironment(); 159 RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream() 160 .filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString())) 161 .min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get(); 162 163 // Truncate the region (this should create a recovery snapshot) 164 long procId = 165 procExec.submitProcedure(new TruncateRegionProcedure(environment, regionToTruncate)); 166 ProcedureTestingUtility.waitProcedure(procExec, procId); 167 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 168 169 // Verify region is truncated (should have fewer rows) 170 int rowsAfterTruncate = UTIL.countRows(tableName); 171 assertTrue(rowsAfterTruncate < initialRowCount, "Should have fewer rows after truncate"); 172 173 // Find the recovery snapshot 174 String recoverySnapshotName = null; 175 for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) { 176 if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) { 177 recoverySnapshotName = snapshot.getName(); 178 break; 179 } 180 } 181 assertTrue(recoverySnapshotName != null, "Recovery snapshot should exist"); 182 183 // Restore from snapshot by cloning to a new table 184 UTIL.getAdmin().cloneSnapshot(recoverySnapshotName, restoredTableName); 185 UTIL.waitUntilAllRegionsAssigned(restoredTableName); 186 187 // Verify restored table has original data 188 assertEquals(initialRowCount, UTIL.countRows(restoredTableName), 189 "Restored table should have original data"); 190 191 // Clean up the cloned table 192 UTIL.getAdmin().disableTable(restoredTableName); 193 UTIL.getAdmin().deleteTable(restoredTableName); 194 } 195 196 public static class FailingTruncateRegionProcedure extends TruncateRegionProcedure { 197 private boolean failOnce = false; 198 199 public FailingTruncateRegionProcedure() { 200 super(); 201 } 202 203 public FailingTruncateRegionProcedure(MasterProcedureEnv env, RegionInfo region) 204 throws HBaseIOException { 205 super(env, region); 206 } 207 208 @Override 209 protected Flow executeFromState(MasterProcedureEnv env, TruncateRegionState state) 210 throws InterruptedException { 211 if (!failOnce && state == TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE) { 212 failOnce = true; 213 throw new RuntimeException("Simulated failure"); 214 } 215 return super.executeFromState(env, state); 216 } 217 } 218}