001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import static org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil.insertData; 021import static org.junit.Assert.assertEquals; 022import static org.junit.Assert.assertTrue; 023 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.hbase.HBaseClassTestRule; 026import org.apache.hadoop.hbase.HBaseIOException; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.TableName; 029import org.apache.hadoop.hbase.client.RegionInfo; 030import org.apache.hadoop.hbase.client.SnapshotDescription; 031import org.apache.hadoop.hbase.client.TableDescriptor; 032import org.apache.hadoop.hbase.procedure2.Procedure; 033import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 034import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 035import org.apache.hadoop.hbase.testclassification.LargeTests; 036import org.apache.hadoop.hbase.testclassification.MasterTests; 037import org.apache.hadoop.hbase.util.Bytes; 038import org.junit.After; 039import org.junit.AfterClass; 040import org.junit.Before; 041import org.junit.BeforeClass; 042import org.junit.ClassRule; 043import org.junit.Rule; 044import org.junit.Test; 045import org.junit.experimental.categories.Category; 046import org.junit.rules.TestName; 047import org.slf4j.Logger; 048import org.slf4j.LoggerFactory; 049 050import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateRegionState; 051 052@Category({ MasterTests.class, LargeTests.class }) 053public class TestTruncateRegionProcedureWithRecovery extends TestTableDDLProcedureBase { 054 @ClassRule 055 public static final HBaseClassTestRule CLASS_RULE = 056 HBaseClassTestRule.forClass(TestTruncateRegionProcedureWithRecovery.class); 057 private static final Logger LOG = 058 LoggerFactory.getLogger(TestTruncateRegionProcedureWithRecovery.class); 059 060 @Rule 061 public TestName name = new TestName(); 062 063 private static void setupConf(Configuration conf) { 064 conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); 065 conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0); 066 conf.setBoolean(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY, true); 067 conf.setInt("hbase.client.sync.wait.timeout.msec", 60000); 068 } 069 070 @BeforeClass 071 public static void setupCluster() throws Exception { 072 setupConf(UTIL.getConfiguration()); 073 UTIL.startMiniCluster(3); 074 } 075 076 @AfterClass 077 public static void cleanupTest() throws Exception { 078 try { 079 UTIL.shutdownMiniCluster(); 080 } catch (Exception e) { 081 LOG.warn("failure shutting down cluster", e); 082 } 083 } 084 085 @Before 086 public void setup() throws Exception { 087 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false); 088 089 // Turn off balancer, so it doesn't cut in and mess up our placements. 090 UTIL.getAdmin().balancerSwitch(false, true); 091 // Turn off the meta scanner, so it doesn't remove, parent on us. 092 UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); 093 } 094 095 @After 096 public void tearDown() throws Exception { 097 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false); 098 for (TableDescriptor htd : UTIL.getAdmin().listTableDescriptors()) { 099 UTIL.deleteTable(htd.getTableName()); 100 } 101 } 102 103 @Test 104 public void testRecoverySnapshotRollback() throws Exception { 105 final TableName tableName = TableName.valueOf(name.getMethodName()); 106 final String[] families = new String[] { "f1", "f2" }; 107 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 108 109 // Create table with split keys 110 final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") }; 111 MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families); 112 113 // Insert data 114 insertData(UTIL, tableName, 2, 20, families); 115 insertData(UTIL, tableName, 2, 31, families); 116 insertData(UTIL, tableName, 2, 61, families); 117 118 // Get a region to truncate 119 MasterProcedureEnv environment = procExec.getEnvironment(); 120 RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream() 121 .filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString())) 122 .min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get(); 123 124 // Create a procedure that might fail. Use a simple approach that creates a custom procedure 125 // that fails after snapshot. 126 // Submit the failing procedure 127 long procId = 128 procExec.submitProcedure(new FailingTruncateRegionProcedure(environment, regionToTruncate)); 129 130 // Wait for procedure to complete (should fail) 131 ProcedureTestingUtility.waitProcedure(procExec, procId); 132 Procedure<MasterProcedureEnv> result = procExec.getResult(procId); 133 assertTrue("Procedure should have failed", result.isFailed()); 134 135 // Verify no recovery snapshots remain after rollback 136 boolean snapshotFound = false; 137 for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) { 138 if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) { 139 snapshotFound = true; 140 break; 141 } 142 } 143 assertTrue("Recovery snapshot should have been cleaned up during rollback", !snapshotFound); 144 } 145 146 @Test 147 public void testRecoverySnapshotAndRestore() throws Exception { 148 final TableName tableName = TableName.valueOf(name.getMethodName()); 149 final TableName restoredTableName = TableName.valueOf(name.getMethodName() + "_restored"); 150 final String[] families = new String[] { "f1", "f2" }; 151 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 152 153 // Create table with split keys 154 final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") }; 155 MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families); 156 157 // Insert data 158 insertData(UTIL, tableName, 2, 20, families); 159 insertData(UTIL, tableName, 2, 31, families); 160 insertData(UTIL, tableName, 2, 61, families); 161 int initialRowCount = UTIL.countRows(tableName); 162 163 // Get a region to truncate 164 MasterProcedureEnv environment = procExec.getEnvironment(); 165 RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream() 166 .filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString())) 167 .min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get(); 168 169 // Truncate the region (this should create a recovery snapshot) 170 long procId = 171 procExec.submitProcedure(new TruncateRegionProcedure(environment, regionToTruncate)); 172 ProcedureTestingUtility.waitProcedure(procExec, procId); 173 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 174 175 // Verify region is truncated (should have fewer rows) 176 int rowsAfterTruncate = UTIL.countRows(tableName); 177 assertTrue("Should have fewer rows after truncate", rowsAfterTruncate < initialRowCount); 178 179 // Find the recovery snapshot 180 String recoverySnapshotName = null; 181 for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) { 182 if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) { 183 recoverySnapshotName = snapshot.getName(); 184 break; 185 } 186 } 187 assertTrue("Recovery snapshot should exist", recoverySnapshotName != null); 188 189 // Restore from snapshot by cloning to a new table 190 UTIL.getAdmin().cloneSnapshot(recoverySnapshotName, restoredTableName); 191 UTIL.waitUntilAllRegionsAssigned(restoredTableName); 192 193 // Verify restored table has original data 194 assertEquals("Restored table should have original data", initialRowCount, 195 UTIL.countRows(restoredTableName)); 196 197 // Clean up the cloned table 198 UTIL.getAdmin().disableTable(restoredTableName); 199 UTIL.getAdmin().deleteTable(restoredTableName); 200 } 201 202 public static class FailingTruncateRegionProcedure extends TruncateRegionProcedure { 203 private boolean failOnce = false; 204 205 public FailingTruncateRegionProcedure() { 206 super(); 207 } 208 209 public FailingTruncateRegionProcedure(MasterProcedureEnv env, RegionInfo region) 210 throws HBaseIOException { 211 super(env, region); 212 } 213 214 @Override 215 protected Flow executeFromState(MasterProcedureEnv env, TruncateRegionState state) 216 throws InterruptedException { 217 if (!failOnce && state == TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE) { 218 failOnce = true; 219 throw new RuntimeException("Simulated failure"); 220 } 221 return super.executeFromState(env, state); 222 } 223 } 224}