001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
import static org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil.insertData;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.SnapshotDescription;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateRegionState;
051
052@Category({ MasterTests.class, LargeTests.class })
053public class TestTruncateRegionProcedureWithRecovery extends TestTableDDLProcedureBase {
054  @ClassRule
055  public static final HBaseClassTestRule CLASS_RULE =
056    HBaseClassTestRule.forClass(TestTruncateRegionProcedureWithRecovery.class);
057  private static final Logger LOG =
058    LoggerFactory.getLogger(TestTruncateRegionProcedureWithRecovery.class);
059
060  @Rule
061  public TestName name = new TestName();
062
063  private static void setupConf(Configuration conf) {
064    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
065    conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0);
066    conf.setBoolean(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY, true);
067    conf.setInt("hbase.client.sync.wait.timeout.msec", 60000);
068  }
069
070  @BeforeClass
071  public static void setupCluster() throws Exception {
072    setupConf(UTIL.getConfiguration());
073    UTIL.startMiniCluster(3);
074  }
075
076  @AfterClass
077  public static void cleanupTest() throws Exception {
078    try {
079      UTIL.shutdownMiniCluster();
080    } catch (Exception e) {
081      LOG.warn("failure shutting down cluster", e);
082    }
083  }
084
085  @Before
086  public void setup() throws Exception {
087    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false);
088
089    // Turn off balancer, so it doesn't cut in and mess up our placements.
090    UTIL.getAdmin().balancerSwitch(false, true);
091    // Turn off the meta scanner, so it doesn't remove, parent on us.
092    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
093  }
094
095  @After
096  public void tearDown() throws Exception {
097    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false);
098    for (TableDescriptor htd : UTIL.getAdmin().listTableDescriptors()) {
099      UTIL.deleteTable(htd.getTableName());
100    }
101  }
102
103  @Test
104  public void testRecoverySnapshotRollback() throws Exception {
105    final TableName tableName = TableName.valueOf(name.getMethodName());
106    final String[] families = new String[] { "f1", "f2" };
107    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
108
109    // Create table with split keys
110    final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") };
111    MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families);
112
113    // Insert data
114    insertData(UTIL, tableName, 2, 20, families);
115    insertData(UTIL, tableName, 2, 31, families);
116    insertData(UTIL, tableName, 2, 61, families);
117
118    // Get a region to truncate
119    MasterProcedureEnv environment = procExec.getEnvironment();
120    RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream()
121      .filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString()))
122      .min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get();
123
124    // Create a procedure that might fail. Use a simple approach that creates a custom procedure
125    // that fails after snapshot.
126    // Submit the failing procedure
127    long procId =
128      procExec.submitProcedure(new FailingTruncateRegionProcedure(environment, regionToTruncate));
129
130    // Wait for procedure to complete (should fail)
131    ProcedureTestingUtility.waitProcedure(procExec, procId);
132    Procedure<MasterProcedureEnv> result = procExec.getResult(procId);
133    assertTrue("Procedure should have failed", result.isFailed());
134
135    // Verify no recovery snapshots remain after rollback
136    boolean snapshotFound = false;
137    for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
138      if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) {
139        snapshotFound = true;
140        break;
141      }
142    }
143    assertTrue("Recovery snapshot should have been cleaned up during rollback", !snapshotFound);
144  }
145
146  @Test
147  public void testRecoverySnapshotAndRestore() throws Exception {
148    final TableName tableName = TableName.valueOf(name.getMethodName());
149    final TableName restoredTableName = TableName.valueOf(name.getMethodName() + "_restored");
150    final String[] families = new String[] { "f1", "f2" };
151    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
152
153    // Create table with split keys
154    final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") };
155    MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families);
156
157    // Insert data
158    insertData(UTIL, tableName, 2, 20, families);
159    insertData(UTIL, tableName, 2, 31, families);
160    insertData(UTIL, tableName, 2, 61, families);
161    int initialRowCount = UTIL.countRows(tableName);
162
163    // Get a region to truncate
164    MasterProcedureEnv environment = procExec.getEnvironment();
165    RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream()
166      .filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString()))
167      .min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get();
168
169    // Truncate the region (this should create a recovery snapshot)
170    long procId =
171      procExec.submitProcedure(new TruncateRegionProcedure(environment, regionToTruncate));
172    ProcedureTestingUtility.waitProcedure(procExec, procId);
173    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
174
175    // Verify region is truncated (should have fewer rows)
176    int rowsAfterTruncate = UTIL.countRows(tableName);
177    assertTrue("Should have fewer rows after truncate", rowsAfterTruncate < initialRowCount);
178
179    // Find the recovery snapshot
180    String recoverySnapshotName = null;
181    for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
182      if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) {
183        recoverySnapshotName = snapshot.getName();
184        break;
185      }
186    }
187    assertTrue("Recovery snapshot should exist", recoverySnapshotName != null);
188
189    // Restore from snapshot by cloning to a new table
190    UTIL.getAdmin().cloneSnapshot(recoverySnapshotName, restoredTableName);
191    UTIL.waitUntilAllRegionsAssigned(restoredTableName);
192
193    // Verify restored table has original data
194    assertEquals("Restored table should have original data", initialRowCount,
195      UTIL.countRows(restoredTableName));
196
197    // Clean up the cloned table
198    UTIL.getAdmin().disableTable(restoredTableName);
199    UTIL.getAdmin().deleteTable(restoredTableName);
200  }
201
202  public static class FailingTruncateRegionProcedure extends TruncateRegionProcedure {
203    private boolean failOnce = false;
204
205    public FailingTruncateRegionProcedure() {
206      super();
207    }
208
209    public FailingTruncateRegionProcedure(MasterProcedureEnv env, RegionInfo region)
210      throws HBaseIOException {
211      super(env, region);
212    }
213
214    @Override
215    protected Flow executeFromState(MasterProcedureEnv env, TruncateRegionState state)
216      throws InterruptedException {
217      if (!failOnce && state == TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE) {
218        failOnce = true;
219        throw new RuntimeException("Simulated failure");
220      }
221      return super.executeFromState(env, state);
222    }
223  }
224}