001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
import static org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil.insertData;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.Optional;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.SnapshotDescription;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateRegionState;
048
049@Tag(MasterTests.TAG)
050@Tag(LargeTests.TAG)
051public class TestTruncateRegionProcedureWithRecovery extends TestTableDDLProcedureBase {
052  private static final Logger LOG =
053    LoggerFactory.getLogger(TestTruncateRegionProcedureWithRecovery.class);
054  private String testMethodName;
055
056  @BeforeEach
057  public void setTestMethod(TestInfo testInfo) {
058    testMethodName = testInfo.getTestMethod().get().getName();
059  }
060
061  protected static void setupConf(Configuration conf) {
062    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
063    conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0);
064    conf.setBoolean(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY, true);
065    conf.setInt("hbase.client.sync.wait.timeout.msec", 60000);
066  }
067
068  @BeforeAll
069  public static void setupCluster() throws Exception {
070    setupConf(UTIL.getConfiguration());
071    UTIL.startMiniCluster(3);
072  }
073
074  @AfterAll
075  public static void cleanupTest() throws Exception {
076    TestTableDDLProcedureBase.cleanupTest();
077  }
078
079  @BeforeEach
080  public void setup() throws Exception {
081    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false);
082
083    // Turn off balancer, so it doesn't cut in and mess up our placements.
084    UTIL.getAdmin().balancerSwitch(false, true);
085    // Turn off the meta scanner, so it doesn't remove, parent on us.
086    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
087  }
088
089  @AfterEach
090  public void tearDown() throws Exception {
091    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false);
092    for (TableDescriptor htd : UTIL.getAdmin().listTableDescriptors()) {
093      UTIL.deleteTable(htd.getTableName());
094    }
095  }
096
097  @Test
098  public void testRecoverySnapshotRollback() throws Exception {
099    final TableName tableName = TableName.valueOf(testMethodName);
100    final String[] families = new String[] { "f1", "f2" };
101    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
102
103    // Create table with split keys
104    final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") };
105    MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families);
106
107    // Insert data
108    insertData(UTIL, tableName, 2, 20, families);
109    insertData(UTIL, tableName, 2, 31, families);
110    insertData(UTIL, tableName, 2, 61, families);
111
112    // Get a region to truncate
113    MasterProcedureEnv environment = procExec.getEnvironment();
114    RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream()
115      .filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString()))
116      .min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get();
117
118    // Create a procedure that might fail. Use a simple approach that creates a custom procedure
119    // that fails after snapshot.
120    // Submit the failing procedure
121    long procId =
122      procExec.submitProcedure(new FailingTruncateRegionProcedure(environment, regionToTruncate));
123
124    // Wait for procedure to complete (should fail)
125    ProcedureTestingUtility.waitProcedure(procExec, procId);
126    Procedure<MasterProcedureEnv> result = procExec.getResult(procId);
127    assertTrue(result.isFailed(), "Procedure should have failed");
128
129    // Verify no recovery snapshots remain after rollback
130    boolean snapshotFound = false;
131    for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
132      if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) {
133        snapshotFound = true;
134        break;
135      }
136    }
137    assertTrue(!snapshotFound, "Recovery snapshot should have been cleaned up during rollback");
138  }
139
140  @Test
141  public void testRecoverySnapshotAndRestore() throws Exception {
142    final TableName tableName = TableName.valueOf(testMethodName);
143    final TableName restoredTableName = TableName.valueOf(testMethodName + "_restored");
144    final String[] families = new String[] { "f1", "f2" };
145    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
146
147    // Create table with split keys
148    final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") };
149    MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families);
150
151    // Insert data
152    insertData(UTIL, tableName, 2, 20, families);
153    insertData(UTIL, tableName, 2, 31, families);
154    insertData(UTIL, tableName, 2, 61, families);
155    int initialRowCount = UTIL.countRows(tableName);
156
157    // Get a region to truncate
158    MasterProcedureEnv environment = procExec.getEnvironment();
159    RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream()
160      .filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString()))
161      .min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get();
162
163    // Truncate the region (this should create a recovery snapshot)
164    long procId =
165      procExec.submitProcedure(new TruncateRegionProcedure(environment, regionToTruncate));
166    ProcedureTestingUtility.waitProcedure(procExec, procId);
167    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
168
169    // Verify region is truncated (should have fewer rows)
170    int rowsAfterTruncate = UTIL.countRows(tableName);
171    assertTrue(rowsAfterTruncate < initialRowCount, "Should have fewer rows after truncate");
172
173    // Find the recovery snapshot
174    String recoverySnapshotName = null;
175    for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
176      if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) {
177        recoverySnapshotName = snapshot.getName();
178        break;
179      }
180    }
181    assertTrue(recoverySnapshotName != null, "Recovery snapshot should exist");
182
183    // Restore from snapshot by cloning to a new table
184    UTIL.getAdmin().cloneSnapshot(recoverySnapshotName, restoredTableName);
185    UTIL.waitUntilAllRegionsAssigned(restoredTableName);
186
187    // Verify restored table has original data
188    assertEquals(initialRowCount, UTIL.countRows(restoredTableName),
189      "Restored table should have original data");
190
191    // Clean up the cloned table
192    UTIL.getAdmin().disableTable(restoredTableName);
193    UTIL.getAdmin().deleteTable(restoredTableName);
194  }
195
196  public static class FailingTruncateRegionProcedure extends TruncateRegionProcedure {
197    private boolean failOnce = false;
198
199    public FailingTruncateRegionProcedure() {
200      super();
201    }
202
203    public FailingTruncateRegionProcedure(MasterProcedureEnv env, RegionInfo region)
204      throws HBaseIOException {
205      super(env, region);
206    }
207
208    @Override
209    protected Flow executeFromState(MasterProcedureEnv env, TruncateRegionState state)
210      throws InterruptedException {
211      if (!failOnce && state == TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE) {
212        failOnce = true;
213        throw new RuntimeException("Simulated failure");
214      }
215      return super.executeFromState(env, state);
216    }
217  }
218}