001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.SnapshotDescription;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;

import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ModifyTableState;
042
043@Tag(MasterTests.TAG)
044@Tag(LargeTests.TAG)
045public class TestModifyTableProcedureWithRecovery extends TestTableDDLProcedureBase {
046  private String testMethodName;
047
048  @BeforeEach
049  public void setTestMethod(TestInfo testInfo) {
050    testMethodName = testInfo.getTestMethod().get().getName();
051  }
052
053  @BeforeAll
054  public static void setupCluster() throws Exception {
055    // Enable recovery snapshots
056    TestTableDDLProcedureBase.setupConf(UTIL.getConfiguration());
057    UTIL.getConfiguration().setBoolean(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY,
058      true);
059    UTIL.startMiniCluster(1);
060  }
061
062  @AfterAll
063  public static void cleanupTest() throws Exception {
064    TestTableDDLProcedureBase.cleanupTest();
065  }
066
067  @Test
068  public void testRecoverySnapshotRollback() throws Exception {
069    final TableName tableName = TableName.valueOf(testMethodName);
070    final String cf1 = "cf1";
071    final String cf2 = "cf2";
072    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
073
074    // Create table with multiple column families
075    MasterProcedureTestingUtility.createTable(procExec, tableName, null, cf1, cf2);
076    MasterProcedureTestingUtility.loadData(UTIL.getConnection(), tableName, 100, new byte[0][],
077      new String[] { cf1, cf2 });
078    UTIL.getAdmin().disableTable(tableName);
079
080    // Create a procedure that will fail - modify to delete a column family
081    // but simulate failure after snapshot creation
082    // Modify table to remove cf2 (which should trigger recovery snapshot)
083    TableDescriptor originalHtd = UTIL.getAdmin().getDescriptor(tableName);
084    TableDescriptor modifiedHtd =
085      TableDescriptorBuilder.newBuilder(originalHtd).removeColumnFamily(cf2.getBytes()).build();
086
087    // Submit the failing procedure
088    long procId = procExec
089      .submitProcedure(new FailingModifyTableProcedure(procExec.getEnvironment(), modifiedHtd));
090
091    // Wait for procedure to complete (should fail)
092    ProcedureTestingUtility.waitProcedure(procExec, procId);
093    Procedure<MasterProcedureEnv> result = procExec.getResult(procId);
094    assertTrue(result.isFailed(), "Procedure should have failed");
095
096    // Verify no recovery snapshots remain after rollback
097    boolean snapshotFound = false;
098    for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
099      if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) {
100        snapshotFound = true;
101        break;
102      }
103    }
104    assertTrue(!snapshotFound, "Recovery snapshot should have been cleaned up during rollback");
105  }
106
107  @Test
108  public void testRecoverySnapshotAndRestore() throws Exception {
109    final TableName tableName = TableName.valueOf(testMethodName);
110    final TableName restoredTableName = TableName.valueOf(testMethodName + "_restored");
111    final String cf1 = "cf1";
112    final String cf2 = "cf2";
113    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
114
115    // Create table with multiple column families
116    MasterProcedureTestingUtility.createTable(procExec, tableName, null, cf1, cf2);
117    MasterProcedureTestingUtility.loadData(UTIL.getConnection(), tableName, 100, new byte[0][],
118      new String[] { cf1, cf2 });
119    UTIL.getAdmin().disableTable(tableName);
120
121    // Modify table to remove cf2 (which should trigger recovery snapshot)
122    TableDescriptor originalHtd = UTIL.getAdmin().getDescriptor(tableName);
123    TableDescriptor modifiedHtd =
124      TableDescriptorBuilder.newBuilder(originalHtd).removeColumnFamily(cf2.getBytes()).build();
125
126    long procId = ProcedureTestingUtility.submitAndWait(procExec,
127      new ModifyTableProcedure(procExec.getEnvironment(), modifiedHtd));
128    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
129
130    // Verify table modification was successful
131    TableDescriptor currentHtd = UTIL.getAdmin().getDescriptor(tableName);
132    assertEquals(1, currentHtd.getColumnFamilyNames().size(), "Should have one column family");
133    assertTrue(currentHtd.hasColumnFamily(cf1.getBytes()), "Should only have cf1");
134
135    // Find the recovery snapshot
136    String recoverySnapshotName = null;
137    for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
138      if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) {
139        recoverySnapshotName = snapshot.getName();
140        break;
141      }
142    }
143    assertTrue(recoverySnapshotName != null, "Recovery snapshot should exist");
144
145    // Restore from snapshot by cloning to a new table
146    UTIL.getAdmin().cloneSnapshot(recoverySnapshotName, restoredTableName);
147    UTIL.waitUntilAllRegionsAssigned(restoredTableName);
148
149    // Verify restored table has original structure with both column families
150    TableDescriptor restoredHtd = UTIL.getAdmin().getDescriptor(restoredTableName);
151    assertEquals(2, restoredHtd.getColumnFamilyNames().size(), "Should have two column families");
152    assertTrue(restoredHtd.hasColumnFamily(cf1.getBytes()), "Should have cf1");
153    assertTrue(restoredHtd.hasColumnFamily(cf2.getBytes()), "Should have cf2");
154
155    // Clean up the cloned table
156    UTIL.getAdmin().disableTable(restoredTableName);
157    UTIL.getAdmin().deleteTable(restoredTableName);
158  }
159
160  public static class FailingModifyTableProcedure extends ModifyTableProcedure {
161    private boolean failOnce = false;
162
163    public FailingModifyTableProcedure() {
164      super();
165    }
166
167    public FailingModifyTableProcedure(MasterProcedureEnv env, TableDescriptor newTableDescriptor)
168      throws HBaseIOException {
169      super(env, newTableDescriptor);
170    }
171
172    @Override
173    protected Flow executeFromState(MasterProcedureEnv env, ModifyTableState state)
174      throws InterruptedException {
175      if (!failOnce && state == ModifyTableState.MODIFY_TABLE_CLOSE_EXCESS_REPLICAS) {
176        failOnce = true;
177        throw new RuntimeException("Simulated failure");
178      }
179      return super.executeFromState(env, state);
180    }
181  }
182}