001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static org.junit.Assert.assertEquals;
021
022import org.apache.hadoop.conf.Configuration;
023import org.apache.hadoop.fs.Path;
024import org.apache.hadoop.hbase.HBaseClassTestRule;
025import org.apache.hadoop.hbase.HBaseTestingUtility;
026import org.apache.hadoop.hbase.StartMiniClusterOption;
027import org.apache.hadoop.hbase.TableName;
028import org.apache.hadoop.hbase.client.RegionInfo;
029import org.apache.hadoop.hbase.client.TableDescriptor;
030import org.apache.hadoop.hbase.procedure2.Procedure;
031import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
032import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
033import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
034import org.apache.hadoop.hbase.testclassification.LargeTests;
035import org.apache.hadoop.hbase.testclassification.MasterTests;
036import org.apache.hadoop.hbase.util.Bytes;
037import org.apache.hadoop.hbase.util.FSUtils;
038import org.apache.hadoop.hbase.util.ModifyRegionUtils;
039import org.junit.After;
040import org.junit.Before;
041import org.junit.ClassRule;
042import org.junit.Test;
043import org.junit.experimental.categories.Category;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CreateTableState;
048import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DeleteTableState;
049import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DisableTableState;
050import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.EnableTableState;
051import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateTableState;
052
053@Category({MasterTests.class, LargeTests.class})
054public class TestMasterFailoverWithProcedures {
055
  // Class-level JUnit rule obtained from HBaseClassTestRule for this test class.
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestMasterFailoverWithProcedures.class);

  // Logger used by tearDown() and by the static recovery helper.
  private static final Logger LOG = LoggerFactory.getLogger(TestMasterFailoverWithProcedures.class);

  // Testing utility that owns the mini cluster; started in setup(), shut down in tearDown().
  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
063
064  private static void setupConf(Configuration conf) {
065    // don't waste time retrying with the roll, the test is already slow enough.
066    conf.setInt(WALProcedureStore.MAX_RETRIES_BEFORE_ROLL_CONF_KEY, 1);
067    conf.setInt(WALProcedureStore.WAIT_BEFORE_ROLL_CONF_KEY, 0);
068    conf.setInt(WALProcedureStore.ROLL_RETRIES_CONF_KEY, 1);
069    conf.setInt(WALProcedureStore.MAX_SYNC_FAILURE_ROLL_CONF_KEY, 1);
070    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
071  }
072
073  @Before
074  public void setup() throws Exception {
075    setupConf(UTIL.getConfiguration());
076    // Set master number and use default values for other options.
077    StartMiniClusterOption option = StartMiniClusterOption.builder().numMasters(2).build();
078    UTIL.startMiniCluster(option);
079
080    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
081    ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, false);
082    ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, false);
083  }
084
085  @After
086  public void tearDown() throws Exception {
087    try {
088      UTIL.shutdownMiniCluster();
089    } catch (Exception e) {
090      LOG.warn("failure shutting down cluster", e);
091    }
092  }
093
094  // ==========================================================================
095  //  Test Create Table
096  // ==========================================================================
097  @Test
098  public void testCreateWithFailover() throws Exception {
099    // TODO: Should we try every step? (master failover takes long time)
100    // It is already covered by TestCreateTableProcedure
101    // but without the master restart, only the executor/store is restarted.
102    // Without Master restart we may not find bug in the procedure code
103    // like missing "wait" for resources to be available (e.g. RS)
104    testCreateWithFailoverAtStep(CreateTableState.CREATE_TABLE_ASSIGN_REGIONS.ordinal());
105  }
106
107  private void testCreateWithFailoverAtStep(final int step) throws Exception {
108    final TableName tableName = TableName.valueOf("testCreateWithFailoverAtStep" + step);
109
110    // create the table
111    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
112    ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true);
113    ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true);
114
115    // Start the Create procedure && kill the executor
116    byte[][] splitKeys = null;
117    TableDescriptor htd = MasterProcedureTestingUtility.createHTD(tableName, "f1", "f2");
118    RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, splitKeys);
119    long procId = procExec.submitProcedure(
120        new CreateTableProcedure(procExec.getEnvironment(), htd, regions));
121    testRecoveryAndDoubleExecution(UTIL, procId, step);
122
123    MasterProcedureTestingUtility.validateTableCreation(
124        UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
125  }
126
127  // ==========================================================================
128  //  Test Delete Table
129  // ==========================================================================
130  @Test
131  public void testDeleteWithFailover() throws Exception {
132    // TODO: Should we try every step? (master failover takes long time)
133    // It is already covered by TestDeleteTableProcedure
134    // but without the master restart, only the executor/store is restarted.
135    // Without Master restart we may not find bug in the procedure code
136    // like missing "wait" for resources to be available (e.g. RS)
137    testDeleteWithFailoverAtStep(DeleteTableState.DELETE_TABLE_UNASSIGN_REGIONS.ordinal());
138  }
139
140  private void testDeleteWithFailoverAtStep(final int step) throws Exception {
141    final TableName tableName = TableName.valueOf("testDeleteWithFailoverAtStep" + step);
142
143    // create the table
144    byte[][] splitKeys = null;
145    RegionInfo[] regions = MasterProcedureTestingUtility.createTable(
146        getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
147    Path tableDir = FSUtils.getTableDir(getRootDir(), tableName);
148    MasterProcedureTestingUtility.validateTableCreation(
149        UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
150    UTIL.getAdmin().disableTable(tableName);
151
152    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
153    ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true);
154    ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true);
155
156    // Start the Delete procedure && kill the executor
157    long procId = procExec.submitProcedure(
158        new DeleteTableProcedure(procExec.getEnvironment(), tableName));
159    testRecoveryAndDoubleExecution(UTIL, procId, step);
160
161    MasterProcedureTestingUtility.validateTableDeletion(
162        UTIL.getHBaseCluster().getMaster(), tableName);
163  }
164
165  // ==========================================================================
166  //  Test Truncate Table
167  // ==========================================================================
168  @Test
169  public void testTruncateWithFailover() throws Exception {
170    // TODO: Should we try every step? (master failover takes long time)
171    // It is already covered by TestTruncateTableProcedure
172    // but without the master restart, only the executor/store is restarted.
173    // Without Master restart we may not find bug in the procedure code
174    // like missing "wait" for resources to be available (e.g. RS)
175    testTruncateWithFailoverAtStep(true, TruncateTableState.TRUNCATE_TABLE_ADD_TO_META.ordinal());
176  }
177
178  private void testTruncateWithFailoverAtStep(final boolean preserveSplits, final int step)
179      throws Exception {
180    final TableName tableName = TableName.valueOf("testTruncateWithFailoverAtStep" + step);
181
182    // create the table
183    final String[] families = new String[] { "f1", "f2" };
184    final byte[][] splitKeys = new byte[][] {
185        Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
186    };
187    RegionInfo[] regions = MasterProcedureTestingUtility.createTable(
188        getMasterProcedureExecutor(), tableName, splitKeys, families);
189    // load and verify that there are rows in the table
190    MasterProcedureTestingUtility.loadData(
191        UTIL.getConnection(), tableName, 100, splitKeys, families);
192    assertEquals(100, UTIL.countRows(tableName));
193    // disable the table
194    UTIL.getAdmin().disableTable(tableName);
195
196    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
197    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
198
199    // Start the Truncate procedure && kill the executor
200    long procId = procExec.submitProcedure(
201        new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
202    testRecoveryAndDoubleExecution(UTIL, procId, step);
203
204    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
205    UTIL.waitUntilAllRegionsAssigned(tableName);
206
207    // validate the table regions and layout
208    regions = UTIL.getAdmin().getTableRegions(tableName).toArray(new RegionInfo[0]);
209    if (preserveSplits) {
210      assertEquals(1 + splitKeys.length, regions.length);
211    } else {
212      assertEquals(1, regions.length);
213    }
214    MasterProcedureTestingUtility.validateTableCreation(
215        UTIL.getHBaseCluster().getMaster(), tableName, regions, families);
216
217    // verify that there are no rows in the table
218    assertEquals(0, UTIL.countRows(tableName));
219
220    // verify that the table is read/writable
221    MasterProcedureTestingUtility.loadData(
222        UTIL.getConnection(), tableName, 50, splitKeys, families);
223    assertEquals(50, UTIL.countRows(tableName));
224  }
225
226  // ==========================================================================
227  //  Test Disable Table
228  // ==========================================================================
229  @Test
230  public void testDisableTableWithFailover() throws Exception {
231    // TODO: Should we try every step? (master failover takes long time)
232    // It is already covered by TestDisableTableProcedure
233    // but without the master restart, only the executor/store is restarted.
234    // Without Master restart we may not find bug in the procedure code
235    // like missing "wait" for resources to be available (e.g. RS)
236    testDisableTableWithFailoverAtStep(
237        DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE.ordinal());
238  }
239
240  private void testDisableTableWithFailoverAtStep(final int step) throws Exception {
241    final TableName tableName = TableName.valueOf("testDisableTableWithFailoverAtStep" + step);
242
243    // create the table
244    final byte[][] splitKeys = new byte[][] {
245        Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
246    };
247    MasterProcedureTestingUtility.createTable(
248        getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
249
250    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
251    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
252
253    // Start the Delete procedure && kill the executor
254    long procId = procExec.submitProcedure(
255        new DisableTableProcedure(procExec.getEnvironment(), tableName, false));
256    testRecoveryAndDoubleExecution(UTIL, procId, step);
257
258    MasterProcedureTestingUtility.validateTableIsDisabled(
259        UTIL.getHBaseCluster().getMaster(), tableName);
260  }
261
262  // ==========================================================================
263  //  Test Enable Table
264  // ==========================================================================
265  @Test
266  public void testEnableTableWithFailover() throws Exception {
267    // TODO: Should we try every step? (master failover takes long time)
268    // It is already covered by TestEnableTableProcedure
269    // but without the master restart, only the executor/store is restarted.
270    // Without Master restart we may not find bug in the procedure code
271    // like missing "wait" for resources to be available (e.g. RS)
272    testEnableTableWithFailoverAtStep(
273        EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE.ordinal());
274  }
275
276  private void testEnableTableWithFailoverAtStep(final int step) throws Exception {
277    final TableName tableName = TableName.valueOf("testEnableTableWithFailoverAtStep" + step);
278
279    // create the table
280    final byte[][] splitKeys = new byte[][] {
281        Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
282    };
283    MasterProcedureTestingUtility.createTable(
284        getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
285    UTIL.getAdmin().disableTable(tableName);
286
287    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
288    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
289
290    // Start the Delete procedure && kill the executor
291    long procId = procExec.submitProcedure(
292        new EnableTableProcedure(procExec.getEnvironment(), tableName));
293    testRecoveryAndDoubleExecution(UTIL, procId, step);
294
295    MasterProcedureTestingUtility.validateTableIsEnabled(
296        UTIL.getHBaseCluster().getMaster(), tableName);
297  }
298
299  // ==========================================================================
300  //  Test Helpers
301  // ==========================================================================
302  public static void testRecoveryAndDoubleExecution(final HBaseTestingUtility testUtil,
303      final long procId, final int lastStepBeforeFailover) throws Exception {
304    ProcedureExecutor<MasterProcedureEnv> procExec =
305        testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor();
306    ProcedureTestingUtility.waitProcedure(procExec, procId);
307
308    final Procedure proc = procExec.getProcedure(procId);
309    for (int i = 0; i < lastStepBeforeFailover; ++i) {
310      LOG.info("Restart "+ i +" exec state: " + proc);
311      ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId);
312      MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec);
313      ProcedureTestingUtility.waitProcedure(procExec, procId);
314    }
315    ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId);
316
317    LOG.info("Trigger master failover");
318    MasterProcedureTestingUtility.masterFailover(testUtil);
319
320    procExec = testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor();
321    ProcedureTestingUtility.waitProcedure(procExec, procId);
322    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
323  }
324
325  // ==========================================================================
326  //  Helpers
327  // ==========================================================================
328  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
329    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
330  }
331
332  private Path getRootDir() {
333    return UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
334  }
335}