001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static org.junit.Assert.assertEquals;
021
022import org.apache.hadoop.conf.Configuration;
023import org.apache.hadoop.fs.Path;
024import org.apache.hadoop.hbase.HBaseClassTestRule;
025import org.apache.hadoop.hbase.HBaseTestingUtility;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.client.RegionInfo;
028import org.apache.hadoop.hbase.client.TableDescriptor;
029import org.apache.hadoop.hbase.procedure2.Procedure;
030import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
031import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
032import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
033import org.apache.hadoop.hbase.testclassification.LargeTests;
034import org.apache.hadoop.hbase.testclassification.MasterTests;
035import org.apache.hadoop.hbase.util.Bytes;
036import org.apache.hadoop.hbase.util.FSUtils;
037import org.apache.hadoop.hbase.util.ModifyRegionUtils;
038import org.junit.After;
039import org.junit.Before;
040import org.junit.ClassRule;
041import org.junit.Test;
042import org.junit.experimental.categories.Category;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045
046import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CreateTableState;
047import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DeleteTableState;
048import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DisableTableState;
049import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.EnableTableState;
050import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateTableState;
051
052@Category({MasterTests.class, LargeTests.class})
053public class TestMasterFailoverWithProcedures {
054
055  @ClassRule
056  public static final HBaseClassTestRule CLASS_RULE =
057      HBaseClassTestRule.forClass(TestMasterFailoverWithProcedures.class);
058
059  private static final Logger LOG = LoggerFactory.getLogger(TestMasterFailoverWithProcedures.class);
060
061  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
062
063  private static void setupConf(Configuration conf) {
064    // don't waste time retrying with the roll, the test is already slow enough.
065    conf.setInt(WALProcedureStore.MAX_RETRIES_BEFORE_ROLL_CONF_KEY, 1);
066    conf.setInt(WALProcedureStore.WAIT_BEFORE_ROLL_CONF_KEY, 0);
067    conf.setInt(WALProcedureStore.ROLL_RETRIES_CONF_KEY, 1);
068    conf.setInt(WALProcedureStore.MAX_SYNC_FAILURE_ROLL_CONF_KEY, 1);
069    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
070    conf.setInt(MasterProcedureConstants.MASTER_URGENT_PROCEDURE_THREADS, 0);
071  }
072
073  @Before
074  public void setup() throws Exception {
075    setupConf(UTIL.getConfiguration());
076    UTIL.startMiniCluster(2, 1);
077
078    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
079    ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, false);
080    ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, false);
081  }
082
083  @After
084  public void tearDown() throws Exception {
085    try {
086      UTIL.shutdownMiniCluster();
087    } catch (Exception e) {
088      LOG.warn("failure shutting down cluster", e);
089    }
090  }
091
092  // ==========================================================================
093  //  Test Create Table
094  // ==========================================================================
095  @Test
096  public void testCreateWithFailover() throws Exception {
097    // TODO: Should we try every step? (master failover takes long time)
098    // It is already covered by TestCreateTableProcedure
099    // but without the master restart, only the executor/store is restarted.
100    // Without Master restart we may not find bug in the procedure code
101    // like missing "wait" for resources to be available (e.g. RS)
102    testCreateWithFailoverAtStep(CreateTableState.CREATE_TABLE_ASSIGN_REGIONS.ordinal());
103  }
104
105  private void testCreateWithFailoverAtStep(final int step) throws Exception {
106    final TableName tableName = TableName.valueOf("testCreateWithFailoverAtStep" + step);
107
108    // create the table
109    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
110    ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true);
111    ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true);
112
113    // Start the Create procedure && kill the executor
114    byte[][] splitKeys = null;
115    TableDescriptor htd = MasterProcedureTestingUtility.createHTD(tableName, "f1", "f2");
116    RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, splitKeys);
117    long procId = procExec.submitProcedure(
118        new CreateTableProcedure(procExec.getEnvironment(), htd, regions));
119    testRecoveryAndDoubleExecution(UTIL, procId, step);
120
121    MasterProcedureTestingUtility.validateTableCreation(
122        UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
123  }
124
125  // ==========================================================================
126  //  Test Delete Table
127  // ==========================================================================
128  @Test
129  public void testDeleteWithFailover() throws Exception {
130    // TODO: Should we try every step? (master failover takes long time)
131    // It is already covered by TestDeleteTableProcedure
132    // but without the master restart, only the executor/store is restarted.
133    // Without Master restart we may not find bug in the procedure code
134    // like missing "wait" for resources to be available (e.g. RS)
135    testDeleteWithFailoverAtStep(DeleteTableState.DELETE_TABLE_UNASSIGN_REGIONS.ordinal());
136  }
137
138  private void testDeleteWithFailoverAtStep(final int step) throws Exception {
139    final TableName tableName = TableName.valueOf("testDeleteWithFailoverAtStep" + step);
140
141    // create the table
142    byte[][] splitKeys = null;
143    RegionInfo[] regions = MasterProcedureTestingUtility.createTable(
144        getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
145    Path tableDir = FSUtils.getTableDir(getRootDir(), tableName);
146    MasterProcedureTestingUtility.validateTableCreation(
147        UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
148    UTIL.getAdmin().disableTable(tableName);
149
150    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
151    ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true);
152    ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true);
153
154    // Start the Delete procedure && kill the executor
155    long procId = procExec.submitProcedure(
156        new DeleteTableProcedure(procExec.getEnvironment(), tableName));
157    testRecoveryAndDoubleExecution(UTIL, procId, step);
158
159    MasterProcedureTestingUtility.validateTableDeletion(
160        UTIL.getHBaseCluster().getMaster(), tableName);
161  }
162
163  // ==========================================================================
164  //  Test Truncate Table
165  // ==========================================================================
166  @Test
167  public void testTruncateWithFailover() throws Exception {
168    // TODO: Should we try every step? (master failover takes long time)
169    // It is already covered by TestTruncateTableProcedure
170    // but without the master restart, only the executor/store is restarted.
171    // Without Master restart we may not find bug in the procedure code
172    // like missing "wait" for resources to be available (e.g. RS)
173    testTruncateWithFailoverAtStep(true, TruncateTableState.TRUNCATE_TABLE_ADD_TO_META.ordinal());
174  }
175
176  private void testTruncateWithFailoverAtStep(final boolean preserveSplits, final int step)
177      throws Exception {
178    final TableName tableName = TableName.valueOf("testTruncateWithFailoverAtStep" + step);
179
180    // create the table
181    final String[] families = new String[] { "f1", "f2" };
182    final byte[][] splitKeys = new byte[][] {
183        Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
184    };
185    RegionInfo[] regions = MasterProcedureTestingUtility.createTable(
186        getMasterProcedureExecutor(), tableName, splitKeys, families);
187    // load and verify that there are rows in the table
188    MasterProcedureTestingUtility.loadData(
189        UTIL.getConnection(), tableName, 100, splitKeys, families);
190    assertEquals(100, UTIL.countRows(tableName));
191    // disable the table
192    UTIL.getAdmin().disableTable(tableName);
193
194    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
195    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
196
197    // Start the Truncate procedure && kill the executor
198    long procId = procExec.submitProcedure(
199        new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
200    testRecoveryAndDoubleExecution(UTIL, procId, step);
201
202    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
203    UTIL.waitUntilAllRegionsAssigned(tableName);
204
205    // validate the table regions and layout
206    regions = UTIL.getAdmin().getTableRegions(tableName).toArray(new RegionInfo[0]);
207    if (preserveSplits) {
208      assertEquals(1 + splitKeys.length, regions.length);
209    } else {
210      assertEquals(1, regions.length);
211    }
212    MasterProcedureTestingUtility.validateTableCreation(
213        UTIL.getHBaseCluster().getMaster(), tableName, regions, families);
214
215    // verify that there are no rows in the table
216    assertEquals(0, UTIL.countRows(tableName));
217
218    // verify that the table is read/writable
219    MasterProcedureTestingUtility.loadData(
220        UTIL.getConnection(), tableName, 50, splitKeys, families);
221    assertEquals(50, UTIL.countRows(tableName));
222  }
223
224  // ==========================================================================
225  //  Test Disable Table
226  // ==========================================================================
227  @Test
228  public void testDisableTableWithFailover() throws Exception {
229    // TODO: Should we try every step? (master failover takes long time)
230    // It is already covered by TestDisableTableProcedure
231    // but without the master restart, only the executor/store is restarted.
232    // Without Master restart we may not find bug in the procedure code
233    // like missing "wait" for resources to be available (e.g. RS)
234    testDisableTableWithFailoverAtStep(
235        DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE.ordinal());
236  }
237
238  private void testDisableTableWithFailoverAtStep(final int step) throws Exception {
239    final TableName tableName = TableName.valueOf("testDisableTableWithFailoverAtStep" + step);
240
241    // create the table
242    final byte[][] splitKeys = new byte[][] {
243        Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
244    };
245    MasterProcedureTestingUtility.createTable(
246        getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
247
248    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
249    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
250
251    // Start the Delete procedure && kill the executor
252    long procId = procExec.submitProcedure(
253        new DisableTableProcedure(procExec.getEnvironment(), tableName, false));
254    testRecoveryAndDoubleExecution(UTIL, procId, step);
255
256    MasterProcedureTestingUtility.validateTableIsDisabled(
257        UTIL.getHBaseCluster().getMaster(), tableName);
258  }
259
260  // ==========================================================================
261  //  Test Enable Table
262  // ==========================================================================
263  @Test
264  public void testEnableTableWithFailover() throws Exception {
265    // TODO: Should we try every step? (master failover takes long time)
266    // It is already covered by TestEnableTableProcedure
267    // but without the master restart, only the executor/store is restarted.
268    // Without Master restart we may not find bug in the procedure code
269    // like missing "wait" for resources to be available (e.g. RS)
270    testEnableTableWithFailoverAtStep(
271        EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE.ordinal());
272  }
273
274  private void testEnableTableWithFailoverAtStep(final int step) throws Exception {
275    final TableName tableName = TableName.valueOf("testEnableTableWithFailoverAtStep" + step);
276
277    // create the table
278    final byte[][] splitKeys = new byte[][] {
279        Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
280    };
281    MasterProcedureTestingUtility.createTable(
282        getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
283    UTIL.getAdmin().disableTable(tableName);
284
285    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
286    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
287
288    // Start the Delete procedure && kill the executor
289    long procId = procExec.submitProcedure(
290        new EnableTableProcedure(procExec.getEnvironment(), tableName, false));
291    testRecoveryAndDoubleExecution(UTIL, procId, step);
292
293    MasterProcedureTestingUtility.validateTableIsEnabled(
294        UTIL.getHBaseCluster().getMaster(), tableName);
295  }
296
297  // ==========================================================================
298  //  Test Helpers
299  // ==========================================================================
300  public static void testRecoveryAndDoubleExecution(final HBaseTestingUtility testUtil,
301      final long procId, final int lastStepBeforeFailover) throws Exception {
302    ProcedureExecutor<MasterProcedureEnv> procExec =
303        testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor();
304    ProcedureTestingUtility.waitProcedure(procExec, procId);
305
306    final Procedure proc = procExec.getProcedure(procId);
307    for (int i = 0; i < lastStepBeforeFailover; ++i) {
308      LOG.info("Restart "+ i +" exec state: " + proc);
309      ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId);
310      MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec);
311      ProcedureTestingUtility.waitProcedure(procExec, procId);
312    }
313    ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId);
314
315    LOG.info("Trigger master failover");
316    MasterProcedureTestingUtility.masterFailover(testUtil);
317
318    procExec = testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor();
319    ProcedureTestingUtility.waitProcedure(procExec, procId);
320    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
321  }
322
323  // ==========================================================================
324  //  Helpers
325  // ==========================================================================
326  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
327    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
328  }
329
330  private Path getRootDir() {
331    return UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
332  }
333}