001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import static org.junit.Assert.assertEquals; 021 022import org.apache.hadoop.conf.Configuration; 023import org.apache.hadoop.fs.Path; 024import org.apache.hadoop.hbase.HBaseClassTestRule; 025import org.apache.hadoop.hbase.HBaseTestingUtility; 026import org.apache.hadoop.hbase.TableName; 027import org.apache.hadoop.hbase.client.RegionInfo; 028import org.apache.hadoop.hbase.client.TableDescriptor; 029import org.apache.hadoop.hbase.procedure2.Procedure; 030import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 031import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 032import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore; 033import org.apache.hadoop.hbase.testclassification.LargeTests; 034import org.apache.hadoop.hbase.testclassification.MasterTests; 035import org.apache.hadoop.hbase.util.Bytes; 036import org.apache.hadoop.hbase.util.FSUtils; 037import org.apache.hadoop.hbase.util.ModifyRegionUtils; 038import org.junit.After; 039import org.junit.Before; 040import org.junit.ClassRule; 041import org.junit.Test; 042import org.junit.experimental.categories.Category; 043import org.slf4j.Logger; 044import org.slf4j.LoggerFactory; 045 046import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CreateTableState; 047import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DeleteTableState; 048import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DisableTableState; 049import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.EnableTableState; 050import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateTableState; 051 052@Category({MasterTests.class, LargeTests.class}) 053public class TestMasterFailoverWithProcedures { 054 055 @ClassRule 056 public static final HBaseClassTestRule CLASS_RULE = 057 HBaseClassTestRule.forClass(TestMasterFailoverWithProcedures.class); 058 059 private static final Logger LOG = LoggerFactory.getLogger(TestMasterFailoverWithProcedures.class); 060 061 protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); 062 063 private static void setupConf(Configuration conf) { 064 // don't waste time retrying with the roll, the test is already slow enough. 065 conf.setInt(WALProcedureStore.MAX_RETRIES_BEFORE_ROLL_CONF_KEY, 1); 066 conf.setInt(WALProcedureStore.WAIT_BEFORE_ROLL_CONF_KEY, 0); 067 conf.setInt(WALProcedureStore.ROLL_RETRIES_CONF_KEY, 1); 068 conf.setInt(WALProcedureStore.MAX_SYNC_FAILURE_ROLL_CONF_KEY, 1); 069 conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); 070 conf.setInt(MasterProcedureConstants.MASTER_URGENT_PROCEDURE_THREADS, 0); 071 } 072 073 @Before 074 public void setup() throws Exception { 075 setupConf(UTIL.getConfiguration()); 076 UTIL.startMiniCluster(2, 1); 077 078 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 079 ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, false); 080 ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, false); 081 } 082 083 @After 084 public void tearDown() throws Exception { 085 try { 086 UTIL.shutdownMiniCluster(); 087 } catch (Exception e) { 088 LOG.warn("failure shutting down cluster", e); 089 } 090 } 091 092 // ========================================================================== 093 // Test Create Table 094 // ========================================================================== 095 @Test 096 public void testCreateWithFailover() throws Exception { 097 // TODO: Should we try every step? (master failover takes long time) 098 // It is already covered by TestCreateTableProcedure 099 // but without the master restart, only the executor/store is restarted. 100 // Without Master restart we may not find bug in the procedure code 101 // like missing "wait" for resources to be available (e.g. RS) 102 testCreateWithFailoverAtStep(CreateTableState.CREATE_TABLE_ASSIGN_REGIONS.ordinal()); 103 } 104 105 private void testCreateWithFailoverAtStep(final int step) throws Exception { 106 final TableName tableName = TableName.valueOf("testCreateWithFailoverAtStep" + step); 107 108 // create the table 109 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 110 ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true); 111 ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true); 112 113 // Start the Create procedure && kill the executor 114 byte[][] splitKeys = null; 115 TableDescriptor htd = MasterProcedureTestingUtility.createHTD(tableName, "f1", "f2"); 116 RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, splitKeys); 117 long procId = procExec.submitProcedure( 118 new CreateTableProcedure(procExec.getEnvironment(), htd, regions)); 119 testRecoveryAndDoubleExecution(UTIL, procId, step); 120 121 MasterProcedureTestingUtility.validateTableCreation( 122 UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2"); 123 } 124 125 // ========================================================================== 126 // Test Delete Table 127 // ========================================================================== 128 @Test 129 public void testDeleteWithFailover() throws Exception { 130 // TODO: Should we try every step? (master failover takes long time) 131 // It is already covered by TestDeleteTableProcedure 132 // but without the master restart, only the executor/store is restarted. 133 // Without Master restart we may not find bug in the procedure code 134 // like missing "wait" for resources to be available (e.g. RS) 135 testDeleteWithFailoverAtStep(DeleteTableState.DELETE_TABLE_UNASSIGN_REGIONS.ordinal()); 136 } 137 138 private void testDeleteWithFailoverAtStep(final int step) throws Exception { 139 final TableName tableName = TableName.valueOf("testDeleteWithFailoverAtStep" + step); 140 141 // create the table 142 byte[][] splitKeys = null; 143 RegionInfo[] regions = MasterProcedureTestingUtility.createTable( 144 getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2"); 145 Path tableDir = FSUtils.getTableDir(getRootDir(), tableName); 146 MasterProcedureTestingUtility.validateTableCreation( 147 UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2"); 148 UTIL.getAdmin().disableTable(tableName); 149 150 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 151 ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true); 152 ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true); 153 154 // Start the Delete procedure && kill the executor 155 long procId = procExec.submitProcedure( 156 new DeleteTableProcedure(procExec.getEnvironment(), tableName)); 157 testRecoveryAndDoubleExecution(UTIL, procId, step); 158 159 MasterProcedureTestingUtility.validateTableDeletion( 160 UTIL.getHBaseCluster().getMaster(), tableName); 161 } 162 163 // ========================================================================== 164 // Test Truncate Table 165 // ========================================================================== 166 @Test 167 public void testTruncateWithFailover() throws Exception { 168 // TODO: Should we try every step? (master failover takes long time) 169 // It is already covered by TestTruncateTableProcedure 170 // but without the master restart, only the executor/store is restarted. 171 // Without Master restart we may not find bug in the procedure code 172 // like missing "wait" for resources to be available (e.g. RS) 173 testTruncateWithFailoverAtStep(true, TruncateTableState.TRUNCATE_TABLE_ADD_TO_META.ordinal()); 174 } 175 176 private void testTruncateWithFailoverAtStep(final boolean preserveSplits, final int step) 177 throws Exception { 178 final TableName tableName = TableName.valueOf("testTruncateWithFailoverAtStep" + step); 179 180 // create the table 181 final String[] families = new String[] { "f1", "f2" }; 182 final byte[][] splitKeys = new byte[][] { 183 Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c") 184 }; 185 RegionInfo[] regions = MasterProcedureTestingUtility.createTable( 186 getMasterProcedureExecutor(), tableName, splitKeys, families); 187 // load and verify that there are rows in the table 188 MasterProcedureTestingUtility.loadData( 189 UTIL.getConnection(), tableName, 100, splitKeys, families); 190 assertEquals(100, UTIL.countRows(tableName)); 191 // disable the table 192 UTIL.getAdmin().disableTable(tableName); 193 194 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 195 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 196 197 // Start the Truncate procedure && kill the executor 198 long procId = procExec.submitProcedure( 199 new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits)); 200 testRecoveryAndDoubleExecution(UTIL, procId, step); 201 202 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); 203 UTIL.waitUntilAllRegionsAssigned(tableName); 204 205 // validate the table regions and layout 206 regions = UTIL.getAdmin().getTableRegions(tableName).toArray(new RegionInfo[0]); 207 if (preserveSplits) { 208 assertEquals(1 + splitKeys.length, regions.length); 209 } else { 210 assertEquals(1, regions.length); 211 } 212 MasterProcedureTestingUtility.validateTableCreation( 213 UTIL.getHBaseCluster().getMaster(), tableName, regions, families); 214 215 // verify that there are no rows in the table 216 assertEquals(0, UTIL.countRows(tableName)); 217 218 // verify that the table is read/writable 219 MasterProcedureTestingUtility.loadData( 220 UTIL.getConnection(), tableName, 50, splitKeys, families); 221 assertEquals(50, UTIL.countRows(tableName)); 222 } 223 224 // ========================================================================== 225 // Test Disable Table 226 // ========================================================================== 227 @Test 228 public void testDisableTableWithFailover() throws Exception { 229 // TODO: Should we try every step? (master failover takes long time) 230 // It is already covered by TestDisableTableProcedure 231 // but without the master restart, only the executor/store is restarted. 232 // Without Master restart we may not find bug in the procedure code 233 // like missing "wait" for resources to be available (e.g. RS) 234 testDisableTableWithFailoverAtStep( 235 DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE.ordinal()); 236 } 237 238 private void testDisableTableWithFailoverAtStep(final int step) throws Exception { 239 final TableName tableName = TableName.valueOf("testDisableTableWithFailoverAtStep" + step); 240 241 // create the table 242 final byte[][] splitKeys = new byte[][] { 243 Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c") 244 }; 245 MasterProcedureTestingUtility.createTable( 246 getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2"); 247 248 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 249 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 250 251 // Start the Delete procedure && kill the executor 252 long procId = procExec.submitProcedure( 253 new DisableTableProcedure(procExec.getEnvironment(), tableName, false)); 254 testRecoveryAndDoubleExecution(UTIL, procId, step); 255 256 MasterProcedureTestingUtility.validateTableIsDisabled( 257 UTIL.getHBaseCluster().getMaster(), tableName); 258 } 259 260 // ========================================================================== 261 // Test Enable Table 262 // ========================================================================== 263 @Test 264 public void testEnableTableWithFailover() throws Exception { 265 // TODO: Should we try every step? (master failover takes long time) 266 // It is already covered by TestEnableTableProcedure 267 // but without the master restart, only the executor/store is restarted. 268 // Without Master restart we may not find bug in the procedure code 269 // like missing "wait" for resources to be available (e.g. RS) 270 testEnableTableWithFailoverAtStep( 271 EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE.ordinal()); 272 } 273 274 private void testEnableTableWithFailoverAtStep(final int step) throws Exception { 275 final TableName tableName = TableName.valueOf("testEnableTableWithFailoverAtStep" + step); 276 277 // create the table 278 final byte[][] splitKeys = new byte[][] { 279 Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c") 280 }; 281 MasterProcedureTestingUtility.createTable( 282 getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2"); 283 UTIL.getAdmin().disableTable(tableName); 284 285 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 286 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 287 288 // Start the Delete procedure && kill the executor 289 long procId = procExec.submitProcedure( 290 new EnableTableProcedure(procExec.getEnvironment(), tableName, false)); 291 testRecoveryAndDoubleExecution(UTIL, procId, step); 292 293 MasterProcedureTestingUtility.validateTableIsEnabled( 294 UTIL.getHBaseCluster().getMaster(), tableName); 295 } 296 297 // ========================================================================== 298 // Test Helpers 299 // ========================================================================== 300 public static void testRecoveryAndDoubleExecution(final HBaseTestingUtility testUtil, 301 final long procId, final int lastStepBeforeFailover) throws Exception { 302 ProcedureExecutor<MasterProcedureEnv> procExec = 303 testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor(); 304 ProcedureTestingUtility.waitProcedure(procExec, procId); 305 306 final Procedure proc = procExec.getProcedure(procId); 307 for (int i = 0; i < lastStepBeforeFailover; ++i) { 308 LOG.info("Restart "+ i +" exec state: " + proc); 309 ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); 310 MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec); 311 ProcedureTestingUtility.waitProcedure(procExec, procId); 312 } 313 ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); 314 315 LOG.info("Trigger master failover"); 316 MasterProcedureTestingUtility.masterFailover(testUtil); 317 318 procExec = testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor(); 319 ProcedureTestingUtility.waitProcedure(procExec, procId); 320 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 321 } 322 323 // ========================================================================== 324 // Helpers 325 // ========================================================================== 326 private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() { 327 return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); 328 } 329 330 private Path getRootDir() { 331 return UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir(); 332 } 333}