/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.concurrent.CountDownLatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.StartMiniClusterOption;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests what happens to the master's {@link WALProcedureStore} when a second store is opened on
 * the same WAL directory and recovers the lease.
 * <p>
 * Every test starts its own mini cluster (2 masters, 3 region servers, 3 data nodes) in
 * {@link #setup()} and shuts it down in {@link #tearDown()}.
 * <p>
 * NOTE(review): the class is annotated with {@code @Ignore} without a recorded reason; confirm
 * whether it is still meant to be disabled.
 */
@Category({MasterTests.class, LargeTests.class})
@Ignore
public class TestMasterProcedureWalLease {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestMasterProcedureWalLease.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestMasterProcedureWalLease.class);

  @Rule
  public TestName name = new TestName();

  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();

  /**
   * Tunes the WAL procedure store retry/roll settings so failures surface quickly.
   * @param conf the configuration to modify in place
   */
  private static void setupConf(Configuration conf) {
    // don't waste time retrying with the roll, the test is already slow enough.
    conf.setInt(WALProcedureStore.MAX_RETRIES_BEFORE_ROLL_CONF_KEY, 1);
    conf.setInt(WALProcedureStore.WAIT_BEFORE_ROLL_CONF_KEY, 0);
    conf.setInt(WALProcedureStore.ROLL_RETRIES_CONF_KEY, 1);
    conf.setInt(WALProcedureStore.MAX_SYNC_FAILURE_ROLL_CONF_KEY, 1);
  }

  /** Starts a fresh mini cluster with 2 masters, 3 region servers and 3 data nodes. */
  @Before
  public void setup() throws Exception {
    setupConf(UTIL.getConfiguration());
    StartMiniClusterOption option = StartMiniClusterOption.builder()
        .numMasters(2).numRegionServers(3).numDataNodes(3).build();
    UTIL.startMiniCluster(option);
  }

  /** Shuts the mini cluster down; a shutdown failure is logged and does not fail the test. */
  @After
  public void tearDown() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      LOG.warn("failure shutting down cluster", e);
    }
  }

  /**
   * Opens a second store on the master's WAL directory, recovers the lease with it, and then
   * checks that (1) submitting a procedure on the original master fails and its store reports an
   * abort, (2) the backup master takes over and the first master is stopped, and (3) the second
   * store is in turn aborted when it tries to write afterwards.
   */
  @Test
  public void testWalRecoverLease() throws Exception {
    final ProcedureStore masterStore = getMasterProcedureExecutor().getStore();
    assertTrue("expected WALStore for this test", masterStore instanceof WALProcedureStore);

    HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
    // Abort Latch for the master store
    final CountDownLatch masterStoreAbort = new CountDownLatch(1);
    masterStore.registerListener(new ProcedureStore.ProcedureStoreListener() {
      @Override
      public void postSync() {}

      @Override
      public void abortProcess() {
        LOG.debug("Abort store of Master");
        masterStoreAbort.countDown();
      }
    });

    // startup a fake master the new WAL store will take the lease
    // and the active master should abort.
    final WALProcedureStore backupStore3 =
        newCompetingStore(firstMaster, (WALProcedureStore) masterStore);
    // Abort Latch for the test store
    final CountDownLatch backupStore3Abort = new CountDownLatch(1);
    backupStore3.registerListener(new ProcedureStore.ProcedureStoreListener() {
      @Override
      public void postSync() {}

      @Override
      public void abortProcess() {
        LOG.debug("Abort store of backupMaster3");
        backupStore3Abort.countDown();
        backupStore3.stop(true);
      }
    });
    backupStore3.start(1);
    backupStore3.recoverLease();

    // Try to trigger a command on the master (WAL lease expired on the active one)
    TableDescriptor htd = MasterProcedureTestingUtility.createHTD(
        TableName.valueOf(name.getMethodName()), "f");
    RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, null);
    LOG.debug("submit proc");
    try {
      getMasterProcedureExecutor().submitProcedure(
          new CreateTableProcedure(getMasterProcedureExecutor().getEnvironment(), htd, regions));
      fail("expected RuntimeException 'sync aborted'");
    } catch (RuntimeException e) {
      LOG.info("got {}", e.getMessage());
    }
    LOG.debug("wait master store abort");
    masterStoreAbort.await();

    // Now the real backup master should start up
    LOG.debug("wait backup master to startup");
    MasterProcedureTestingUtility.waitBackupMaster(UTIL, firstMaster);
    assertEquals("first master should be stopped once the backup master has taken over",
        true, firstMaster.isStopped());

    // wait the store in here to abort (the test will fail due to timeout if it doesn't)
    LOG.debug("wait the store to abort");
    backupStore3.getStoreTracker().setDeleted(1, false);
    try {
      backupStore3.delete(1);
      fail("expected RuntimeException 'sync aborted'");
    } catch (RuntimeException e) {
      LOG.info("got {}", e.getMessage());
    }
    backupStore3Abort.await();
  }

  /**
   * Tests proper fencing in case the current WAL store is fenced
   */
  @Test
  public void testWALfencingWithoutWALRolling() throws IOException {
    testWALfencing(false);
  }

  /**
   * Tests proper fencing in case the current WAL store does not receive writes until after the
   * new WAL does a couple of WAL rolls.
   */
  @Test
  public void testWALfencingWithWALRolling() throws IOException {
    testWALfencing(true);
  }

  /**
   * Shared body of the fencing tests: starts a second store on the master's WAL directory,
   * recovers the lease with it, and asserts that a later insert into the master's own store
   * fails.
   * @param walRolls when {@code true}, the second store performs 512 insert/delete pairs before
   *          the first store is written to, in order to cause WAL rolls on the second store
   * @throws IOException if creating or operating the second store fails
   */
  public void testWALfencing(boolean walRolls) throws IOException {
    final ProcedureStore procStore = getMasterProcedureExecutor().getStore();
    assertTrue("expected WALStore for this test", procStore instanceof WALProcedureStore);

    HMaster firstMaster = UTIL.getHBaseCluster().getMaster();

    // cause WAL rolling after a delete in WAL:
    firstMaster.getConfiguration().setLong(WALProcedureStore.ROLL_THRESHOLD_CONF_KEY, 1);

    final WALProcedureStore procStore2 =
        newCompetingStore(firstMaster, (WALProcedureStore) procStore);

    // start a second store which should fence the first one out
    LOG.info("Starting new WALProcedureStore");
    procStore2.start(1);
    try {
      procStore2.recoverLease();

      // before writing back to the WAL store, optionally do a couple of WAL rolls (which causes
      // to delete the old WAL files).
      if (walRolls) {
        LOG.info("Inserting into second WALProcedureStore, causing WAL rolls");
        for (int i = 0; i < 512; i++) {
          // insert something to the second store then delete it, causing a WAL roll(s)
          Procedure proc2 = new TestProcedure(i);
          procStore2.insert(proc2, null);
          // delete the procedure so that the WAL is removed later
          procStore2.delete(proc2.getProcId());
        }
      }

      // Now, insert something to the first store, should fail.
      // If the store does a WAL roll and continue with another logId without checking higher
      // logIds it will incorrectly succeed.
      LOG.info("Inserting into first WALProcedureStore");
      try {
        procStore.insert(new TestProcedure(11), null);
        fail("Inserting into Procedure Store should have failed");
      } catch (Exception ex) {
        LOG.info("Received expected exception", ex);
      }
    } finally {
      // The second store was started by this test and nothing else stops it; stop it here so
      // it does not outlive the test.
      procStore2.stop(false);
    }
  }

  // ==========================================================================
  //  Helpers
  // ==========================================================================
  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
  }

  /**
   * Creates (but does not start) a second {@link WALProcedureStore} on the WAL directory of
   * {@code activeStore}. Its lease recovery is backed by a Mockito mock of {@link HMaster} that
   * returns {@code activeMaster}'s configuration and reports itself as the active master.
   * @param activeMaster the running master whose configuration the new store shares
   * @param activeStore the master's store whose WAL directory the new store opens
   * @return the new, not yet started, store
   * @throws IOException if the store cannot be created
   */
  private static WALProcedureStore newCompetingStore(HMaster activeMaster,
      WALProcedureStore activeStore) throws IOException {
    HMaster fakeMaster = Mockito.mock(HMaster.class);
    Mockito.doReturn(activeMaster.getConfiguration()).when(fakeMaster).getConfiguration();
    Mockito.doReturn(true).when(fakeMaster).isActiveMaster();
    return new WALProcedureStore(activeMaster.getConfiguration(), activeStore.getWALDir(), null,
        new MasterProcedureEnv.WALStoreLeaseRecovery(fakeMaster));
  }
}