/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.concurrent.CountDownLatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Exercises what happens to the master's {@link WALProcedureStore} when a second
 * {@link WALProcedureStore} is started on the same WAL directory and recovers the lease.
 * <p>
 * Every test starts a fresh mini cluster in {@link #setup()} and shuts it down in
 * {@link #tearDown()}. The whole class is currently disabled through {@link Ignore}.
 */
@Category({MasterTests.class, LargeTests.class})
@Ignore
public class TestMasterProcedureWalLease {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestMasterProcedureWalLease.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestMasterProcedureWalLease.class);

  /** Supplies the running test method's name, used as the table name in the lease test. */
  @Rule
  public TestName name = new TestName();

  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();

  /**
   * Tunes the WAL procedure store retry/roll settings on the given configuration.
   *
   * @param conf the configuration the mini cluster will be started with
   */
  private static void setupConf(Configuration conf) {
    // don't waste time retrying with the roll, the test is already slow enough.
    conf.setInt(WALProcedureStore.MAX_RETRIES_BEFORE_ROLL_CONF_KEY, 1);
    conf.setInt(WALProcedureStore.WAIT_BEFORE_ROLL_CONF_KEY, 0);
    conf.setInt(WALProcedureStore.ROLL_RETRIES_CONF_KEY, 1);
    conf.setInt(WALProcedureStore.MAX_SYNC_FAILURE_ROLL_CONF_KEY, 1);
  }

  /**
   * Applies the test configuration and starts the mini cluster before each test.
   *
   * @throws Exception if the mini cluster fails to start
   */
  @Before
  public void setup() throws Exception {
    setupConf(UTIL.getConfiguration());
    // NOTE(review): presumably 2 masters and 3 region servers - confirm against
    // HBaseTestingUtility#startMiniCluster(int, int).
    UTIL.startMiniCluster(2, 3);
  }

  /**
   * Shuts the mini cluster down after each test. Shutdown failures are logged rather than
   * rethrown, so the cleanup is best-effort.
   *
   * @throws Exception declared for JUnit; the body itself catches shutdown exceptions
   */
  @After
  public void tearDown() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      LOG.warn("failure shutting down cluster", e);
    }
  }

  /**
   * Starts a second WAL store (owned by a mocked master) on the active master's WAL directory,
   * lets it recover the lease, and then checks that:
   * <ol>
   * <li>submitting a procedure on the active master fails with a {@link RuntimeException} and the
   * master's store signals abort;</li>
   * <li>the first master ends up stopped once the backup master has taken over;</li>
   * <li>a later write through the second store also fails and that store signals abort.</li>
   * </ol>
   *
   * @throws Exception on any unexpected failure, including interruption while waiting on a latch
   */
  @Test
  public void testWalRecoverLease() throws Exception {
    final ProcedureStore masterStore = getMasterProcedureExecutor().getStore();
    assertTrue("expected WALStore for this test", masterStore instanceof WALProcedureStore);

    HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
    // Abort latch for the master store
    final CountDownLatch masterStoreAbort = new CountDownLatch(1);
    masterStore.registerListener(new ProcedureStore.ProcedureStoreListener() {
      @Override
      public void postSync() {}

      @Override
      public void abortProcess() {
        LOG.debug("Abort store of Master");
        masterStoreAbort.countDown();
      }
    });

    // Start up a fake master: its new WAL store will take the lease
    // and the active master should abort.
    HMaster backupMaster3 = Mockito.mock(HMaster.class);
    Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
    Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
    final WALProcedureStore backupStore3 = new WALProcedureStore(firstMaster.getConfiguration(),
        ((WALProcedureStore)masterStore).getWALDir(),
        null,
        new MasterProcedureEnv.WALStoreLeaseRecovery(backupMaster3));
    // Abort latch for the test store
    final CountDownLatch backupStore3Abort = new CountDownLatch(1);
    backupStore3.registerListener(new ProcedureStore.ProcedureStoreListener() {
      @Override
      public void postSync() {}

      @Override
      public void abortProcess() {
        LOG.debug("Abort store of backupMaster3");
        backupStore3Abort.countDown();
        backupStore3.stop(true);
      }
    });
    backupStore3.start(1);
    backupStore3.recoverLease();

    // Try to trigger a command on the master (WAL lease expired on the active one)
    TableDescriptor htd =
        MasterProcedureTestingUtility.createHTD(TableName.valueOf(name.getMethodName()), "f");
    RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, null);
    LOG.debug("submit proc");
    try {
      getMasterProcedureExecutor().submitProcedure(
        new CreateTableProcedure(getMasterProcedureExecutor().getEnvironment(), htd, regions));
      // fail() throws AssertionError, which the RuntimeException handler below does not catch
      fail("expected RuntimeException 'sync aborted'");
    } catch (RuntimeException e) {
      LOG.info("got " + e.getMessage());
    }
    LOG.debug("wait master store abort");
    masterStoreAbort.await();

    // Now the real backup master should start up
    LOG.debug("wait backup master to startup");
    MasterProcedureTestingUtility.waitBackupMaster(UTIL, firstMaster);
    assertEquals(true, firstMaster.isStopped());

    // Wait for this store to abort (the test will fail due to timeout if it doesn't)
    LOG.debug("wait the store to abort");
    // NOTE(review): presumably marks procId 1 as live in the tracker so that the delete below
    // results in an actual WAL write - confirm against the store tracker implementation.
    backupStore3.getStoreTracker().setDeleted(1, false);
    try {
      backupStore3.delete(1);
      fail("expected RuntimeException 'sync aborted'");
    } catch (RuntimeException e) {
      LOG.info("got " + e.getMessage());
    }
    backupStore3Abort.await();
  }

  /**
   * Tests proper fencing in case the current WAL store is fenced
   */
  @Test
  public void testWALfencingWithoutWALRolling() throws IOException {
    testWALfencing(false);
  }

  /**
   * Tests proper fencing in case the current WAL store does not receive writes until after the
   * new WAL does a couple of WAL rolls.
   */
  @Test
  public void testWALfencingWithWALRolling() throws IOException {
    testWALfencing(true);
  }

  /**
   * Shared body of the fencing tests: starts a second WAL store on the master's WAL directory,
   * lets it recover the lease, optionally drives insert/delete traffic through it, and then
   * expects an insert through the master's original store to fail.
   * <p>
   * The second store is always stopped before this method returns so that it does not outlive
   * the test.
   *
   * @param walRolls if {@code true}, insert and delete 512 procedures through the second store
   *          before writing to the first one
   * @throws IOException if starting the second store or recovering the lease fails
   */
  public void testWALfencing(boolean walRolls) throws IOException {
    final ProcedureStore procStore = getMasterProcedureExecutor().getStore();
    assertTrue("expected WALStore for this test", procStore instanceof WALProcedureStore);

    HMaster firstMaster = UTIL.getHBaseCluster().getMaster();

    // cause WAL rolling after a delete in WAL:
    firstMaster.getConfiguration().setLong(WALProcedureStore.ROLL_THRESHOLD_CONF_KEY, 1);

    HMaster backupMaster3 = Mockito.mock(HMaster.class);
    Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
    Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
    final WALProcedureStore procStore2 = new WALProcedureStore(firstMaster.getConfiguration(),
        ((WALProcedureStore)procStore).getWALDir(),
        null,
        new MasterProcedureEnv.WALStoreLeaseRecovery(backupMaster3));

    try {
      // start a second store which should fence the first one out
      LOG.info("Starting new WALProcedureStore");
      procStore2.start(1);
      procStore2.recoverLease();

      // Before writing back to the first WAL store, optionally do a couple of WAL rolls
      // (which causes the old WAL files to be deleted).
      if (walRolls) {
        LOG.info("Inserting into second WALProcedureStore, causing WAL rolls");
        for (int i = 0; i < 512; i++) {
          // insert something to the second store then delete it, causing a WAL roll(s)
          Procedure<?> proc2 = new TestProcedure(i);
          procStore2.insert(proc2, null);
          // delete the procedure so that the WAL is removed later
          procStore2.delete(proc2.getProcId());
        }
      }

      // Now, insert something to the first store, should fail.
      // If the store does a WAL roll and continues with another logId without checking higher
      // logIds it will incorrectly succeed.
      LOG.info("Inserting into first WALProcedureStore");
      try {
        procStore.insert(new TestProcedure(11), null);
        // fail() throws AssertionError, which the Exception handler below does not catch
        fail("Inserting into Procedure Store should have failed");
      } catch (Exception ex) {
        LOG.info("Received expected exception", ex);
      }
    } finally {
      // The second store was previously never stopped on any path.
      stopQuietly(procStore2);
    }
  }

  // ==========================================================================
  //  Helpers
  // ==========================================================================
  /**
   * @return the procedure executor of the master currently returned by the mini cluster
   */
  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
  }

  /**
   * Best-effort, non-abort stop of a store created by a test. A failure while stopping is logged
   * instead of rethrown so that it cannot mask the outcome of the test body.
   *
   * @param store the store to stop
   */
  private static void stopQuietly(ProcedureStore store) {
    try {
      store.stop(false);
    } catch (RuntimeException e) {
      LOG.warn("failure stopping procedure store", e);
    }
  }
}