/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022import static org.junit.Assert.fail;
023
024import java.io.IOException;
025import java.util.concurrent.CountDownLatch;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.hbase.HBaseClassTestRule;
028import org.apache.hadoop.hbase.HBaseTestingUtility;
029import org.apache.hadoop.hbase.StartMiniClusterOption;
030import org.apache.hadoop.hbase.TableName;
031import org.apache.hadoop.hbase.client.RegionInfo;
032import org.apache.hadoop.hbase.client.TableDescriptor;
033import org.apache.hadoop.hbase.master.HMaster;
034import org.apache.hadoop.hbase.procedure2.Procedure;
035import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
036import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
037import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
038import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
039import org.apache.hadoop.hbase.testclassification.LargeTests;
040import org.apache.hadoop.hbase.testclassification.MasterTests;
041import org.apache.hadoop.hbase.util.ModifyRegionUtils;
042import org.junit.After;
043import org.junit.Before;
044import org.junit.ClassRule;
045import org.junit.Ignore;
046import org.junit.Rule;
047import org.junit.Test;
048import org.junit.experimental.categories.Category;
049import org.junit.rules.TestName;
050import org.mockito.Mockito;
051import org.slf4j.Logger;
052import org.slf4j.LoggerFactory;
053
054@Category({MasterTests.class, LargeTests.class})
055@Ignore
056public class TestMasterProcedureWalLease {
057
058  @ClassRule
059  public static final HBaseClassTestRule CLASS_RULE =
060      HBaseClassTestRule.forClass(TestMasterProcedureWalLease.class);
061
062  private static final Logger LOG = LoggerFactory.getLogger(TestMasterProcedureWalLease.class);
063
064  @Rule
065  public TestName name = new TestName();
066
067  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
068
069  private static void setupConf(Configuration conf) {
070    // don't waste time retrying with the roll, the test is already slow enough.
071    conf.setInt(WALProcedureStore.MAX_RETRIES_BEFORE_ROLL_CONF_KEY, 1);
072    conf.setInt(WALProcedureStore.WAIT_BEFORE_ROLL_CONF_KEY, 0);
073    conf.setInt(WALProcedureStore.ROLL_RETRIES_CONF_KEY, 1);
074    conf.setInt(WALProcedureStore.MAX_SYNC_FAILURE_ROLL_CONF_KEY, 1);
075  }
076
077  @Before
078  public void setup() throws Exception {
079    setupConf(UTIL.getConfiguration());
080    StartMiniClusterOption option = StartMiniClusterOption.builder()
081        .numMasters(2).numRegionServers(3).numDataNodes(3).build();
082    UTIL.startMiniCluster(option);
083  }
084
085  @After
086  public void tearDown() throws Exception {
087    try {
088      UTIL.shutdownMiniCluster();
089    } catch (Exception e) {
090      LOG.warn("failure shutting down cluster", e);
091    }
092  }
093
094  @Test
095  public void testWalRecoverLease() throws Exception {
096    final ProcedureStore masterStore = getMasterProcedureExecutor().getStore();
097    assertTrue("expected WALStore for this test", masterStore instanceof WALProcedureStore);
098
099    HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
100    // Abort Latch for the master store
101    final CountDownLatch masterStoreAbort = new CountDownLatch(1);
102    masterStore.registerListener(new ProcedureStore.ProcedureStoreListener() {
103      @Override
104      public void postSync() {}
105
106      @Override
107      public void abortProcess() {
108        LOG.debug("Abort store of Master");
109        masterStoreAbort.countDown();
110      }
111    });
112
113    // startup a fake master the new WAL store will take the lease
114    // and the active master should abort.
115    HMaster backupMaster3 = Mockito.mock(HMaster.class);
116    Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
117    Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
118    final WALProcedureStore backupStore3 = new WALProcedureStore(firstMaster.getConfiguration(),
119        ((WALProcedureStore)masterStore).getWALDir(),
120        null,
121        new MasterProcedureEnv.WALStoreLeaseRecovery(backupMaster3));
122    // Abort Latch for the test store
123    final CountDownLatch backupStore3Abort = new CountDownLatch(1);
124    backupStore3.registerListener(new ProcedureStore.ProcedureStoreListener() {
125      @Override
126      public void postSync() {}
127
128      @Override
129      public void abortProcess() {
130        LOG.debug("Abort store of backupMaster3");
131        backupStore3Abort.countDown();
132        backupStore3.stop(true);
133      }
134    });
135    backupStore3.start(1);
136    backupStore3.recoverLease();
137
138    // Try to trigger a command on the master (WAL lease expired on the active one)
139    TableDescriptor htd = MasterProcedureTestingUtility.createHTD(TableName.valueOf(name.getMethodName()), "f");
140    RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, null);
141    LOG.debug("submit proc");
142    try {
143      getMasterProcedureExecutor().submitProcedure(
144          new CreateTableProcedure(getMasterProcedureExecutor().getEnvironment(), htd, regions));
145      fail("expected RuntimeException 'sync aborted'");
146    } catch (RuntimeException e) {
147      LOG.info("got " + e.getMessage());
148    }
149    LOG.debug("wait master store abort");
150    masterStoreAbort.await();
151
152    // Now the real backup master should start up
153    LOG.debug("wait backup master to startup");
154    MasterProcedureTestingUtility.waitBackupMaster(UTIL, firstMaster);
155    assertEquals(true, firstMaster.isStopped());
156
157    // wait the store in here to abort (the test will fail due to timeout if it doesn't)
158    LOG.debug("wait the store to abort");
159    backupStore3.getStoreTracker().setDeleted(1, false);
160    try {
161      backupStore3.delete(1);
162      fail("expected RuntimeException 'sync aborted'");
163    } catch (RuntimeException e) {
164      LOG.info("got " + e.getMessage());
165    }
166    backupStore3Abort.await();
167  }
168
169  /**
170   * Tests proper fencing in case the current WAL store is fenced
171   */
172  @Test
173  public void testWALfencingWithoutWALRolling() throws IOException {
174    testWALfencing(false);
175  }
176
177  /**
178   * Tests proper fencing in case the current WAL store does not receive writes until after the
179   * new WAL does a couple of WAL rolls.
180   */
181  @Test
182  public void testWALfencingWithWALRolling() throws IOException {
183    testWALfencing(true);
184  }
185
186  public void testWALfencing(boolean walRolls) throws IOException {
187    final ProcedureStore procStore = getMasterProcedureExecutor().getStore();
188    assertTrue("expected WALStore for this test", procStore instanceof WALProcedureStore);
189
190    HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
191
192    // cause WAL rolling after a delete in WAL:
193    firstMaster.getConfiguration().setLong(WALProcedureStore.ROLL_THRESHOLD_CONF_KEY, 1);
194
195    HMaster backupMaster3 = Mockito.mock(HMaster.class);
196    Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
197    Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
198    final WALProcedureStore procStore2 = new WALProcedureStore(firstMaster.getConfiguration(),
199        ((WALProcedureStore)procStore).getWALDir(),
200        null,
201        new MasterProcedureEnv.WALStoreLeaseRecovery(backupMaster3));
202
203    // start a second store which should fence the first one out
204    LOG.info("Starting new WALProcedureStore");
205    procStore2.start(1);
206    procStore2.recoverLease();
207
208    // before writing back to the WAL store, optionally do a couple of WAL rolls (which causes
209    // to delete the old WAL files).
210    if (walRolls) {
211      LOG.info("Inserting into second WALProcedureStore, causing WAL rolls");
212      for (int i = 0; i < 512; i++) {
213        // insert something to the second store then delete it, causing a WAL roll(s)
214        Procedure proc2 = new TestProcedure(i);
215        procStore2.insert(proc2, null);
216        procStore2.delete(proc2.getProcId()); // delete the procedure so that the WAL is removed later
217      }
218    }
219
220    // Now, insert something to the first store, should fail.
221    // If the store does a WAL roll and continue with another logId without checking higher logIds
222    // it will incorrectly succeed.
223    LOG.info("Inserting into first WALProcedureStore");
224    try {
225      procStore.insert(new TestProcedure(11), null);
226      fail("Inserting into Procedure Store should have failed");
227    } catch (Exception ex) {
228      LOG.info("Received expected exception", ex);
229    }
230  }
231
232  // ==========================================================================
233  //  Helpers
234  // ==========================================================================
235  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
236    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
237  }
238}