001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022import static org.junit.Assert.fail;
023
024import java.io.IOException;
025import java.util.concurrent.CountDownLatch;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.hbase.HBaseClassTestRule;
028import org.apache.hadoop.hbase.HBaseTestingUtility;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.RegionInfo;
031import org.apache.hadoop.hbase.client.TableDescriptor;
032import org.apache.hadoop.hbase.master.HMaster;
033import org.apache.hadoop.hbase.procedure2.Procedure;
034import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
035import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
036import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
037import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
038import org.apache.hadoop.hbase.testclassification.LargeTests;
039import org.apache.hadoop.hbase.testclassification.MasterTests;
040import org.apache.hadoop.hbase.util.ModifyRegionUtils;
041import org.junit.After;
042import org.junit.Before;
043import org.junit.ClassRule;
044import org.junit.Ignore;
045import org.junit.Rule;
046import org.junit.Test;
047import org.junit.experimental.categories.Category;
048import org.junit.rules.TestName;
049import org.mockito.Mockito;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053@Category({MasterTests.class, LargeTests.class})
054@Ignore
055public class TestMasterProcedureWalLease {
056
057  @ClassRule
058  public static final HBaseClassTestRule CLASS_RULE =
059      HBaseClassTestRule.forClass(TestMasterProcedureWalLease.class);
060
061  private static final Logger LOG = LoggerFactory.getLogger(TestMasterProcedureWalLease.class);
062
063  @Rule
064  public TestName name = new TestName();
065
066  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
067
068  private static void setupConf(Configuration conf) {
069    // don't waste time retrying with the roll, the test is already slow enough.
070    conf.setInt(WALProcedureStore.MAX_RETRIES_BEFORE_ROLL_CONF_KEY, 1);
071    conf.setInt(WALProcedureStore.WAIT_BEFORE_ROLL_CONF_KEY, 0);
072    conf.setInt(WALProcedureStore.ROLL_RETRIES_CONF_KEY, 1);
073    conf.setInt(WALProcedureStore.MAX_SYNC_FAILURE_ROLL_CONF_KEY, 1);
074  }
075
076  @Before
077  public void setup() throws Exception {
078    setupConf(UTIL.getConfiguration());
079    UTIL.startMiniCluster(2, 3);
080  }
081
082  @After
083  public void tearDown() throws Exception {
084    try {
085      UTIL.shutdownMiniCluster();
086    } catch (Exception e) {
087      LOG.warn("failure shutting down cluster", e);
088    }
089  }
090
091  @Test
092  public void testWalRecoverLease() throws Exception {
093    final ProcedureStore masterStore = getMasterProcedureExecutor().getStore();
094    assertTrue("expected WALStore for this test", masterStore instanceof WALProcedureStore);
095
096    HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
097    // Abort Latch for the master store
098    final CountDownLatch masterStoreAbort = new CountDownLatch(1);
099    masterStore.registerListener(new ProcedureStore.ProcedureStoreListener() {
100      @Override
101      public void postSync() {}
102
103      @Override
104      public void abortProcess() {
105        LOG.debug("Abort store of Master");
106        masterStoreAbort.countDown();
107      }
108    });
109
110    // startup a fake master the new WAL store will take the lease
111    // and the active master should abort.
112    HMaster backupMaster3 = Mockito.mock(HMaster.class);
113    Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
114    Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
115    final WALProcedureStore backupStore3 = new WALProcedureStore(firstMaster.getConfiguration(),
116        ((WALProcedureStore)masterStore).getWALDir(),
117        null,
118        new MasterProcedureEnv.WALStoreLeaseRecovery(backupMaster3));
119    // Abort Latch for the test store
120    final CountDownLatch backupStore3Abort = new CountDownLatch(1);
121    backupStore3.registerListener(new ProcedureStore.ProcedureStoreListener() {
122      @Override
123      public void postSync() {}
124
125      @Override
126      public void abortProcess() {
127        LOG.debug("Abort store of backupMaster3");
128        backupStore3Abort.countDown();
129        backupStore3.stop(true);
130      }
131    });
132    backupStore3.start(1);
133    backupStore3.recoverLease();
134
135    // Try to trigger a command on the master (WAL lease expired on the active one)
136    TableDescriptor htd = MasterProcedureTestingUtility.createHTD(TableName.valueOf(name.getMethodName()), "f");
137    RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, null);
138    LOG.debug("submit proc");
139    try {
140      getMasterProcedureExecutor().submitProcedure(
141          new CreateTableProcedure(getMasterProcedureExecutor().getEnvironment(), htd, regions));
142      fail("expected RuntimeException 'sync aborted'");
143    } catch (RuntimeException e) {
144      LOG.info("got " + e.getMessage());
145    }
146    LOG.debug("wait master store abort");
147    masterStoreAbort.await();
148
149    // Now the real backup master should start up
150    LOG.debug("wait backup master to startup");
151    MasterProcedureTestingUtility.waitBackupMaster(UTIL, firstMaster);
152    assertEquals(true, firstMaster.isStopped());
153
154    // wait the store in here to abort (the test will fail due to timeout if it doesn't)
155    LOG.debug("wait the store to abort");
156    backupStore3.getStoreTracker().setDeleted(1, false);
157    try {
158      backupStore3.delete(1);
159      fail("expected RuntimeException 'sync aborted'");
160    } catch (RuntimeException e) {
161      LOG.info("got " + e.getMessage());
162    }
163    backupStore3Abort.await();
164  }
165
166  /**
167   * Tests proper fencing in case the current WAL store is fenced
168   */
169  @Test
170  public void testWALfencingWithoutWALRolling() throws IOException {
171    testWALfencing(false);
172  }
173
174  /**
175   * Tests proper fencing in case the current WAL store does not receive writes until after the
176   * new WAL does a couple of WAL rolls.
177   */
178  @Test
179  public void testWALfencingWithWALRolling() throws IOException {
180    testWALfencing(true);
181  }
182
183  public void testWALfencing(boolean walRolls) throws IOException {
184    final ProcedureStore procStore = getMasterProcedureExecutor().getStore();
185    assertTrue("expected WALStore for this test", procStore instanceof WALProcedureStore);
186
187    HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
188
189    // cause WAL rolling after a delete in WAL:
190    firstMaster.getConfiguration().setLong(WALProcedureStore.ROLL_THRESHOLD_CONF_KEY, 1);
191
192    HMaster backupMaster3 = Mockito.mock(HMaster.class);
193    Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
194    Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
195    final WALProcedureStore procStore2 = new WALProcedureStore(firstMaster.getConfiguration(),
196        ((WALProcedureStore)procStore).getWALDir(),
197        null,
198        new MasterProcedureEnv.WALStoreLeaseRecovery(backupMaster3));
199
200    // start a second store which should fence the first one out
201    LOG.info("Starting new WALProcedureStore");
202    procStore2.start(1);
203    procStore2.recoverLease();
204
205    // before writing back to the WAL store, optionally do a couple of WAL rolls (which causes
206    // to delete the old WAL files).
207    if (walRolls) {
208      LOG.info("Inserting into second WALProcedureStore, causing WAL rolls");
209      for (int i = 0; i < 512; i++) {
210        // insert something to the second store then delete it, causing a WAL roll(s)
211        Procedure proc2 = new TestProcedure(i);
212        procStore2.insert(proc2, null);
213        procStore2.delete(proc2.getProcId()); // delete the procedure so that the WAL is removed later
214      }
215    }
216
217    // Now, insert something to the first store, should fail.
218    // If the store does a WAL roll and continue with another logId without checking higher logIds
219    // it will incorrectly succeed.
220    LOG.info("Inserting into first WALProcedureStore");
221    try {
222      procStore.insert(new TestProcedure(11), null);
223      fail("Inserting into Procedure Store should have failed");
224    } catch (Exception ex) {
225      LOG.info("Received expected exception", ex);
226    }
227  }
228
229  // ==========================================================================
230  //  Helpers
231  // ==========================================================================
232  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
233    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
234  }
235}