001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;
021import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_MAX_SPLITTER;
022import static org.apache.hadoop.hbase.master.procedure.ServerProcedureInterface.ServerOperationType.SPLIT_WAL;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.List;
027import java.util.concurrent.CountDownLatch;
028import org.apache.hadoop.fs.FileStatus;
029import org.apache.hadoop.fs.FileSystem;
030import org.apache.hadoop.fs.Path;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseTestingUtil;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.ServerName;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
037import org.apache.hadoop.hbase.master.procedure.ServerProcedureInterface;
038import org.apache.hadoop.hbase.procedure2.Procedure;
039import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
040import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
041import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
042import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
043import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
044import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
045import org.apache.hadoop.hbase.testclassification.LargeTests;
046import org.apache.hadoop.hbase.testclassification.MasterTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.CommonFSUtils;
049import org.apache.hadoop.hbase.util.JVMClusterUtil;
050import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
051import org.junit.After;
052import org.junit.Assert;
053import org.junit.Before;
054import org.junit.ClassRule;
055import org.junit.Test;
056import org.junit.experimental.categories.Category;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
061
062import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
063import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
064
065@Category({ MasterTests.class, LargeTests.class })
066
067public class TestSplitWALManager {
068
069  @ClassRule
070  public static final HBaseClassTestRule CLASS_RULE =
071    HBaseClassTestRule.forClass(TestSplitWALManager.class);
072
073  private static final Logger LOG = LoggerFactory.getLogger(TestSplitWALManager.class);
074  private static HBaseTestingUtil TEST_UTIL;
075  private HMaster master;
076  private SplitWALManager splitWALManager;
077  private TableName TABLE_NAME;
078  private byte[] FAMILY;
079
080  @Before
081  public void setup() throws Exception {
082    TEST_UTIL = new HBaseTestingUtil();
083    TEST_UTIL.getConfiguration().setBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK, false);
084    TEST_UTIL.getConfiguration().setInt(HBASE_SPLIT_WAL_MAX_SPLITTER, 1);
085    TEST_UTIL.startMiniCluster(3);
086    master = TEST_UTIL.getHBaseCluster().getMaster();
087    splitWALManager = master.getSplitWALManager();
088    TABLE_NAME = TableName.valueOf(Bytes.toBytes("TestSplitWALManager"));
089    FAMILY = Bytes.toBytes("test");
090  }
091
092  @After
093  public void teardown() throws Exception {
094    TEST_UTIL.shutdownMiniCluster();
095  }
096
097  @Test
098  public void testAcquireAndRelease() throws Exception {
099    List<FakeServerProcedure> testProcedures = new ArrayList<>();
100    for (int i = 0; i < 4; i++) {
101      testProcedures
102        .add(new FakeServerProcedure(TEST_UTIL.getHBaseCluster().getServerHoldingMeta()));
103    }
104    ServerName server = splitWALManager.acquireSplitWALWorker(testProcedures.get(0));
105    Assert.assertNotNull(server);
106    Assert.assertNotNull(splitWALManager.acquireSplitWALWorker(testProcedures.get(1)));
107    Assert.assertNotNull(splitWALManager.acquireSplitWALWorker(testProcedures.get(2)));
108
109    Exception e = null;
110    try {
111      splitWALManager.acquireSplitWALWorker(testProcedures.get(3));
112    } catch (ProcedureSuspendedException suspendException) {
113      e = suspendException;
114    }
115    Assert.assertNotNull(e);
116    Assert.assertTrue(e instanceof ProcedureSuspendedException);
117
118    splitWALManager.releaseSplitWALWorker(server, TEST_UTIL.getHBaseCluster().getMaster()
119      .getMasterProcedureExecutor().getEnvironment().getProcedureScheduler());
120    Assert.assertNotNull(splitWALManager.acquireSplitWALWorker(testProcedures.get(3)));
121  }
122
123  @Test
124  public void testAddNewServer() throws Exception {
125    List<FakeServerProcedure> testProcedures = new ArrayList<>();
126    for (int i = 0; i < 4; i++) {
127      testProcedures
128        .add(new FakeServerProcedure(TEST_UTIL.getHBaseCluster().getServerHoldingMeta()));
129    }
130    ServerName server = splitWALManager.acquireSplitWALWorker(testProcedures.get(0));
131    Assert.assertNotNull(server);
132    Assert.assertNotNull(splitWALManager.acquireSplitWALWorker(testProcedures.get(1)));
133    Assert.assertNotNull(splitWALManager.acquireSplitWALWorker(testProcedures.get(2)));
134
135    Exception e = null;
136    try {
137      splitWALManager.acquireSplitWALWorker(testProcedures.get(3));
138    } catch (ProcedureSuspendedException suspendException) {
139      e = suspendException;
140    }
141    Assert.assertNotNull(e);
142    Assert.assertTrue(e instanceof ProcedureSuspendedException);
143
144    JVMClusterUtil.RegionServerThread newServer = TEST_UTIL.getHBaseCluster().startRegionServer();
145    newServer.waitForServerOnline();
146    Assert.assertNotNull(splitWALManager.acquireSplitWALWorker(testProcedures.get(3)));
147  }
148
149  @Test
150  public void testCreateSplitWALProcedures() throws Exception {
151    TEST_UTIL.createTable(TABLE_NAME, FAMILY, TEST_UTIL.KEYS_FOR_HBA_CREATE_TABLE);
152    // load table
153    TEST_UTIL.loadTable(TEST_UTIL.getConnection().getTable(TABLE_NAME), FAMILY);
154    ProcedureExecutor<MasterProcedureEnv> masterPE = master.getMasterProcedureExecutor();
155    ServerName metaServer = TEST_UTIL.getHBaseCluster().getServerHoldingMeta();
156    Path metaWALDir = new Path(TEST_UTIL.getDefaultRootDirPath(),
157      AbstractFSWALProvider.getWALDirectoryName(metaServer.toString()));
158    // Test splitting meta wal
159    FileStatus[] wals =
160      TEST_UTIL.getTestFileSystem().listStatus(metaWALDir, MasterWalManager.META_FILTER);
161    Assert.assertEquals(1, wals.length);
162    List<Procedure> testProcedures =
163      splitWALManager.createSplitWALProcedures(Lists.newArrayList(wals[0]), metaServer);
164    Assert.assertEquals(1, testProcedures.size());
165    ProcedureTestingUtility.submitAndWait(masterPE, testProcedures.get(0));
166    Assert.assertFalse(TEST_UTIL.getTestFileSystem().exists(wals[0].getPath()));
167
168    // Test splitting wal
169    wals = TEST_UTIL.getTestFileSystem().listStatus(metaWALDir, MasterWalManager.NON_META_FILTER);
170    Assert.assertEquals(1, wals.length);
171    testProcedures =
172      splitWALManager.createSplitWALProcedures(Lists.newArrayList(wals[0]), metaServer);
173    Assert.assertEquals(1, testProcedures.size());
174    ProcedureTestingUtility.submitAndWait(masterPE, testProcedures.get(0));
175    Assert.assertFalse(TEST_UTIL.getTestFileSystem().exists(wals[0].getPath()));
176  }
177
178  @Test
179  public void testAcquireAndReleaseSplitWALWorker() throws Exception {
180    ProcedureExecutor<MasterProcedureEnv> masterPE = master.getMasterProcedureExecutor();
181    List<FakeServerProcedure> testProcedures = new ArrayList<>();
182    for (int i = 0; i < 3; i++) {
183      FakeServerProcedure procedure =
184        new FakeServerProcedure(TEST_UTIL.getHBaseCluster().getRegionServer(i).getServerName());
185      testProcedures.add(procedure);
186      ProcedureTestingUtility.submitProcedure(masterPE, procedure, HConstants.NO_NONCE,
187        HConstants.NO_NONCE);
188    }
189    TEST_UTIL.waitFor(10000, () -> testProcedures.get(2).isWorkerAcquired());
190    FakeServerProcedure failedProcedure =
191      new FakeServerProcedure(TEST_UTIL.getHBaseCluster().getServerHoldingMeta());
192    ProcedureTestingUtility.submitProcedure(masterPE, failedProcedure, HConstants.NO_NONCE,
193      HConstants.NO_NONCE);
194    TEST_UTIL.waitFor(20000, () -> failedProcedure.isTriedToAcquire());
195    Assert.assertFalse(failedProcedure.isWorkerAcquired());
196    // let one procedure finish and release worker
197    testProcedures.get(0).countDown();
198    TEST_UTIL.waitFor(10000, () -> failedProcedure.isWorkerAcquired());
199    Assert.assertTrue(testProcedures.get(0).isSuccess());
200  }
201
202  @Test
203  public void testGetWALsToSplit() throws Exception {
204    TEST_UTIL.createTable(TABLE_NAME, FAMILY, TEST_UTIL.KEYS_FOR_HBA_CREATE_TABLE);
205    // load table
206    TEST_UTIL.loadTable(TEST_UTIL.getConnection().getTable(TABLE_NAME), FAMILY);
207    ServerName metaServer = TEST_UTIL.getHBaseCluster().getServerHoldingMeta();
208    List<FileStatus> metaWals = splitWALManager.getWALsToSplit(metaServer, true);
209    Assert.assertEquals(1, metaWals.size());
210    List<FileStatus> wals = splitWALManager.getWALsToSplit(metaServer, false);
211    Assert.assertEquals(1, wals.size());
212    ServerName testServer = TEST_UTIL.getHBaseCluster().getRegionServerThreads().stream()
213      .map(rs -> rs.getRegionServer().getServerName()).filter(rs -> rs != metaServer).findAny()
214      .get();
215    metaWals = splitWALManager.getWALsToSplit(testServer, true);
216    Assert.assertEquals(0, metaWals.size());
217  }
218
219  private void splitLogsTestHelper(HBaseTestingUtil testUtil) throws Exception {
220    HMaster hmaster = testUtil.getHBaseCluster().getMaster();
221    SplitWALManager splitWALManager = hmaster.getSplitWALManager();
222    LOG.info(
223      "The Master FS is pointing to: " + hmaster.getMasterFileSystem().getFileSystem().getUri());
224    LOG.info(
225      "The WAL FS is pointing to: " + hmaster.getMasterFileSystem().getWALFileSystem().getUri());
226
227    testUtil.createTable(TABLE_NAME, FAMILY, testUtil.KEYS_FOR_HBA_CREATE_TABLE);
228    // load table
229    testUtil.loadTable(testUtil.getConnection().getTable(TABLE_NAME), FAMILY);
230    ProcedureExecutor<MasterProcedureEnv> masterPE = hmaster.getMasterProcedureExecutor();
231    ServerName metaServer = testUtil.getHBaseCluster().getServerHoldingMeta();
232    ServerName testServer = testUtil.getHBaseCluster().getRegionServerThreads().stream()
233      .map(rs -> rs.getRegionServer().getServerName()).filter(rs -> rs != metaServer).findAny()
234      .get();
235    List<Procedure> procedures = splitWALManager.splitWALs(testServer, false);
236    Assert.assertEquals(1, procedures.size());
237    ProcedureTestingUtility.submitAndWait(masterPE, procedures.get(0));
238    Assert.assertEquals(0, splitWALManager.getWALsToSplit(testServer, false).size());
239
240    // Validate the old WAL file archive dir
241    Path walRootDir = hmaster.getMasterFileSystem().getWALRootDir();
242    Path walArchivePath = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
243    FileSystem walFS = hmaster.getMasterFileSystem().getWALFileSystem();
244    int archiveFileCount = walFS.listStatus(walArchivePath).length;
245
246    procedures = splitWALManager.splitWALs(metaServer, true);
247    Assert.assertEquals(1, procedures.size());
248    ProcedureTestingUtility.submitAndWait(masterPE, procedures.get(0));
249    Assert.assertEquals(0, splitWALManager.getWALsToSplit(metaServer, true).size());
250    Assert.assertEquals(1, splitWALManager.getWALsToSplit(metaServer, false).size());
251    // There should be archiveFileCount + 1 WALs after SplitWALProcedure finish
252    Assert.assertEquals("Splitted WAL files should be archived", archiveFileCount + 1,
253      walFS.listStatus(walArchivePath).length);
254  }
255
256  @Test
257  public void testSplitLogs() throws Exception {
258    splitLogsTestHelper(TEST_UTIL);
259  }
260
261  @Test
262  public void testSplitLogsWithDifferentWalAndRootFS() throws Exception {
263    HBaseTestingUtil testUtil2 = new HBaseTestingUtil();
264    testUtil2.getConfiguration().setBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK, false);
265    testUtil2.getConfiguration().setInt(HBASE_SPLIT_WAL_MAX_SPLITTER, 1);
266    Path dir = TEST_UTIL.getDataTestDirOnTestFS("testWalDir");
267    testUtil2.getConfiguration().set(CommonFSUtils.HBASE_WAL_DIR, dir.toString());
268    CommonFSUtils.setWALRootDir(testUtil2.getConfiguration(), dir);
269    testUtil2.startMiniCluster(3);
270    splitLogsTestHelper(testUtil2);
271    testUtil2.shutdownMiniCluster();
272  }
273
274  @Test
275  public void testWorkerReloadWhenMasterRestart() throws Exception {
276    List<FakeServerProcedure> testProcedures = new ArrayList<>();
277    for (int i = 0; i < 3; i++) {
278      FakeServerProcedure procedure =
279        new FakeServerProcedure(TEST_UTIL.getHBaseCluster().getRegionServer(i).getServerName());
280      testProcedures.add(procedure);
281      ProcedureTestingUtility.submitProcedure(master.getMasterProcedureExecutor(), procedure,
282        HConstants.NO_NONCE, HConstants.NO_NONCE);
283    }
284    TEST_UTIL.waitFor(10000, () -> testProcedures.get(2).isWorkerAcquired());
285    // Kill master
286    TEST_UTIL.getHBaseCluster().killMaster(master.getServerName());
287    TEST_UTIL.getHBaseCluster().waitForMasterToStop(master.getServerName(), 20000);
288    // restart master
289    TEST_UTIL.getHBaseCluster().startMaster();
290    TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster();
291    this.master = TEST_UTIL.getHBaseCluster().getMaster();
292
293    FakeServerProcedure failedProcedure =
294      new FakeServerProcedure(TEST_UTIL.getHBaseCluster().getServerHoldingMeta());
295    ProcedureTestingUtility.submitProcedure(master.getMasterProcedureExecutor(), failedProcedure,
296      HConstants.NO_NONCE, HConstants.NO_NONCE);
297    TEST_UTIL.waitFor(20000, () -> failedProcedure.isTriedToAcquire());
298    Assert.assertFalse(failedProcedure.isWorkerAcquired());
299    for (int i = 0; i < 3; i++) {
300      testProcedures.get(i).countDown();
301    }
302    failedProcedure.countDown();
303  }
304
305  public static final class FakeServerProcedure
306    extends StateMachineProcedure<MasterProcedureEnv, MasterProcedureProtos.SplitWALState>
307    implements ServerProcedureInterface {
308
309    private ServerName serverName;
310    private ServerName worker;
311    private CountDownLatch barrier = new CountDownLatch(1);
312    private boolean triedToAcquire = false;
313
314    public FakeServerProcedure() {
315    }
316
317    public FakeServerProcedure(ServerName serverName) {
318      this.serverName = serverName;
319    }
320
321    public ServerName getServerName() {
322      return serverName;
323    }
324
325    @Override
326    public boolean hasMetaTableRegion() {
327      return false;
328    }
329
330    @Override
331    public ServerOperationType getServerOperationType() {
332      return SPLIT_WAL;
333    }
334
335    @Override
336    protected Flow executeFromState(MasterProcedureEnv env,
337      MasterProcedureProtos.SplitWALState state)
338      throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
339      SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
340      switch (state) {
341        case ACQUIRE_SPLIT_WAL_WORKER:
342          triedToAcquire = true;
343          worker = splitWALManager.acquireSplitWALWorker(this);
344          setNextState(MasterProcedureProtos.SplitWALState.DISPATCH_WAL_TO_WORKER);
345          return Flow.HAS_MORE_STATE;
346        case DISPATCH_WAL_TO_WORKER:
347          barrier.await();
348          setNextState(MasterProcedureProtos.SplitWALState.RELEASE_SPLIT_WORKER);
349          return Flow.HAS_MORE_STATE;
350        case RELEASE_SPLIT_WORKER:
351          splitWALManager.releaseSplitWALWorker(worker, env.getProcedureScheduler());
352          return Flow.NO_MORE_STATE;
353        default:
354          throw new UnsupportedOperationException("unhandled state=" + state);
355      }
356    }
357
358    public boolean isWorkerAcquired() {
359      return worker != null;
360    }
361
362    public boolean isTriedToAcquire() {
363      return triedToAcquire;
364    }
365
366    public void countDown() {
367      this.barrier.countDown();
368    }
369
370    @Override
371    protected void rollbackState(MasterProcedureEnv env, MasterProcedureProtos.SplitWALState state)
372      throws IOException, InterruptedException {
373
374    }
375
376    @Override
377    protected MasterProcedureProtos.SplitWALState getState(int stateId) {
378      return MasterProcedureProtos.SplitWALState.forNumber(stateId);
379    }
380
381    @Override
382    protected int getStateId(MasterProcedureProtos.SplitWALState state) {
383      return state.getNumber();
384    }
385
386    @Override
387    protected MasterProcedureProtos.SplitWALState getInitialState() {
388      return MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER;
389    }
390
391    @Override
392    protected boolean holdLock(MasterProcedureEnv env) {
393      return true;
394    }
395
396    @Override
397    protected void rollback(MasterProcedureEnv env) throws IOException, InterruptedException {
398
399    }
400
401    @Override
402    protected boolean abort(MasterProcedureEnv env) {
403      return false;
404    }
405
406    @Override
407    protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
408      MasterProcedureProtos.SplitWALData.Builder builder =
409        MasterProcedureProtos.SplitWALData.newBuilder();
410      builder.setWalPath("test").setCrashedServer(ProtobufUtil.toServerName(serverName));
411      serializer.serialize(builder.build());
412    }
413
414    @Override
415    protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
416      MasterProcedureProtos.SplitWALData data =
417        serializer.deserialize(MasterProcedureProtos.SplitWALData.class);
418      serverName = ProtobufUtil.toServerName(data.getCrashedServer());
419    }
420  }
421}