/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
018package org.apache.hadoop.hbase.master.replication;
019
020import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.DISPATCH_WALS_VALUE;
021import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE;
022
023import java.io.IOException;
024import java.io.UncheckedIOException;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.hbase.HConstants;
027import org.apache.hadoop.hbase.master.HMaster;
028import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
029import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
030import org.apache.hadoop.hbase.replication.SyncReplicationState;
031import org.apache.hadoop.hbase.replication.SyncReplicationTestBase;
032import org.apache.hadoop.hbase.testclassification.LargeTests;
033import org.apache.hadoop.hbase.testclassification.MasterTests;
034import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
035import org.junit.jupiter.api.AfterAll;
036import org.junit.jupiter.api.BeforeAll;
037import org.junit.jupiter.api.Tag;
038import org.junit.jupiter.api.Test;
039
040/**
041 * Testcase for HBASE-21494.
042 */
043@Tag(MasterTests.TAG)
044@Tag(LargeTests.TAG)
045public class TestRegisterPeerWorkerWhenRestarting extends SyncReplicationTestBase {
046
047  private static volatile boolean FAIL = false;
048
049  public static final class HMasterForTest extends HMaster {
050
051    public HMasterForTest(Configuration conf) throws IOException {
052      super(conf);
053    }
054
055    @Override
056    public void remoteProcedureCompleted(long procId, byte[] data) {
057      if (
058        FAIL && getMasterProcedureExecutor()
059          .getProcedure(procId) instanceof SyncReplicationReplayWALRemoteProcedure
060      ) {
061        throw new RuntimeException("Inject error");
062      }
063      super.remoteProcedureCompleted(procId, data);
064    }
065  }
066
067  @BeforeAll
068  public static void setUp() throws Exception {
069    UTIL2.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
070    SyncReplicationTestBase.setUp();
071  }
072
073  @AfterAll
074  public static void tearDown() throws Exception {
075    SyncReplicationTestBase.tearDown();
076  }
077
078  @Test
079  public void testRestart() throws Exception {
080    UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
081      SyncReplicationState.STANDBY);
082    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
083      SyncReplicationState.ACTIVE);
084
085    UTIL1.getAdmin().disableReplicationPeer(PEER_ID);
086    write(UTIL1, 0, 100);
087    Thread.sleep(2000);
088    // peer is disabled so no data have been replicated
089    verifyNotReplicatedThroughRegion(UTIL2, 0, 100);
090
091    // transit the A to DA first to avoid too many error logs.
092    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
093      SyncReplicationState.DOWNGRADE_ACTIVE);
094    HMaster master = UTIL2.getHBaseCluster().getMaster();
095    // make sure the transiting can not succeed
096    FAIL = true;
097    ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
098    Thread t = new Thread() {
099
100      @Override
101      public void run() {
102        try {
103          UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
104            SyncReplicationState.DOWNGRADE_ACTIVE);
105        } catch (IOException e) {
106          throw new UncheckedIOException(e);
107        }
108      }
109    };
110    t.start();
111    // wait until we are in the states where we need to register peer worker when restarting
112    UTIL2.waitFor(60000,
113      () -> procExec.getProcedures().stream().filter(p -> p instanceof RecoverStandbyProcedure)
114        .map(p -> (RecoverStandbyProcedure) p)
115        .anyMatch(p -> p.getCurrentStateId() == DISPATCH_WALS_VALUE
116          || p.getCurrentStateId() == UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE));
117    // failover to another master
118    MasterThread mt = UTIL2.getMiniHBaseCluster().getMasterThread();
119    mt.getMaster().abort("for testing");
120    mt.join();
121    FAIL = false;
122    t.join();
123    // make sure the new master can finish the transition
124    UTIL2.waitFor(60000, () -> UTIL2.getAdmin().getReplicationPeerSyncReplicationState(PEER_ID)
125        == SyncReplicationState.DOWNGRADE_ACTIVE);
126    verify(UTIL2, 0, 100);
127  }
128}