001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.replication;
019
020import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.DISPATCH_WALS_VALUE;
021import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE;
022
023import java.io.IOException;
024import java.io.UncheckedIOException;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.hbase.HConstants;
027import org.apache.hadoop.hbase.master.HMaster;
028import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
029import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
030import org.apache.hadoop.hbase.replication.SyncReplicationState;
031import org.apache.hadoop.hbase.replication.SyncReplicationTestBaseNoBeforeAll;
032import org.apache.hadoop.hbase.testclassification.LargeTests;
033import org.apache.hadoop.hbase.testclassification.MasterTests;
034import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
035import org.junit.jupiter.api.BeforeAll;
036import org.junit.jupiter.api.Tag;
037import org.junit.jupiter.api.Test;
038
039/**
040 * Testcase for HBASE-21494.
041 */
042@Tag(MasterTests.TAG)
043@Tag(LargeTests.TAG)
044public class TestRegisterPeerWorkerWhenRestarting extends SyncReplicationTestBaseNoBeforeAll {
045
046  private static volatile boolean FAIL = false;
047
048  public static final class HMasterForTest extends HMaster {
049
050    public HMasterForTest(Configuration conf) throws IOException {
051      super(conf);
052    }
053
054    @Override
055    public void remoteProcedureCompleted(long procId, byte[] data) {
056      if (
057        FAIL && getMasterProcedureExecutor()
058          .getProcedure(procId) instanceof SyncReplicationReplayWALRemoteProcedure
059      ) {
060        throw new RuntimeException("Inject error");
061      }
062      super.remoteProcedureCompleted(procId, data);
063    }
064  }
065
066  @BeforeAll
067  public static void setUp() throws Exception {
068    UTIL2.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
069    startClusters();
070  }
071
072  @Test
073  public void testRestart() throws Exception {
074    UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
075      SyncReplicationState.STANDBY);
076    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
077      SyncReplicationState.ACTIVE);
078
079    UTIL1.getAdmin().disableReplicationPeer(PEER_ID);
080    write(UTIL1, 0, 100);
081    Thread.sleep(2000);
082    // peer is disabled so no data have been replicated
083    verifyNotReplicatedThroughRegion(UTIL2, 0, 100);
084
085    // transit the A to DA first to avoid too many error logs.
086    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
087      SyncReplicationState.DOWNGRADE_ACTIVE);
088    HMaster master = UTIL2.getHBaseCluster().getMaster();
089    // make sure the transiting can not succeed
090    FAIL = true;
091    ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
092    Thread t = new Thread() {
093
094      @Override
095      public void run() {
096        try {
097          UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
098            SyncReplicationState.DOWNGRADE_ACTIVE);
099        } catch (IOException e) {
100          throw new UncheckedIOException(e);
101        }
102      }
103    };
104    t.start();
105    // wait until we are in the states where we need to register peer worker when restarting
106    UTIL2.waitFor(60000,
107      () -> procExec.getProcedures().stream().filter(p -> p instanceof RecoverStandbyProcedure)
108        .map(p -> (RecoverStandbyProcedure) p)
109        .anyMatch(p -> p.getCurrentStateId() == DISPATCH_WALS_VALUE
110          || p.getCurrentStateId() == UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE));
111    // failover to another master
112    MasterThread mt = UTIL2.getMiniHBaseCluster().getMasterThread();
113    mt.getMaster().abort("for testing");
114    mt.join();
115    FAIL = false;
116    t.join();
117    // make sure the new master can finish the transition
118    UTIL2.waitFor(60000, () -> UTIL2.getAdmin().getReplicationPeerSyncReplicationState(PEER_ID)
119        == SyncReplicationState.DOWNGRADE_ACTIVE);
120    verify(UTIL2, 0, 100);
121  }
122}