/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.replication;

import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.DISPATCH_WALS_VALUE;
import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE;

import java.io.IOException;
import java.io.UncheckedIOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.replication.SyncReplicationState;
import org.apache.hadoop.hbase.replication.SyncReplicationTestBaseNoBeforeAll;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

/**
 * Testcase for HBASE-21494.
 * <p>
 * The test installs {@link HMasterForTest} as the master implementation of the second cluster,
 * blocks a transition to DOWNGRADE_ACTIVE by injecting errors, aborts the master while a
 * {@link RecoverStandbyProcedure} sits in one of the watched states, and then checks that the
 * transition completes and the written rows are visible on the second cluster.
 */
@Tag(MasterTests.TAG)
@Tag(LargeTests.TAG)
public class TestRegisterPeerWorkerWhenRestarting extends SyncReplicationTestBaseNoBeforeAll {

  /** While {@code true}, {@link HMasterForTest} rejects replay-WAL remote procedure completions. */
  private static volatile boolean FAIL = false;

  /**
   * Master implementation that throws from {@link #remoteProcedureCompleted(long, byte[])} for
   * {@link SyncReplicationReplayWALRemoteProcedure}s while {@code FAIL} is set.
   */
  public static final class HMasterForTest extends HMaster {

    public HMasterForTest(Configuration conf) throws IOException {
      super(conf);
    }

    @Override
    public void remoteProcedureCompleted(long procId, byte[] data) {
      if (isInjectedReplayFailure(procId)) {
        throw new RuntimeException("Inject error");
      }
      super.remoteProcedureCompleted(procId, data);
    }

    /**
     * Tells whether the completion report for {@code procId} should be rejected. The procedure is
     * only looked up when {@code FAIL} is set.
     */
    private boolean isInjectedReplayFailure(long procId) {
      if (!FAIL) {
        return false;
      }
      return getMasterProcedureExecutor()
        .getProcedure(procId) instanceof SyncReplicationReplayWALRemoteProcedure;
    }
  }

  /** Registers {@link HMasterForTest} as the master class for UTIL2, then starts the clusters. */
  @BeforeAll
  public static void setUp() throws Exception {
    Configuration standbyConf = UTIL2.getConfiguration();
    standbyConf.setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
    startClusters();
  }

  /**
   * Starts a thread that asks UTIL2 to move the peer to DOWNGRADE_ACTIVE. An {@link IOException}
   * from the admin call is rethrown inside that thread as an {@link UncheckedIOException}.
   * @return the already started thread
   */
  private static Thread startTransitToDowngradeActive() {
    Runnable transit = () -> {
      try {
        UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
          SyncReplicationState.DOWNGRADE_ACTIVE);
      } catch (IOException e) {
        throw new UncheckedIOException(e);
      }
    };
    Thread worker = new Thread(transit);
    worker.start();
    return worker;
  }

  /** Checks whether {@code proc} is in the DISPATCH_WALS or UNREGISTER_PEER_FROM_WORKER_STORAGE state. */
  private static boolean isInWatchedState(RecoverStandbyProcedure proc) {
    return proc.getCurrentStateId() == DISPATCH_WALS_VALUE
      || proc.getCurrentStateId() == UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE;
  }

  /** Checks whether any {@link RecoverStandbyProcedure} known to {@code executor} is in a watched state. */
  private static boolean hasRecoverStandbyInWatchedState(
    ProcedureExecutor<MasterProcedureEnv> executor) {
    return executor.getProcedures().stream()
      .filter(RecoverStandbyProcedure.class::isInstance)
      .map(RecoverStandbyProcedure.class::cast)
      .anyMatch(TestRegisterPeerWorkerWhenRestarting::isInWatchedState);
  }

  @Test
  public void testRestart() throws Exception {
    UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
      SyncReplicationState.STANDBY);
    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
      SyncReplicationState.ACTIVE);

    UTIL1.getAdmin().disableReplicationPeer(PEER_ID);
    write(UTIL1, 0, 100);
    Thread.sleep(2000);
    // the peer is disabled, so none of the rows may have been replicated yet
    verifyNotReplicatedThroughRegion(UTIL2, 0, 100);

    // move A to DA up front to avoid too many error logs
    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
      SyncReplicationState.DOWNGRADE_ACTIVE);
    ProcedureExecutor<MasterProcedureEnv> standbyExecutor =
      UTIL2.getHBaseCluster().getMaster().getMasterProcedureExecutor();
    // from here on the transition on UTIL2 can not succeed
    FAIL = true;
    Thread transitThread = startTransitToDowngradeActive();

    // wait until we reach a state where the peer worker must be registered again on restart
    UTIL2.waitFor(60000, () -> hasRecoverStandbyInWatchedState(standbyExecutor));

    // fail over to another master
    MasterThread activeMaster = UTIL2.getMiniHBaseCluster().getMasterThread();
    activeMaster.getMaster().abort("for testing");
    activeMaster.join();
    FAIL = false;
    transitThread.join();

    // the new master must be able to finish the transition
    UTIL2.waitFor(60000, () -> UTIL2.getAdmin().getReplicationPeerSyncReplicationState(PEER_ID)
        == SyncReplicationState.DOWNGRADE_ACTIVE);
    verify(UTIL2, 0, 100);
  }
}