/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.replication;

import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.DISPATCH_WALS_VALUE;
import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE;

import java.io.IOException;
import java.io.UncheckedIOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.replication.SyncReplicationState;
import org.apache.hadoop.hbase.replication.SyncReplicationTestBase;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

/**
 * Testcase for HBASE-21494.
 * <p>
 * The test blocks a standby-to-downgrade-active transition on the second cluster by making the
 * master throw whenever a {@link SyncReplicationReplayWALRemoteProcedure} reports completion, then
 * aborts that master while a {@link RecoverStandbyProcedure} sits in one of the states named in
 * {@link #testRestart()}, and finally checks that the transition completes once the injected
 * failure is switched off.
 */
@Tag(MasterTests.TAG)
@Tag(LargeTests.TAG)
public class TestRegisterPeerWorkerWhenRestarting extends SyncReplicationTestBase {

  /** Switch for the injected failure; read by {@link HMasterForTest} and toggled by the test. */
  private static volatile boolean FAIL = false;

  /**
   * Master implementation that can be told, through {@link #FAIL}, to reject completion reports of
   * {@link SyncReplicationReplayWALRemoteProcedure}s.
   */
  public static final class HMasterForTest extends HMaster {

    public HMasterForTest(Configuration conf) throws IOException {
      super(conf);
    }

    /**
     * Throws a {@link RuntimeException} instead of delegating to the superclass when failure
     * injection applies to the given procedure; otherwise behaves like the superclass.
     */
    @Override
    public void remoteProcedureCompleted(long procId, byte[] data) {
      if (shouldInjectError(procId)) {
        throw new RuntimeException("Inject error");
      }
      super.remoteProcedureCompleted(procId, data);
    }

    /**
     * Tells whether the completion report for {@code procId} must be rejected. The procedure is
     * only looked up while {@link #FAIL} is set.
     */
    private boolean shouldInjectError(long procId) {
      if (!FAIL) {
        return false;
      }
      return getMasterProcedureExecutor()
        .getProcedure(procId) instanceof SyncReplicationReplayWALRemoteProcedure;
    }
  }

  /** Registers {@link HMasterForTest} as the master class for UTIL2, then runs the base set up. */
  @BeforeAll
  public static void setUp() throws Exception {
    UTIL2.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
    SyncReplicationTestBase.setUp();
  }

  /** Delegates to the base class tear down. */
  @AfterAll
  public static void tearDown() throws Exception {
    SyncReplicationTestBase.tearDown();
  }

  /**
   * Tells whether {@code procedure} is a {@link RecoverStandbyProcedure} whose current state id is
   * {@code DISPATCH_WALS_VALUE} or {@code UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE}.
   */
  private static boolean isRecoverStandbyInWorkerState(Object procedure) {
    if (!(procedure instanceof RecoverStandbyProcedure)) {
      return false;
    }
    RecoverStandbyProcedure recover = (RecoverStandbyProcedure) procedure;
    return recover.getCurrentStateId() == DISPATCH_WALS_VALUE
      || recover.getCurrentStateId() == UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE;
  }

  @Test
  public void testRestart() throws Exception {
    // Cluster 2 becomes the standby, cluster 1 the active side.
    UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
      SyncReplicationState.STANDBY);
    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
      SyncReplicationState.ACTIVE);

    UTIL1.getAdmin().disableReplicationPeer(PEER_ID);
    write(UTIL1, 0, 100);
    Thread.sleep(2000);
    // With the peer disabled none of the rows may have been replicated.
    verifyNotReplicatedThroughRegion(UTIL2, 0, 100);

    // Move the active cluster from A to DA first; this keeps the amount of error logging down.
    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
      SyncReplicationState.DOWNGRADE_ACTIVE);
    ProcedureExecutor<MasterProcedureEnv> executor =
      UTIL2.getHBaseCluster().getMaster().getMasterProcedureExecutor();
    // From here on the transition on cluster 2 cannot succeed.
    FAIL = true;
    Thread transitThread = new Thread(() -> {
      try {
        UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
          SyncReplicationState.DOWNGRADE_ACTIVE);
      } catch (IOException e) {
        throw new UncheckedIOException(e);
      }
    });
    transitThread.start();
    // Block until a procedure has reached a state in which the peer worker has to be registered
    // again when the master restarts.
    UTIL2.waitFor(60000, () -> executor.getProcedures().stream()
      .anyMatch(TestRegisterPeerWorkerWhenRestarting::isRecoverStandbyInWorkerState));
    // Abort the current master and wait for its thread to finish.
    MasterThread abortedMaster = UTIL2.getMiniHBaseCluster().getMasterThread();
    abortedMaster.getMaster().abort("for testing");
    abortedMaster.join();
    FAIL = false;
    transitThread.join();
    // The transition must now be able to complete.
    UTIL2.waitFor(60000, () -> UTIL2.getAdmin().getReplicationPeerSyncReplicationState(PEER_ID)
        == SyncReplicationState.DOWNGRADE_ACTIVE);
    verify(UTIL2, 0, 100);
  }
}