/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import java.io.IOException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.zookeeper.KeeperException;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;

/**
 * Testcase for HBASE-28522.
 * <p>
 * We used to have a test with the same name but in a different package for HBASE-23636, where DTP
 * would hold the exclusive lock all the time, and it would reset TRSPs which had been attached to
 * RegionStateNodes, so we needed special logic in SCP to deal with it.
 * <p>
 * After HBASE-28522, DTP will not reset TRSPs any more, so SCP does not need to take care of this
 * special case, thus we removed the special logic in SCP. The UT for HBASE-23636 is not valid any
 * more either, so we just removed the old one and introduce a new one with the same name here.
 */
@Tag(MasterTests.TAG)
@Tag(MediumTests.TAG)
public class TestRaceBetweenSCPAndDTP {

  private static final Logger LOG = LoggerFactory.getLogger(TestRaceBetweenSCPAndDTP.class);

  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();

  private static final TableName NAME = TableName.valueOf("Race");

  private static final byte[] CF = Bytes.toBytes("cf");

  // These latches are written by the test thread and read (and cleared) by the master's
  // procedure-execution threads, so they must be volatile for cross-thread visibility.
  private static volatile CountDownLatch ARRIVE_GET_REPLICATION_PEER_MANAGER;

  private static volatile CountDownLatch RESUME_GET_REPLICATION_PEER_MANAGER;

  /**
   * A test master which can be blocked inside {@link #getReplicationPeerManager()}, so the test
   * can freeze the in-flight SCP at a deterministic point and race a DTP against it.
   */
  public static final class HMasterForTest extends HMaster {

    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
      super(conf);
    }

    @Override
    public ReplicationPeerManager getReplicationPeerManager() {
      // Snapshot into a local first: another thread may null the static field between our
      // null-check and use, which would otherwise NPE.
      CountDownLatch arrive = ARRIVE_GET_REPLICATION_PEER_MANAGER;
      if (arrive != null) {
        arrive.countDown();
        ARRIVE_GET_REPLICATION_PEER_MANAGER = null;
        try {
          RESUME_GET_REPLICATION_PEER_MANAGER.await();
        } catch (InterruptedException e) {
          // Restore the interrupt status instead of swallowing it, so callers up the stack
          // can still observe the interruption.
          Thread.currentThread().interrupt();
        }
      }
      return super.getReplicationPeerManager();
    }
  }

  @BeforeAll
  public static void setUp() throws Exception {
    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
    UTIL.startMiniCluster(2);
    UTIL.createTable(NAME, CF);
    UTIL.waitTableAvailable(NAME);
    // Disable the balancer so regions stay where we observe them.
    UTIL.getAdmin().balancerSwitch(false, true);
  }

  @AfterAll
  public static void tearDown() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  /**
   * Returns whether the given procedure has really started executing.
   * <p>
   * RUNNABLE is not enough to make sure that the DTP has acquired the table lock, as we will set
   * the procedure to RUNNABLE first and then acquire the execution lock.
   */
  private boolean wasExecuted(Procedure<?> proc) {
    return proc.wasExecuted() || proc.getState() == ProcedureState.WAITING_TIMEOUT
      || proc.getState() == ProcedureState.WAITING;
  }

  @Test
  public void testRace() throws Exception {
    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
    ServerName sn = am.getRegionStates().getRegionState(region).getServerName();
    LOG.info("ServerName={}, region={}", sn, region);

    ARRIVE_GET_REPLICATION_PEER_MANAGER = new CountDownLatch(1);
    RESUME_GET_REPLICATION_PEER_MANAGER = new CountDownLatch(1);
    // Assign to a local variable because this static gets set to null in the above running
    // thread and so would NPE.
    CountDownLatch cdl = ARRIVE_GET_REPLICATION_PEER_MANAGER;
    // Kill the region server hosting the region; the resulting SCP will block in
    // getReplicationPeerManager until we release RESUME_GET_REPLICATION_PEER_MANAGER.
    UTIL.getMiniHBaseCluster().stopRegionServer(sn);
    cdl.await();

    Future<?> future = UTIL.getAdmin().disableTableAsync(NAME);
    ProcedureExecutor<?> procExec =
      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
    // make sure the DTP has been executed
    UTIL.waitFor(60000,
      () -> procExec.getProcedures().stream().filter(p -> p instanceof DisableTableProcedure)
        .map(p -> (DisableTableProcedure) p).filter(p -> p.getTableName().equals(NAME))
        .anyMatch(this::wasExecuted));
    RESUME_GET_REPLICATION_PEER_MANAGER.countDown();

    // make sure the DTP can finish
    future.get();

    // also make sure all SCPs are finished
    UTIL.waitFor(60000, () -> procExec.getProcedures().stream()
      .filter(p -> p instanceof ServerCrashProcedure).allMatch(Procedure::isFinished));
  }
}