001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import java.io.IOException; 021import java.util.concurrent.CountDownLatch; 022import java.util.concurrent.Future; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.hbase.HBaseClassTestRule; 025import org.apache.hadoop.hbase.HBaseTestingUtil; 026import org.apache.hadoop.hbase.HConstants; 027import org.apache.hadoop.hbase.ServerName; 028import org.apache.hadoop.hbase.TableName; 029import org.apache.hadoop.hbase.client.RegionInfo; 030import org.apache.hadoop.hbase.master.HMaster; 031import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 032import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager; 033import org.apache.hadoop.hbase.procedure2.Procedure; 034import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 035import org.apache.hadoop.hbase.testclassification.MasterTests; 036import org.apache.hadoop.hbase.testclassification.MediumTests; 037import org.apache.hadoop.hbase.util.Bytes; 038import org.apache.zookeeper.KeeperException; 039import org.junit.AfterClass; 040import org.junit.BeforeClass; 041import org.junit.ClassRule; 042import org.junit.Test; 043import org.junit.experimental.categories.Category; 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046 047import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState; 048 049/** 050 * Testcase for HBASE-28522. 051 * <p> 052 * We used to have test with the same name but in different package for HBASE-23636, where DTP will 053 * hold the exclusive lock all the time, and it will reset TRSPs which has been attached to 054 * RegionStateNodes, so we need special logic in SCP to deal with it. 055 * <p> 056 * After HBASE-28522, DTP will not reset TRSPs any more, so SCP does not need to take care of this 057 * special case, thues we removed the special logic in SCP and also the UT for HBASE-22636 is not 058 * valid any more, so we just removed the old one and introduce a new one with the same name here. 059 */ 060@Category({ MasterTests.class, MediumTests.class }) 061public class TestRaceBetweenSCPAndDTP { 062 063 @ClassRule 064 public static final HBaseClassTestRule CLASS_RULE = 065 HBaseClassTestRule.forClass(TestRaceBetweenSCPAndDTP.class); 066 067 private static final Logger LOG = LoggerFactory.getLogger(TestRaceBetweenSCPAndDTP.class); 068 069 private static final HBaseTestingUtil UTIL = new HBaseTestingUtil(); 070 071 private static TableName NAME = TableName.valueOf("Race"); 072 073 private static byte[] CF = Bytes.toBytes("cf"); 074 075 private static CountDownLatch ARRIVE_GET_REPLICATION_PEER_MANAGER; 076 077 private static CountDownLatch RESUME_GET_REPLICATION_PEER_MANAGER; 078 079 public static final class HMasterForTest extends HMaster { 080 081 public HMasterForTest(Configuration conf) throws IOException, KeeperException { 082 super(conf); 083 } 084 085 @Override 086 public ReplicationPeerManager getReplicationPeerManager() { 087 if (ARRIVE_GET_REPLICATION_PEER_MANAGER != null) { 088 ARRIVE_GET_REPLICATION_PEER_MANAGER.countDown(); 089 ARRIVE_GET_REPLICATION_PEER_MANAGER = null; 090 try { 091 RESUME_GET_REPLICATION_PEER_MANAGER.await(); 092 } catch (InterruptedException e) { 093 } 094 } 095 return super.getReplicationPeerManager(); 096 } 097 } 098 099 @BeforeClass 100 public static void setUp() throws Exception { 101 UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class); 102 UTIL.startMiniCluster(2); 103 UTIL.createTable(NAME, CF); 104 UTIL.waitTableAvailable(NAME); 105 UTIL.getAdmin().balancerSwitch(false, true); 106 } 107 108 @AfterClass 109 public static void tearDown() throws Exception { 110 UTIL.shutdownMiniCluster(); 111 } 112 113 private boolean wasExecuted(Procedure<?> proc) { 114 // RUNNABLE is not enough to make sure that the DTP has acquired the table lock, as we will set 115 // procedure to RUNNABLE first and then acquire the execution lock 116 return proc.wasExecuted() || proc.getState() == ProcedureState.WAITING_TIMEOUT 117 || proc.getState() == ProcedureState.WAITING; 118 } 119 120 @Test 121 public void testRace() throws Exception { 122 RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo(); 123 AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager(); 124 ServerName sn = am.getRegionStates().getRegionState(region).getServerName(); 125 LOG.info("ServerName={}, region={}", sn, region); 126 127 ARRIVE_GET_REPLICATION_PEER_MANAGER = new CountDownLatch(1); 128 RESUME_GET_REPLICATION_PEER_MANAGER = new CountDownLatch(1); 129 // Assign to local variable because this static gets set to null in above running thread and 130 // so NPE. 131 CountDownLatch cdl = ARRIVE_GET_REPLICATION_PEER_MANAGER; 132 UTIL.getMiniHBaseCluster().stopRegionServer(sn); 133 cdl.await(); 134 135 Future<?> future = UTIL.getAdmin().disableTableAsync(NAME); 136 ProcedureExecutor<?> procExec = 137 UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor(); 138 // make sure the DTP has been executed 139 UTIL.waitFor(60000, 140 () -> procExec.getProcedures().stream().filter(p -> p instanceof DisableTableProcedure) 141 .map(p -> (DisableTableProcedure) p).filter(p -> p.getTableName().equals(NAME)) 142 .anyMatch(this::wasExecuted)); 143 RESUME_GET_REPLICATION_PEER_MANAGER.countDown(); 144 145 // make sure the DTP can finish 146 future.get(); 147 148 // also make sure all SCPs are finished 149 UTIL.waitFor(60000, () -> procExec.getProcedures().stream() 150 .filter(p -> p instanceof ServerCrashProcedure).allMatch(Procedure::isFinished)); 151 } 152}