001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.junit.jupiter.api.Assertions.assertNotNull; 021import static org.junit.jupiter.api.Assertions.assertNull; 022 023import java.io.IOException; 024import java.util.ArrayList; 025import java.util.List; 026import java.util.concurrent.CountDownLatch; 027import java.util.concurrent.Future; 028import org.apache.hadoop.conf.Configuration; 029import org.apache.hadoop.hbase.HBaseTestingUtil; 030import org.apache.hadoop.hbase.PleaseHoldException; 031import org.apache.hadoop.hbase.ServerName; 032import org.apache.hadoop.hbase.StartTestingClusterOption; 033import org.apache.hadoop.hbase.TableName; 034import org.apache.hadoop.hbase.client.RegionInfo; 035import org.apache.hadoop.hbase.master.HMaster; 036import org.apache.hadoop.hbase.master.MasterServices; 037import org.apache.hadoop.hbase.master.RegionPlan; 038import org.apache.hadoop.hbase.master.RegionServerList; 039import org.apache.hadoop.hbase.master.ServerManager; 040import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; 041import org.apache.hadoop.hbase.master.region.MasterRegion; 042import org.apache.hadoop.hbase.regionserver.HRegionServer; 043import org.apache.hadoop.hbase.testclassification.MasterTests; 044import org.apache.hadoop.hbase.testclassification.MediumTests; 045import org.apache.hadoop.hbase.util.Bytes; 046import org.apache.hadoop.hbase.util.IdLock; 047import org.junit.jupiter.api.AfterAll; 048import org.junit.jupiter.api.BeforeAll; 049import org.junit.jupiter.api.Tag; 050import org.junit.jupiter.api.Test; 051 052import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; 053 054import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 055import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 056import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 057 058/** 059 * Testcase for HBASE-22365. 060 */ 061@Tag(MasterTests.TAG) 062@Tag(MediumTests.TAG) 063public class TestSCPGetRegionsRace { 064 065 private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>(); 066 067 private static final class ServerManagerForTest extends ServerManager { 068 069 public ServerManagerForTest(MasterServices master, RegionServerList storage) { 070 super(master, storage); 071 } 072 073 @Override 074 public List<ServerName> createDestinationServersList() { 075 return super.createDestinationServersList(EXCLUDE_SERVERS); 076 } 077 } 078 079 private static CountDownLatch ARRIVE_REPORT; 080 081 private static CountDownLatch RESUME_REPORT; 082 083 private static CountDownLatch ARRIVE_GET; 084 085 private static CountDownLatch RESUME_GET; 086 087 private static final class AssignmentManagerForTest extends AssignmentManager { 088 089 public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) { 090 super(master, masterRegion); 091 } 092 093 @Override 094 public ReportRegionStateTransitionResponse reportRegionStateTransition( 095 ReportRegionStateTransitionRequest req) throws PleaseHoldException { 096 if (req.getTransition(0).getTransitionCode() == TransitionCode.CLOSED) { 097 if (ARRIVE_REPORT != null) { 098 ARRIVE_REPORT.countDown(); 099 try { 100 RESUME_REPORT.await(); 101 RESUME_REPORT = null; 102 } catch (InterruptedException e) { 103 throw new RuntimeException(e); 104 } 105 } 106 } 107 return super.reportRegionStateTransition(req); 108 } 109 110 @Override 111 public List<RegionInfo> getRegionsOnServer(ServerName serverName) { 112 List<RegionInfo> regions = super.getRegionsOnServer(serverName); 113 if (ARRIVE_GET != null) { 114 ARRIVE_GET.countDown(); 115 try { 116 RESUME_GET.await(); 117 RESUME_GET = null; 118 } catch (InterruptedException e) { 119 throw new RuntimeException(e); 120 } 121 } 122 return regions; 123 } 124 125 } 126 127 public static final class HMasterForTest extends HMaster { 128 129 public HMasterForTest(Configuration conf) throws IOException { 130 super(conf); 131 } 132 133 @Override 134 protected AssignmentManager createAssignmentManager(MasterServices master, 135 MasterRegion masterRegion) { 136 return new AssignmentManagerForTest(master, masterRegion); 137 } 138 139 @Override 140 protected ServerManager createServerManager(MasterServices master, RegionServerList storage) 141 throws IOException { 142 setupClusterConnection(); 143 return new ServerManagerForTest(master, storage); 144 } 145 } 146 147 private static final HBaseTestingUtil UTIL = new HBaseTestingUtil(); 148 149 private static TableName NAME = TableName.valueOf("Assign"); 150 151 private static byte[] CF = Bytes.toBytes("cf"); 152 153 @BeforeAll 154 public static void setUp() throws Exception { 155 UTIL.startMiniCluster(StartTestingClusterOption.builder().masterClass(HMasterForTest.class) 156 .numMasters(1).numRegionServers(3).build()); 157 UTIL.createTable(NAME, CF); 158 UTIL.waitTableAvailable(NAME); 159 UTIL.getAdmin().balancerSwitch(false, true); 160 } 161 162 @AfterAll 163 public static void tearDown() throws Exception { 164 UTIL.shutdownMiniCluster(); 165 } 166 167 @Test 168 public void test() throws Exception { 169 RegionInfo region = 170 Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(NAME)).getRegionInfo(); 171 HMaster master = UTIL.getMiniHBaseCluster().getMaster(); 172 AssignmentManager am = master.getAssignmentManager(); 173 RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region); 174 ServerName source = rsn.getRegionLocation(); 175 ServerName dest = 176 UTIL.getAdmin().getRegionServers().stream().filter(sn -> !sn.equals(source)).findAny().get(); 177 178 ARRIVE_REPORT = new CountDownLatch(1); 179 RESUME_REPORT = new CountDownLatch(1); 180 181 Future<?> future = am.moveAsync(new RegionPlan(region, source, dest)); 182 183 ARRIVE_REPORT.await(); 184 ARRIVE_REPORT = null; 185 // let's get procedure lock to stop the TRSP 186 IdLock procExecutionLock = master.getMasterProcedureExecutor().getProcExecutionLock(); 187 long procId = master.getProcedures().stream() 188 .filter(p -> p instanceof RegionRemoteProcedureBase).findAny().get().getProcId(); 189 IdLock.Entry lockEntry = procExecutionLock.getLockEntry(procId); 190 RESUME_REPORT.countDown(); 191 192 // kill the source region server 193 ARRIVE_GET = new CountDownLatch(1); 194 RESUME_GET = new CountDownLatch(1); 195 UTIL.getMiniHBaseCluster().killRegionServer(source); 196 197 // wait until we try to get the region list of the region server 198 ARRIVE_GET.await(); 199 ARRIVE_GET = null; 200 // release the procedure lock and let the TRSP to finish 201 procExecutionLock.releaseLockEntry(lockEntry); 202 future.get(); 203 204 // resume the SCP 205 EXCLUDE_SERVERS.add(dest); 206 RESUME_GET.countDown(); 207 // wait until there are no SCPs and TRSPs 208 UTIL.waitFor(60000, () -> master.getProcedures().stream().allMatch(p -> p.isFinished() 209 || (!(p instanceof ServerCrashProcedure) && !(p instanceof TransitRegionStateProcedure)))); 210 211 // assert the region is only on the dest server. 212 HRegionServer rs = UTIL.getMiniHBaseCluster().getRegionServer(dest); 213 assertNotNull(rs.getRegion(region.getEncodedName())); 214 assertNull(UTIL.getOtherRegionServer(rs).getRegion(region.getEncodedName())); 215 } 216}