/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.assignment;

import static org.junit.jupiter.api.Assertions.assertNotEquals;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.RegionServerList;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.region.MasterRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;

/**
 * Moves a region from one region server to another, aborts the source server while the region
 * state transition report is still being held back, and then checks that the region ends up on
 * exactly one of the two surviving region servers.
 * <p>
 * The interleaving is forced with pairs of latches: a hook counts down the {@code ARRIVE_*} latch
 * when it is reached and then blocks on the matching {@code RESUME_*} latch until the test
 * releases it.
 */
@Tag(MasterTests.TAG)
@Tag(MediumTests.TAG)
public class TestReportRegionStateTransitionFromDeadServer {

  // Servers passed as the exclusion list when the master builds its destination server list.
  // NOTE(review): this is a plain ArrayList that is read from master threads; the only write in
  // this file happens before RESUME_GET_REGIONS is released. Confirm no other reader can overlap
  // with that write, or switch to a thread-safe list.
  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();

  // The latches are assigned by the test thread and read by master threads, so they are volatile
  // to publish them safely. Each RESUME_* latch is always assigned before its ARRIVE_* partner, so
  // a hook that observes a non-null ARRIVE_* latch is guaranteed to also observe the RESUME_* one.
  private static volatile CountDownLatch ARRIVE_GET_REGIONS;
  private static volatile CountDownLatch RESUME_GET_REGIONS;
  private static volatile CountDownLatch ARRIVE_REPORT;
  private static volatile CountDownLatch RESUME_REPORT;

  /**
   * Waits for {@code latch} to reach zero. If the calling thread is interrupted while waiting, the
   * wait is abandoned and the thread's interrupt status is restored so that callers further up the
   * stack can still observe the interruption.
   * @param latch the latch to wait on
   */
  private static void awaitOrRestoreInterrupt(CountDownLatch latch) {
    try {
      latch.await();
    } catch (InterruptedException e) {
      // Do not swallow the interruption; hand it back to the owning thread.
      Thread.currentThread().interrupt();
    }
  }

  /**
   * {@link ServerManager} whose destination server list always leaves out
   * {@link #EXCLUDE_SERVERS}.
   */
  private static final class ServerManagerForTest extends ServerManager {

    public ServerManagerForTest(MasterServices master, RegionServerList storage) {
      super(master, storage);
    }

    @Override
    public List<ServerName> createDestinationServersList() {
      return super.createDestinationServersList(EXCLUDE_SERVERS);
    }
  }

  /**
   * {@link AssignmentManager} with two blocking hooks, one in {@link #getRegionsOnServer} and one
   * in {@link #reportRegionStateTransition}, that the test uses to control ordering.
   */
  private static final class AssignmentManagerForTest extends AssignmentManager {

    public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) {
      super(master, masterRegion);
    }

    /**
     * Computes the result first, then, once the test has armed the latches, signals arrival and
     * blocks until the test releases {@code RESUME_GET_REGIONS}.
     */
    @Override
    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
      CountDownLatch arrive = ARRIVE_GET_REGIONS;
      if (arrive != null) {
        arrive.countDown();
        awaitOrRestoreInterrupt(RESUME_GET_REGIONS);
      }
      return regions;
    }

    /**
     * Holds back a report that contains no meta region transition until the test releases
     * {@code RESUME_REPORT}, then delegates to the real implementation. Reports are passed straight
     * through while the latches are not armed.
     */
    @Override
    public ReportRegionStateTransitionResponse reportRegionStateTransition(
      ReportRegionStateTransitionRequest req) throws PleaseHoldException {
      CountDownLatch arrive = ARRIVE_REPORT;
      if (
        arrive != null && req.getTransitionList().stream()
          .noneMatch(t -> ProtobufUtil.toRegionInfo(t.getRegionInfo(0)).isMetaRegion())
      ) {
        arrive.countDown();
        awaitOrRestoreInterrupt(RESUME_REPORT);
      }
      return super.reportRegionStateTransition(req);
    }
  }

  /**
   * {@link HMaster} that plugs in {@link AssignmentManagerForTest} and
   * {@link ServerManagerForTest}.
   */
  public static final class HMasterForTest extends HMaster {

    public HMasterForTest(Configuration conf) throws IOException {
      super(conf);
    }

    @Override
    protected AssignmentManager createAssignmentManager(MasterServices master,
      MasterRegion masterRegion) {
      return new AssignmentManagerForTest(master, masterRegion);
    }

    @Override
    protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
      throws IOException {
      setupClusterConnection();
      return new ServerManagerForTest(master, storage);
    }
  }

  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();

  private static final TableName NAME = TableName.valueOf("Report");

  private static final byte[] CF = Bytes.toBytes("cf");

  /**
   * Starts a three region server mini cluster that uses {@link HMasterForTest}, switches the
   * balancer off and creates the test table.
   */
  @BeforeAll
  public static void setUp() throws Exception {
    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
    UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000);
    UTIL.startMiniCluster(3);
    UTIL.getAdmin().balancerSwitch(false, true);
    UTIL.createTable(NAME, CF);
    UTIL.waitTableAvailable(NAME);
  }

  @AfterAll
  public static void tearDown() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  @Test
  public void test() throws HBaseIOException, InterruptedException, ExecutionException {
    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);

    // move from rs0 to rs1, and then kill rs0. Later add rs1 to exclude servers, and at last verify
    // that the region should not be on rs1 and rs2 both.
    HRegionServer rs0 = UTIL.getMiniHBaseCluster().getRegionServer(rsn.getRegionLocation());
    HRegionServer rs1 = UTIL.getOtherRegionServer(rs0);
    HRegionServer rs2 = UTIL.getMiniHBaseCluster().getRegionServerThreads().stream()
      .map(t -> t.getRegionServer()).filter(rs -> rs != rs0 && rs != rs1).findAny().get();

    // Arm the report hook. RESUME must be assigned before ARRIVE, see the field comment.
    RESUME_REPORT = new CountDownLatch(1);
    ARRIVE_REPORT = new CountDownLatch(1);
    Future<?> future =
      am.moveAsync(new RegionPlan(region, rs0.getServerName(), rs1.getServerName()));
    ARRIVE_REPORT.await();

    // Arm the getRegionsOnServer hook (same assignment order), then kill the source server.
    RESUME_GET_REGIONS = new CountDownLatch(1);
    ARRIVE_GET_REGIONS = new CountDownLatch(1);
    rs0.abort("For testing!");

    // Only let the held report through once getRegionsOnServer has been reached.
    ARRIVE_GET_REGIONS.await();
    RESUME_REPORT.countDown();

    try {
      future.get(15, TimeUnit.SECONDS);
    } catch (TimeoutException e) {
      // after the fix in HBASE-21508 we will get this exception as the TRSP can not be finished any
      // more before SCP interrupts it. It's OK.
    }

    EXCLUDE_SERVERS.add(rs1.getServerName());
    RESUME_GET_REGIONS.countDown();
    // wait until there are no running procedures, no SCP and no TRSP
    UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor()
      .getActiveProcIds().isEmpty());
    boolean onRS1 = !rs1.getRegions(NAME).isEmpty();
    boolean onRS2 = !rs2.getRegions(NAME).isEmpty();
    assertNotEquals(onRS2, onRS1,
      "should either be on rs1 or rs2, but onRS1 is " + onRS1 + " and on RS2 is " + onRS2);
  }
}