001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.junit.Assert.assertNotEquals; 021 022import java.io.IOException; 023import java.util.ArrayList; 024import java.util.List; 025import java.util.concurrent.CountDownLatch; 026import java.util.concurrent.ExecutionException; 027import java.util.concurrent.Future; 028import java.util.concurrent.TimeUnit; 029import java.util.concurrent.TimeoutException; 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.hbase.HBaseClassTestRule; 032import org.apache.hadoop.hbase.HBaseIOException; 033import org.apache.hadoop.hbase.HBaseTestingUtility; 034import org.apache.hadoop.hbase.HConstants; 035import org.apache.hadoop.hbase.PleaseHoldException; 036import org.apache.hadoop.hbase.ServerName; 037import org.apache.hadoop.hbase.TableName; 038import org.apache.hadoop.hbase.client.RegionInfo; 039import org.apache.hadoop.hbase.master.HMaster; 040import org.apache.hadoop.hbase.master.MasterServices; 041import org.apache.hadoop.hbase.master.RegionPlan; 042import org.apache.hadoop.hbase.master.RegionServerList; 043import org.apache.hadoop.hbase.master.ServerManager; 044import org.apache.hadoop.hbase.master.region.MasterRegion; 045import org.apache.hadoop.hbase.regionserver.HRegionServer; 046import org.apache.hadoop.hbase.testclassification.MasterTests; 047import org.apache.hadoop.hbase.testclassification.MediumTests; 048import org.apache.hadoop.hbase.util.Bytes; 049import org.junit.AfterClass; 050import org.junit.BeforeClass; 051import org.junit.ClassRule; 052import org.junit.Test; 053import org.junit.experimental.categories.Category; 054 055import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 056import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 057import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 058 059@Category({ MasterTests.class, MediumTests.class }) 060public class TestReportRegionStateTransitionFromDeadServer { 061 062 @ClassRule 063 public static final HBaseClassTestRule CLASS_RULE = 064 HBaseClassTestRule.forClass(TestReportRegionStateTransitionFromDeadServer.class); 065 066 private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>(); 067 068 private static CountDownLatch ARRIVE_GET_REGIONS; 069 private static CountDownLatch RESUME_GET_REGIONS; 070 private static CountDownLatch ARRIVE_REPORT; 071 private static CountDownLatch RESUME_REPORT; 072 073 private static final class ServerManagerForTest extends ServerManager { 074 075 public ServerManagerForTest(MasterServices master, RegionServerList storage) { 076 super(master, storage); 077 } 078 079 @Override 080 public List<ServerName> createDestinationServersList() { 081 return super.createDestinationServersList(EXCLUDE_SERVERS); 082 } 083 } 084 085 private static final class AssignmentManagerForTest extends AssignmentManager { 086 087 public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) { 088 super(master, masterRegion); 089 } 090 091 @Override 092 public List<RegionInfo> getRegionsOnServer(ServerName serverName) { 093 List<RegionInfo> regions = super.getRegionsOnServer(serverName); 094 if (ARRIVE_GET_REGIONS != null) { 095 ARRIVE_GET_REGIONS.countDown(); 096 try { 097 RESUME_GET_REGIONS.await(); 098 } catch (InterruptedException e) { 099 } 100 } 101 return regions; 102 } 103 104 @Override 105 public ReportRegionStateTransitionResponse reportRegionStateTransition( 106 ReportRegionStateTransitionRequest req) throws PleaseHoldException { 107 if ( 108 ARRIVE_REPORT != null && req.getTransitionList().stream() 109 .allMatch(t -> !ProtobufUtil.toRegionInfo(t.getRegionInfo(0)).isMetaRegion()) 110 ) { 111 ARRIVE_REPORT.countDown(); 112 try { 113 RESUME_REPORT.await(); 114 } catch (InterruptedException e) { 115 } 116 } 117 return super.reportRegionStateTransition(req); 118 } 119 } 120 121 public static final class HMasterForTest extends HMaster { 122 123 public HMasterForTest(Configuration conf) throws IOException { 124 super(conf); 125 } 126 127 @Override 128 protected AssignmentManager createAssignmentManager(MasterServices master, 129 MasterRegion masterRegion) { 130 return new AssignmentManagerForTest(master, masterRegion); 131 } 132 133 @Override 134 protected ServerManager createServerManager(MasterServices master, RegionServerList storage) 135 throws IOException { 136 setupClusterConnection(); 137 return new ServerManagerForTest(master, storage); 138 } 139 } 140 141 private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); 142 143 private static TableName NAME = TableName.valueOf("Report"); 144 145 private static byte[] CF = Bytes.toBytes("cf"); 146 147 @BeforeClass 148 public static void setUp() throws Exception { 149 UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class); 150 UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000); 151 UTIL.startMiniCluster(3); 152 UTIL.getAdmin().balancerSwitch(false, true); 153 UTIL.createTable(NAME, CF); 154 UTIL.waitTableAvailable(NAME); 155 } 156 157 @AfterClass 158 public static void tearDown() throws Exception { 159 UTIL.shutdownMiniCluster(); 160 } 161 162 @Test 163 public void test() throws HBaseIOException, InterruptedException, ExecutionException { 164 RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo(); 165 AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager(); 166 RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region); 167 168 // move from rs0 to rs1, and then kill rs0. Later add rs1 to exclude servers, and at last verify 169 // that the region should not be on rs1 and rs2 both. 170 HRegionServer rs0 = UTIL.getMiniHBaseCluster().getRegionServer(rsn.getRegionLocation()); 171 HRegionServer rs1 = UTIL.getOtherRegionServer(rs0); 172 HRegionServer rs2 = UTIL.getMiniHBaseCluster().getRegionServerThreads().stream() 173 .map(t -> t.getRegionServer()).filter(rs -> rs != rs0 && rs != rs1).findAny().get(); 174 175 RESUME_REPORT = new CountDownLatch(1); 176 ARRIVE_REPORT = new CountDownLatch(1); 177 Future<?> future = 178 am.moveAsync(new RegionPlan(region, rs0.getServerName(), rs1.getServerName())); 179 ARRIVE_REPORT.await(); 180 181 RESUME_GET_REGIONS = new CountDownLatch(1); 182 ARRIVE_GET_REGIONS = new CountDownLatch(1); 183 rs0.abort("For testing!"); 184 185 ARRIVE_GET_REGIONS.await(); 186 RESUME_REPORT.countDown(); 187 188 try { 189 future.get(15, TimeUnit.SECONDS); 190 } catch (TimeoutException e) { 191 // after the fix in HBASE-21508 we will get this exception as the TRSP can not be finished any 192 // more before SCP interrupts it. It's OK. 193 } 194 195 EXCLUDE_SERVERS.add(rs1.getServerName()); 196 RESUME_GET_REGIONS.countDown(); 197 // wait until there are no running procedures, no SCP and no TRSP 198 UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor() 199 .getActiveProcIds().isEmpty()); 200 boolean onRS1 = !rs1.getRegions(NAME).isEmpty(); 201 boolean onRS2 = !rs2.getRegions(NAME).isEmpty(); 202 assertNotEquals( 203 "should either be on rs1 or rs2, but onRS1 is " + onRS1 + " and on RS2 is " + onRS2, onRS1, 204 onRS2); 205 } 206}