001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.junit.Assert.assertNotEquals; 021 022import java.io.IOException; 023import java.util.ArrayList; 024import java.util.List; 025import java.util.concurrent.CountDownLatch; 026import java.util.concurrent.ExecutionException; 027import java.util.concurrent.Future; 028import java.util.concurrent.TimeUnit; 029import java.util.concurrent.TimeoutException; 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.hbase.HBaseClassTestRule; 032import org.apache.hadoop.hbase.HBaseIOException; 033import org.apache.hadoop.hbase.HBaseTestingUtility; 034import org.apache.hadoop.hbase.HConstants; 035import org.apache.hadoop.hbase.PleaseHoldException; 036import org.apache.hadoop.hbase.ServerName; 037import org.apache.hadoop.hbase.TableName; 038import org.apache.hadoop.hbase.client.RegionInfo; 039import org.apache.hadoop.hbase.master.HMaster; 040import org.apache.hadoop.hbase.master.MasterServices; 041import org.apache.hadoop.hbase.master.RegionPlan; 042import org.apache.hadoop.hbase.master.ServerManager; 043import org.apache.hadoop.hbase.regionserver.HRegionServer; 044import org.apache.hadoop.hbase.testclassification.MasterTests; 045import org.apache.hadoop.hbase.testclassification.MediumTests; 046import org.apache.hadoop.hbase.util.Bytes; 047import org.junit.AfterClass; 048import org.junit.BeforeClass; 049import org.junit.ClassRule; 050import org.junit.Test; 051import org.junit.experimental.categories.Category; 052 053import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 054import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 055import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 056 057@Category({ MasterTests.class, MediumTests.class }) 058public class TestReportRegionStateTransitionFromDeadServer { 059 060 @ClassRule 061 public static final HBaseClassTestRule CLASS_RULE = 062 HBaseClassTestRule.forClass(TestReportRegionStateTransitionFromDeadServer.class); 063 064 private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>(); 065 066 private static CountDownLatch ARRIVE_GET_REGIONS; 067 private static CountDownLatch RESUME_GET_REGIONS; 068 private static CountDownLatch ARRIVE_REPORT; 069 private static CountDownLatch RESUME_REPORT; 070 071 private static final class ServerManagerForTest extends ServerManager { 072 073 public ServerManagerForTest(MasterServices master) { 074 super(master); 075 } 076 077 @Override 078 public List<ServerName> createDestinationServersList() { 079 return super.createDestinationServersList(EXCLUDE_SERVERS); 080 } 081 } 082 083 private static final class AssignmentManagerForTest extends AssignmentManager { 084 085 public AssignmentManagerForTest(MasterServices master) { 086 super(master); 087 } 088 089 @Override 090 public List<RegionInfo> getRegionsOnServer(ServerName serverName) { 091 List<RegionInfo> regions = super.getRegionsOnServer(serverName); 092 if (ARRIVE_GET_REGIONS != null) { 093 ARRIVE_GET_REGIONS.countDown(); 094 try { 095 RESUME_GET_REGIONS.await(); 096 } catch (InterruptedException e) { 097 } 098 } 099 return regions; 100 } 101 102 @Override 103 public ReportRegionStateTransitionResponse reportRegionStateTransition( 104 ReportRegionStateTransitionRequest req) throws PleaseHoldException { 105 if (ARRIVE_REPORT != null && req.getTransitionList().stream() 106 .allMatch(t -> !ProtobufUtil.toRegionInfo(t.getRegionInfo(0)).isMetaRegion())) { 107 ARRIVE_REPORT.countDown(); 108 try { 109 RESUME_REPORT.await(); 110 } catch (InterruptedException e) { 111 } 112 } 113 return super.reportRegionStateTransition(req); 114 } 115 } 116 117 public static final class HMasterForTest extends HMaster { 118 119 public HMasterForTest(Configuration conf) throws IOException { 120 super(conf); 121 } 122 123 @Override 124 protected AssignmentManager createAssignmentManager(MasterServices master) { 125 return new AssignmentManagerForTest(master); 126 } 127 128 @Override 129 protected ServerManager createServerManager(MasterServices master) throws IOException { 130 setupClusterConnection(); 131 return new ServerManagerForTest(master); 132 } 133 } 134 135 private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); 136 137 private static TableName NAME = TableName.valueOf("Report"); 138 139 private static byte[] CF = Bytes.toBytes("cf"); 140 141 @BeforeClass 142 public static void setUp() throws Exception { 143 UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class); 144 UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000); 145 UTIL.startMiniCluster(3); 146 UTIL.getAdmin().balancerSwitch(false, true); 147 UTIL.createTable(NAME, CF); 148 UTIL.waitTableAvailable(NAME); 149 } 150 151 @AfterClass 152 public static void tearDown() throws Exception { 153 UTIL.shutdownMiniCluster(); 154 } 155 156 @Test 157 public void test() throws HBaseIOException, InterruptedException, ExecutionException { 158 RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo(); 159 AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager(); 160 RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region); 161 162 // move from rs0 to rs1, and then kill rs0. Later add rs1 to exclude servers, and at last verify 163 // that the region should not be on rs1 and rs2 both. 164 HRegionServer rs0 = UTIL.getMiniHBaseCluster().getRegionServer(rsn.getRegionLocation()); 165 HRegionServer rs1 = UTIL.getOtherRegionServer(rs0); 166 HRegionServer rs2 = UTIL.getMiniHBaseCluster().getRegionServerThreads().stream() 167 .map(t -> t.getRegionServer()).filter(rs -> rs != rs0 && rs != rs1).findAny().get(); 168 169 RESUME_REPORT = new CountDownLatch(1); 170 ARRIVE_REPORT = new CountDownLatch(1); 171 Future<?> future = 172 am.moveAsync(new RegionPlan(region, rs0.getServerName(), rs1.getServerName())); 173 ARRIVE_REPORT.await(); 174 175 RESUME_GET_REGIONS = new CountDownLatch(1); 176 ARRIVE_GET_REGIONS = new CountDownLatch(1); 177 rs0.abort("For testing!"); 178 179 ARRIVE_GET_REGIONS.await(); 180 RESUME_REPORT.countDown(); 181 182 try { 183 future.get(15, TimeUnit.SECONDS); 184 } catch (TimeoutException e) { 185 // after the fix in HBASE-21508 we will get this exception as the TRSP can not be finished any 186 // more before SCP interrupts it. It's OK. 187 } 188 189 EXCLUDE_SERVERS.add(rs1.getServerName()); 190 RESUME_GET_REGIONS.countDown(); 191 // wait until there are no running procedures, no SCP and no TRSP 192 UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor() 193 .getActiveProcIds().isEmpty()); 194 boolean onRS1 = !rs1.getRegions(NAME).isEmpty(); 195 boolean onRS2 = !rs2.getRegions(NAME).isEmpty(); 196 assertNotEquals( 197 "should either be on rs1 or rs2, but onRS1 is " + onRS1 + " and on RS2 is " + onRS2, onRS1, 198 onRS2); 199 } 200}