001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.junit.Assert.assertNotEquals; 021 022import java.io.IOException; 023import java.util.ArrayList; 024import java.util.List; 025import java.util.concurrent.CountDownLatch; 026import java.util.concurrent.ExecutionException; 027import java.util.concurrent.Future; 028import java.util.concurrent.TimeUnit; 029import java.util.concurrent.TimeoutException; 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.hbase.HBaseClassTestRule; 032import org.apache.hadoop.hbase.HBaseIOException; 033import org.apache.hadoop.hbase.HBaseTestingUtility; 034import org.apache.hadoop.hbase.HConstants; 035import org.apache.hadoop.hbase.PleaseHoldException; 036import org.apache.hadoop.hbase.ServerName; 037import org.apache.hadoop.hbase.TableName; 038import org.apache.hadoop.hbase.client.RegionInfo; 039import org.apache.hadoop.hbase.master.HMaster; 040import org.apache.hadoop.hbase.master.MasterServices; 041import org.apache.hadoop.hbase.master.RegionPlan; 042import org.apache.hadoop.hbase.master.ServerManager; 043import org.apache.hadoop.hbase.regionserver.HRegionServer; 044import org.apache.hadoop.hbase.testclassification.MasterTests; 045import org.apache.hadoop.hbase.testclassification.MediumTests; 046import org.apache.hadoop.hbase.util.Bytes; 047import org.apache.zookeeper.KeeperException; 048import org.junit.AfterClass; 049import org.junit.BeforeClass; 050import org.junit.ClassRule; 051import org.junit.Test; 052import org.junit.experimental.categories.Category; 053 054import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 055import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 056import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 057 058@Category({ MasterTests.class, MediumTests.class }) 059public class TestReportRegionStateTransitionFromDeadServer { 060 061 @ClassRule 062 public static final HBaseClassTestRule CLASS_RULE = 063 HBaseClassTestRule.forClass(TestReportRegionStateTransitionFromDeadServer.class); 064 065 private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>(); 066 067 private static CountDownLatch ARRIVE_GET_REGIONS; 068 private static CountDownLatch RESUME_GET_REGIONS; 069 private static CountDownLatch ARRIVE_REPORT; 070 private static CountDownLatch RESUME_REPORT; 071 072 private static final class ServerManagerForTest extends ServerManager { 073 074 public ServerManagerForTest(MasterServices master) { 075 super(master); 076 } 077 078 @Override 079 public List<ServerName> createDestinationServersList() { 080 return super.createDestinationServersList(EXCLUDE_SERVERS); 081 } 082 } 083 084 private static final class AssignmentManagerForTest extends AssignmentManager { 085 086 public AssignmentManagerForTest(MasterServices master) { 087 super(master); 088 } 089 090 @Override 091 public List<RegionInfo> getRegionsOnServer(ServerName serverName) { 092 List<RegionInfo> regions = super.getRegionsOnServer(serverName); 093 if (ARRIVE_GET_REGIONS != null) { 094 ARRIVE_GET_REGIONS.countDown(); 095 try { 096 RESUME_GET_REGIONS.await(); 097 } catch (InterruptedException e) { 098 } 099 } 100 return regions; 101 } 102 103 @Override 104 public ReportRegionStateTransitionResponse reportRegionStateTransition( 105 ReportRegionStateTransitionRequest req) throws PleaseHoldException { 106 if (ARRIVE_REPORT != null && req.getTransitionList().stream() 107 .allMatch(t -> !ProtobufUtil.toRegionInfo(t.getRegionInfo(0)).isMetaRegion())) { 108 ARRIVE_REPORT.countDown(); 109 try { 110 RESUME_REPORT.await(); 111 } catch (InterruptedException e) { 112 } 113 } 114 return super.reportRegionStateTransition(req); 115 } 116 } 117 118 public static final class HMasterForTest extends HMaster { 119 120 public HMasterForTest(Configuration conf) throws IOException, KeeperException { 121 super(conf); 122 } 123 124 @Override 125 protected AssignmentManager createAssignmentManager(MasterServices master) { 126 return new AssignmentManagerForTest(master); 127 } 128 129 @Override 130 protected ServerManager createServerManager(MasterServices master) throws IOException { 131 setupClusterConnection(); 132 return new ServerManagerForTest(master); 133 } 134 } 135 136 private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); 137 138 private static TableName NAME = TableName.valueOf("Report"); 139 140 private static byte[] CF = Bytes.toBytes("cf"); 141 142 @BeforeClass 143 public static void setUp() throws Exception { 144 UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class); 145 UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000); 146 UTIL.startMiniCluster(3); 147 UTIL.getAdmin().balancerSwitch(false, true); 148 UTIL.createTable(NAME, CF); 149 UTIL.waitTableAvailable(NAME); 150 } 151 152 @AfterClass 153 public static void tearDown() throws Exception { 154 UTIL.shutdownMiniCluster(); 155 } 156 157 @Test 158 public void test() throws HBaseIOException, InterruptedException, ExecutionException { 159 RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo(); 160 AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager(); 161 RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region); 162 163 // move from rs0 to rs1, and then kill rs0. Later add rs1 to exclude servers, and at last verify 164 // that the region should not be on rs1 and rs2 both. 165 HRegionServer rs0 = UTIL.getMiniHBaseCluster().getRegionServer(rsn.getRegionLocation()); 166 HRegionServer rs1 = UTIL.getOtherRegionServer(rs0); 167 HRegionServer rs2 = UTIL.getMiniHBaseCluster().getRegionServerThreads().stream() 168 .map(t -> t.getRegionServer()).filter(rs -> rs != rs0 && rs != rs1).findAny().get(); 169 170 RESUME_REPORT = new CountDownLatch(1); 171 ARRIVE_REPORT = new CountDownLatch(1); 172 Future<?> future = 173 am.moveAsync(new RegionPlan(region, rs0.getServerName(), rs1.getServerName())); 174 ARRIVE_REPORT.await(); 175 176 RESUME_GET_REGIONS = new CountDownLatch(1); 177 ARRIVE_GET_REGIONS = new CountDownLatch(1); 178 rs0.abort("For testing!"); 179 180 ARRIVE_GET_REGIONS.await(); 181 RESUME_REPORT.countDown(); 182 183 try { 184 future.get(15, TimeUnit.SECONDS); 185 } catch (TimeoutException e) { 186 // after the fix in HBASE-21508 we will get this exception as the TRSP can not be finished any 187 // more before SCP interrupts it. It's OK. 188 } 189 190 EXCLUDE_SERVERS.add(rs1.getServerName()); 191 RESUME_GET_REGIONS.countDown(); 192 // wait until there are no running procedures, no SCP and no TRSP 193 UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor() 194 .getActiveProcIds().isEmpty()); 195 boolean onRS1 = !rs1.getRegions(NAME).isEmpty(); 196 boolean onRS2 = !rs2.getRegions(NAME).isEmpty(); 197 assertNotEquals( 198 "should either be on rs1 or rs2, but onRS1 is " + onRS1 + " and on RS2 is " + onRS2, onRS1, 199 onRS2); 200 } 201}