001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.junit.Assert.assertEquals; 021 022import java.io.IOException; 023import java.net.ConnectException; 024import java.util.List; 025import java.util.concurrent.CopyOnWriteArrayList; 026import java.util.concurrent.CountDownLatch; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.hbase.HBaseClassTestRule; 029import org.apache.hadoop.hbase.HBaseTestingUtility; 030import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer; 031import org.apache.hadoop.hbase.PleaseHoldException; 032import org.apache.hadoop.hbase.ServerName; 033import org.apache.hadoop.hbase.StartMiniClusterOption; 034import org.apache.hadoop.hbase.TableName; 035import org.apache.hadoop.hbase.client.RegionInfo; 036import org.apache.hadoop.hbase.master.HMaster; 037import org.apache.hadoop.hbase.master.MasterServices; 038import org.apache.hadoop.hbase.master.RegionPlan; 039import org.apache.hadoop.hbase.master.RegionState; 040import org.apache.hadoop.hbase.master.ServerManager; 041import org.apache.hadoop.hbase.regionserver.HRegionServer; 042import org.apache.hadoop.hbase.regionserver.RSRpcServices; 043import org.apache.hadoop.hbase.testclassification.LargeTests; 044import org.apache.hadoop.hbase.testclassification.MasterTests; 045import org.apache.hadoop.hbase.util.Bytes; 046import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 047import org.junit.AfterClass; 048import org.junit.BeforeClass; 049import org.junit.ClassRule; 050import org.junit.Test; 051import org.junit.experimental.categories.Category; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; 056import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; 057 058import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 059import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresRequest; 060import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse; 061import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition; 062import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 063import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 064import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 065 066/** 067 * Testcase for HBASE-21811. 068 */ 069@Category({ MasterTests.class, LargeTests.class }) 070public class TestWakeUpUnexpectedProcedure { 071 072 @ClassRule 073 public static final HBaseClassTestRule CLASS_RULE = 074 HBaseClassTestRule.forClass(TestWakeUpUnexpectedProcedure.class); 075 076 private static final Logger LOG = LoggerFactory.getLogger(TestWakeUpUnexpectedProcedure.class); 077 078 private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); 079 080 private static TableName NAME = TableName.valueOf("Assign"); 081 082 private static final List<ServerName> EXCLUDE_SERVERS = new CopyOnWriteArrayList<>(); 083 084 private static byte[] CF = Bytes.toBytes("cf"); 085 086 private static volatile ServerName SERVER_TO_KILL; 087 088 private static volatile CountDownLatch ARRIVE_EXEC_PROC; 089 090 private static volatile CountDownLatch RESUME_EXEC_PROC; 091 092 private static volatile CountDownLatch RESUME_IS_SERVER_ONLINE; 093 094 private static volatile CountDownLatch ARRIVE_REPORT; 095 096 private static volatile CountDownLatch RESUME_REPORT; 097 098 private static final class RSRpcServicesForTest extends RSRpcServices { 099 100 public RSRpcServicesForTest(HRegionServer rs) throws IOException { 101 super(rs); 102 } 103 104 @Override 105 public ExecuteProceduresResponse executeProcedures(RpcController controller, 106 ExecuteProceduresRequest request) throws ServiceException { 107 if (request.getOpenRegionCount() > 0) { 108 if (ARRIVE_EXEC_PROC != null) { 109 SERVER_TO_KILL = regionServer.getServerName(); 110 ARRIVE_EXEC_PROC.countDown(); 111 ARRIVE_EXEC_PROC = null; 112 try { 113 RESUME_EXEC_PROC.await(); 114 } catch (InterruptedException e) { 115 throw new RuntimeException(e); 116 } 117 throw new ServiceException(new ConnectException("Inject error")); 118 } 119 } 120 return super.executeProcedures(controller, request); 121 } 122 } 123 124 public static final class RSForTest extends MiniHBaseClusterRegionServer { 125 126 public RSForTest(Configuration conf) throws IOException, InterruptedException { 127 super(conf); 128 } 129 130 @Override 131 protected RSRpcServices createRpcServices() throws IOException { 132 return new RSRpcServicesForTest(this); 133 } 134 } 135 136 private static final class AMForTest extends AssignmentManager { 137 138 public AMForTest(MasterServices master) { 139 super(master); 140 } 141 142 @Override 143 public ReportRegionStateTransitionResponse reportRegionStateTransition( 144 ReportRegionStateTransitionRequest req) throws PleaseHoldException { 145 RegionStateTransition rst = req.getTransition(0); 146 if (rst.getTransitionCode() == TransitionCode.OPENED && 147 ProtobufUtil.toTableName(rst.getRegionInfo(0).getTableName()).equals(NAME)) { 148 CountDownLatch arrive = ARRIVE_REPORT; 149 if (ARRIVE_REPORT != null) { 150 ARRIVE_REPORT = null; 151 arrive.countDown(); 152 // so we will choose another rs next time 153 EXCLUDE_SERVERS.add(ProtobufUtil.toServerName(req.getServer())); 154 try { 155 RESUME_REPORT.await(); 156 } catch (InterruptedException e) { 157 throw new RuntimeException(); 158 } 159 } 160 } 161 return super.reportRegionStateTransition(req); 162 } 163 } 164 165 private static final class SMForTest extends ServerManager { 166 167 public SMForTest(MasterServices master) { 168 super(master); 169 } 170 171 @Override 172 public boolean isServerOnline(ServerName serverName) { 173 ServerName toKill = SERVER_TO_KILL; 174 if (toKill != null && toKill.equals(serverName)) { 175 for (StackTraceElement ele : new Exception().getStackTrace()) { 176 // halt it is called from RSProcedureDispatcher, to delay the remoteCallFailed. 177 if ("scheduleForRetry".equals(ele.getMethodName())) { 178 if (RESUME_IS_SERVER_ONLINE != null) { 179 try { 180 RESUME_IS_SERVER_ONLINE.await(); 181 } catch (InterruptedException e) { 182 throw new RuntimeException(e); 183 } 184 } 185 break; 186 } 187 } 188 } 189 return super.isServerOnline(serverName); 190 } 191 192 @Override 193 public List<ServerName> createDestinationServersList() { 194 return super.createDestinationServersList(EXCLUDE_SERVERS); 195 } 196 } 197 198 public static final class HMasterForTest extends HMaster { 199 200 public HMasterForTest(Configuration conf) throws IOException { 201 super(conf); 202 } 203 204 @Override 205 protected AssignmentManager createAssignmentManager(MasterServices master) { 206 return new AMForTest(master); 207 } 208 209 @Override 210 protected ServerManager createServerManager(MasterServices master) throws IOException { 211 setupClusterConnection(); 212 return new SMForTest(master); 213 } 214 } 215 216 @BeforeClass 217 public static void setUp() throws Exception { 218 UTIL.startMiniCluster(StartMiniClusterOption.builder().numMasters(1) 219 .masterClass(HMasterForTest.class).numRegionServers(3).rsClass(RSForTest.class).build()); 220 UTIL.createTable(NAME, CF); 221 // Here the test region must not be hosted on the same rs with meta region. 222 // We have 3 RSes and only two regions(meta and the test region), so they will not likely to be 223 // hosted on the same RS. 224 UTIL.waitTableAvailable(NAME); 225 UTIL.getAdmin().balancerSwitch(false, true); 226 } 227 228 @AfterClass 229 public static void tearDown() throws Exception { 230 UTIL.shutdownMiniCluster(); 231 } 232 233 @Test 234 public void test() throws Exception { 235 RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo(); 236 AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager(); 237 RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region); 238 239 ServerName sn = rsn.getRegionLocation(); 240 RESUME_EXEC_PROC = new CountDownLatch(1); 241 ARRIVE_EXEC_PROC = new CountDownLatch(1); 242 RESUME_IS_SERVER_ONLINE = new CountDownLatch(1); 243 244 // reopen the region, and halt the executeProcedures method at RS side 245 am.moveAsync(new RegionPlan(region, sn, sn)); 246 ARRIVE_EXEC_PROC.await(); 247 248 RESUME_REPORT = new CountDownLatch(1); 249 ARRIVE_REPORT = new CountDownLatch(1); 250 251 // kill the region server 252 ServerName serverToKill = SERVER_TO_KILL; 253 UTIL.getMiniHBaseCluster().stopRegionServer(serverToKill); 254 RESUME_EXEC_PROC.countDown(); 255 256 // wait until we are going to open the region on a new rs 257 ARRIVE_REPORT.await(); 258 259 // resume the isServerOnline check, to let the rs procedure 260 RESUME_IS_SERVER_ONLINE.countDown(); 261 262 // before HBASE-20811 the state could become OPEN, and this is why later the region will be 263 // assigned to two regionservers. 264 for (int i = 0; i < 15; i++) { 265 if (rsn.getState() == RegionState.State.OPEN) { 266 break; 267 } 268 Thread.sleep(1000); 269 } 270 271 // resume the old report 272 RESUME_REPORT.countDown(); 273 274 // wait a bit to let the region to be online, it is not easy to write a condition for this so 275 // just sleep a while. 276 Thread.sleep(10000); 277 278 // confirm that the region is only on one rs 279 int count = 0; 280 for (RegionServerThread t : UTIL.getMiniHBaseCluster().getRegionServerThreads()) { 281 if (!t.getRegionServer().getRegions(NAME).isEmpty()) { 282 LOG.info("{} is on {}", region, t.getRegionServer().getServerName()); 283 count++; 284 } 285 } 286 assertEquals(1, count); 287 } 288}