001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.junit.Assert.assertEquals; 021 022import java.io.IOException; 023import java.net.ConnectException; 024import java.util.List; 025import java.util.concurrent.CopyOnWriteArrayList; 026import java.util.concurrent.CountDownLatch; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.hbase.HBaseClassTestRule; 029import org.apache.hadoop.hbase.HBaseTestingUtility; 030import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer; 031import org.apache.hadoop.hbase.PleaseHoldException; 032import org.apache.hadoop.hbase.ServerName; 033import org.apache.hadoop.hbase.StartMiniClusterOption; 034import org.apache.hadoop.hbase.TableName; 035import org.apache.hadoop.hbase.client.RegionInfo; 036import org.apache.hadoop.hbase.master.HMaster; 037import org.apache.hadoop.hbase.master.MasterServices; 038import org.apache.hadoop.hbase.master.RegionPlan; 039import org.apache.hadoop.hbase.master.RegionState; 040import org.apache.hadoop.hbase.master.ServerManager; 041import org.apache.hadoop.hbase.regionserver.HRegionServer; 042import org.apache.hadoop.hbase.regionserver.RSRpcServices; 043import org.apache.hadoop.hbase.testclassification.LargeTests; 044import org.apache.hadoop.hbase.testclassification.MasterTests; 045import org.apache.hadoop.hbase.util.Bytes; 046import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 047import org.apache.zookeeper.KeeperException; 048import org.junit.AfterClass; 049import org.junit.BeforeClass; 050import org.junit.ClassRule; 051import org.junit.Test; 052import org.junit.experimental.categories.Category; 053import org.slf4j.Logger; 054import org.slf4j.LoggerFactory; 055 056import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; 057import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; 058 059import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 060import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresRequest; 061import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse; 062import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition; 063import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 064import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 065import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 066 067/** 068 * Testcase for HBASE-21811. 069 */ 070@Category({ MasterTests.class, LargeTests.class }) 071public class TestWakeUpUnexpectedProcedure { 072 073 @ClassRule 074 public static final HBaseClassTestRule CLASS_RULE = 075 HBaseClassTestRule.forClass(TestWakeUpUnexpectedProcedure.class); 076 077 private static final Logger LOG = LoggerFactory.getLogger(TestWakeUpUnexpectedProcedure.class); 078 079 private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); 080 081 private static TableName NAME = TableName.valueOf("Assign"); 082 083 private static final List<ServerName> EXCLUDE_SERVERS = new CopyOnWriteArrayList<>(); 084 085 private static byte[] CF = Bytes.toBytes("cf"); 086 087 private static volatile ServerName SERVER_TO_KILL; 088 089 private static volatile CountDownLatch ARRIVE_EXEC_PROC; 090 091 private static volatile CountDownLatch RESUME_EXEC_PROC; 092 093 private static volatile CountDownLatch RESUME_IS_SERVER_ONLINE; 094 095 private static volatile CountDownLatch ARRIVE_REPORT; 096 097 private static volatile CountDownLatch RESUME_REPORT; 098 099 private static final class RSRpcServicesForTest extends RSRpcServices { 100 101 public RSRpcServicesForTest(HRegionServer rs) throws IOException { 102 super(rs); 103 } 104 105 @Override 106 public ExecuteProceduresResponse executeProcedures(RpcController controller, 107 ExecuteProceduresRequest request) throws ServiceException { 108 if (request.getOpenRegionCount() > 0) { 109 if (ARRIVE_EXEC_PROC != null) { 110 SERVER_TO_KILL = regionServer.getServerName(); 111 ARRIVE_EXEC_PROC.countDown(); 112 ARRIVE_EXEC_PROC = null; 113 try { 114 RESUME_EXEC_PROC.await(); 115 } catch (InterruptedException e) { 116 throw new RuntimeException(e); 117 } 118 throw new ServiceException(new ConnectException("Inject error")); 119 } 120 } 121 return super.executeProcedures(controller, request); 122 } 123 } 124 125 public static final class RSForTest extends MiniHBaseClusterRegionServer { 126 127 public RSForTest(Configuration conf) throws IOException, InterruptedException { 128 super(conf); 129 } 130 131 @Override 132 protected RSRpcServices createRpcServices() throws IOException { 133 return new RSRpcServicesForTest(this); 134 } 135 } 136 137 private static final class AMForTest extends AssignmentManager { 138 139 public AMForTest(MasterServices master) { 140 super(master); 141 } 142 143 @Override 144 public ReportRegionStateTransitionResponse reportRegionStateTransition( 145 ReportRegionStateTransitionRequest req) throws PleaseHoldException { 146 RegionStateTransition rst = req.getTransition(0); 147 if (rst.getTransitionCode() == TransitionCode.OPENED && 148 ProtobufUtil.toTableName(rst.getRegionInfo(0).getTableName()).equals(NAME)) { 149 CountDownLatch arrive = ARRIVE_REPORT; 150 if (ARRIVE_REPORT != null) { 151 ARRIVE_REPORT = null; 152 arrive.countDown(); 153 // so we will choose another rs next time 154 EXCLUDE_SERVERS.add(ProtobufUtil.toServerName(req.getServer())); 155 try { 156 RESUME_REPORT.await(); 157 } catch (InterruptedException e) { 158 throw new RuntimeException(); 159 } 160 } 161 } 162 return super.reportRegionStateTransition(req); 163 } 164 } 165 166 private static final class SMForTest extends ServerManager { 167 168 public SMForTest(MasterServices master) { 169 super(master); 170 } 171 172 @Override 173 public boolean isServerOnline(ServerName serverName) { 174 ServerName toKill = SERVER_TO_KILL; 175 if (toKill != null && toKill.equals(serverName)) { 176 for (StackTraceElement ele : new Exception().getStackTrace()) { 177 // halt it is called from RSProcedureDispatcher, to delay the remoteCallFailed. 178 if ("scheduleForRetry".equals(ele.getMethodName())) { 179 if (RESUME_IS_SERVER_ONLINE != null) { 180 try { 181 RESUME_IS_SERVER_ONLINE.await(); 182 } catch (InterruptedException e) { 183 throw new RuntimeException(e); 184 } 185 } 186 break; 187 } 188 } 189 } 190 return super.isServerOnline(serverName); 191 } 192 193 @Override 194 public List<ServerName> createDestinationServersList() { 195 return super.createDestinationServersList(EXCLUDE_SERVERS); 196 } 197 } 198 199 public static final class HMasterForTest extends HMaster { 200 201 public HMasterForTest(Configuration conf) throws IOException, KeeperException { 202 super(conf); 203 } 204 205 @Override 206 protected AssignmentManager createAssignmentManager(MasterServices master) { 207 return new AMForTest(master); 208 } 209 210 @Override 211 protected ServerManager createServerManager(MasterServices master) throws IOException { 212 setupClusterConnection(); 213 return new SMForTest(master); 214 } 215 } 216 217 @BeforeClass 218 public static void setUp() throws Exception { 219 UTIL.startMiniCluster(StartMiniClusterOption.builder().numMasters(1) 220 .masterClass(HMasterForTest.class).numRegionServers(3).rsClass(RSForTest.class).build()); 221 UTIL.createTable(NAME, CF); 222 // Here the test region must not be hosted on the same rs with meta region. 223 // We have 3 RSes and only two regions(meta and the test region), so they will not likely to be 224 // hosted on the same RS. 225 UTIL.waitTableAvailable(NAME); 226 UTIL.getAdmin().balancerSwitch(false, true); 227 } 228 229 @AfterClass 230 public static void tearDown() throws Exception { 231 UTIL.shutdownMiniCluster(); 232 } 233 234 @Test 235 public void test() throws Exception { 236 RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo(); 237 AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager(); 238 RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region); 239 240 ServerName sn = rsn.getRegionLocation(); 241 RESUME_EXEC_PROC = new CountDownLatch(1); 242 ARRIVE_EXEC_PROC = new CountDownLatch(1); 243 RESUME_IS_SERVER_ONLINE = new CountDownLatch(1); 244 245 // reopen the region, and halt the executeProcedures method at RS side 246 am.moveAsync(new RegionPlan(region, sn, sn)); 247 ARRIVE_EXEC_PROC.await(); 248 249 RESUME_REPORT = new CountDownLatch(1); 250 ARRIVE_REPORT = new CountDownLatch(1); 251 252 // kill the region server 253 ServerName serverToKill = SERVER_TO_KILL; 254 UTIL.getMiniHBaseCluster().stopRegionServer(serverToKill); 255 RESUME_EXEC_PROC.countDown(); 256 257 // wait until we are going to open the region on a new rs 258 ARRIVE_REPORT.await(); 259 260 // resume the isServerOnline check, to let the rs procedure 261 RESUME_IS_SERVER_ONLINE.countDown(); 262 263 // before HBASE-20811 the state could become OPEN, and this is why later the region will be 264 // assigned to two regionservers. 265 for (int i = 0; i < 15; i++) { 266 if (rsn.getState() == RegionState.State.OPEN) { 267 break; 268 } 269 Thread.sleep(1000); 270 } 271 272 // resume the old report 273 RESUME_REPORT.countDown(); 274 275 // wait a bit to let the region to be online, it is not easy to write a condition for this so 276 // just sleep a while. 277 Thread.sleep(10000); 278 279 // confirm that the region is only on one rs 280 int count = 0; 281 for (RegionServerThread t : UTIL.getMiniHBaseCluster().getRegionServerThreads()) { 282 if (!t.getRegionServer().getRegions(NAME).isEmpty()) { 283 LOG.info("{} is on {}", region, t.getRegionServer().getServerName()); 284 count++; 285 } 286 } 287 assertEquals(1, count); 288 } 289}