001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.assertEquals;
021
022import java.io.IOException;
023import java.net.ConnectException;
024import java.util.List;
025import java.util.concurrent.CopyOnWriteArrayList;
026import java.util.concurrent.CountDownLatch;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HBaseTestingUtility;
030import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
031import org.apache.hadoop.hbase.PleaseHoldException;
032import org.apache.hadoop.hbase.ServerName;
033import org.apache.hadoop.hbase.StartMiniClusterOption;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.master.HMaster;
037import org.apache.hadoop.hbase.master.MasterServices;
038import org.apache.hadoop.hbase.master.RegionPlan;
039import org.apache.hadoop.hbase.master.RegionState;
040import org.apache.hadoop.hbase.master.ServerManager;
041import org.apache.hadoop.hbase.regionserver.HRegionServer;
042import org.apache.hadoop.hbase.regionserver.RSRpcServices;
043import org.apache.hadoop.hbase.testclassification.LargeTests;
044import org.apache.hadoop.hbase.testclassification.MasterTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
047import org.junit.AfterClass;
048import org.junit.BeforeClass;
049import org.junit.ClassRule;
050import org.junit.Test;
051import org.junit.experimental.categories.Category;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054
055import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
056import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
057
058import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
059import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresRequest;
060import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse;
061import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
062import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
063import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
064import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
065
066/**
067 * Testcase for HBASE-21811.
068 */
069@Category({ MasterTests.class, LargeTests.class })
070public class TestWakeUpUnexpectedProcedure {
071
072  @ClassRule
073  public static final HBaseClassTestRule CLASS_RULE =
074    HBaseClassTestRule.forClass(TestWakeUpUnexpectedProcedure.class);
075
076  private static final Logger LOG = LoggerFactory.getLogger(TestWakeUpUnexpectedProcedure.class);
077
078  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
079
080  private static TableName NAME = TableName.valueOf("Assign");
081
082  private static final List<ServerName> EXCLUDE_SERVERS = new CopyOnWriteArrayList<>();
083
084  private static byte[] CF = Bytes.toBytes("cf");
085
086  private static volatile ServerName SERVER_TO_KILL;
087
088  private static volatile CountDownLatch ARRIVE_EXEC_PROC;
089
090  private static volatile CountDownLatch RESUME_EXEC_PROC;
091
092  private static volatile CountDownLatch RESUME_IS_SERVER_ONLINE;
093
094  private static volatile CountDownLatch ARRIVE_REPORT;
095
096  private static volatile CountDownLatch RESUME_REPORT;
097
098  private static final class RSRpcServicesForTest extends RSRpcServices {
099
100    public RSRpcServicesForTest(HRegionServer rs) throws IOException {
101      super(rs);
102    }
103
104    @Override
105    public ExecuteProceduresResponse executeProcedures(RpcController controller,
106        ExecuteProceduresRequest request) throws ServiceException {
107      if (request.getOpenRegionCount() > 0) {
108        if (ARRIVE_EXEC_PROC != null) {
109          SERVER_TO_KILL = regionServer.getServerName();
110          ARRIVE_EXEC_PROC.countDown();
111          ARRIVE_EXEC_PROC = null;
112          try {
113            RESUME_EXEC_PROC.await();
114          } catch (InterruptedException e) {
115            throw new RuntimeException(e);
116          }
117          throw new ServiceException(new ConnectException("Inject error"));
118        }
119      }
120      return super.executeProcedures(controller, request);
121    }
122  }
123
124  public static final class RSForTest extends MiniHBaseClusterRegionServer {
125
126    public RSForTest(Configuration conf) throws IOException, InterruptedException {
127      super(conf);
128    }
129
130    @Override
131    protected RSRpcServices createRpcServices() throws IOException {
132      return new RSRpcServicesForTest(this);
133    }
134  }
135
136  private static final class AMForTest extends AssignmentManager {
137
138    public AMForTest(MasterServices master) {
139      super(master);
140    }
141
142    @Override
143    public ReportRegionStateTransitionResponse reportRegionStateTransition(
144        ReportRegionStateTransitionRequest req) throws PleaseHoldException {
145      RegionStateTransition rst = req.getTransition(0);
146      if (rst.getTransitionCode() == TransitionCode.OPENED &&
147        ProtobufUtil.toTableName(rst.getRegionInfo(0).getTableName()).equals(NAME)) {
148        CountDownLatch arrive = ARRIVE_REPORT;
149        if (ARRIVE_REPORT != null) {
150          ARRIVE_REPORT = null;
151          arrive.countDown();
152          // so we will choose another rs next time
153          EXCLUDE_SERVERS.add(ProtobufUtil.toServerName(req.getServer()));
154          try {
155            RESUME_REPORT.await();
156          } catch (InterruptedException e) {
157            throw new RuntimeException();
158          }
159        }
160      }
161      return super.reportRegionStateTransition(req);
162    }
163  }
164
165  private static final class SMForTest extends ServerManager {
166
167    public SMForTest(MasterServices master) {
168      super(master);
169    }
170
171    @Override
172    public boolean isServerOnline(ServerName serverName) {
173      ServerName toKill = SERVER_TO_KILL;
174      if (toKill != null && toKill.equals(serverName)) {
175        for (StackTraceElement ele : new Exception().getStackTrace()) {
176          // halt it is called from RSProcedureDispatcher, to delay the remoteCallFailed.
177          if ("scheduleForRetry".equals(ele.getMethodName())) {
178            if (RESUME_IS_SERVER_ONLINE != null) {
179              try {
180                RESUME_IS_SERVER_ONLINE.await();
181              } catch (InterruptedException e) {
182                throw new RuntimeException(e);
183              }
184            }
185            break;
186          }
187        }
188      }
189      return super.isServerOnline(serverName);
190    }
191
192    @Override
193    public List<ServerName> createDestinationServersList() {
194      return super.createDestinationServersList(EXCLUDE_SERVERS);
195    }
196  }
197
198  public static final class HMasterForTest extends HMaster {
199
200    public HMasterForTest(Configuration conf) throws IOException {
201      super(conf);
202    }
203
204    @Override
205    protected AssignmentManager createAssignmentManager(MasterServices master) {
206      return new AMForTest(master);
207    }
208
209    @Override
210    protected ServerManager createServerManager(MasterServices master) throws IOException {
211      setupClusterConnection();
212      return new SMForTest(master);
213    }
214  }
215
216  @BeforeClass
217  public static void setUp() throws Exception {
218    UTIL.startMiniCluster(StartMiniClusterOption.builder().numMasters(1)
219      .masterClass(HMasterForTest.class).numRegionServers(3).rsClass(RSForTest.class).build());
220    UTIL.createTable(NAME, CF);
221    // Here the test region must not be hosted on the same rs with meta region.
222    // We have 3 RSes and only two regions(meta and the test region), so they will not likely to be
223    // hosted on the same RS.
224    UTIL.waitTableAvailable(NAME);
225    UTIL.getAdmin().balancerSwitch(false, true);
226  }
227
228  @AfterClass
229  public static void tearDown() throws Exception {
230    UTIL.shutdownMiniCluster();
231  }
232
233  @Test
234  public void test() throws Exception {
235    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
236    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
237    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
238
239    ServerName sn = rsn.getRegionLocation();
240    RESUME_EXEC_PROC = new CountDownLatch(1);
241    ARRIVE_EXEC_PROC = new CountDownLatch(1);
242    RESUME_IS_SERVER_ONLINE = new CountDownLatch(1);
243
244    // reopen the region, and halt the executeProcedures method at RS side
245    am.moveAsync(new RegionPlan(region, sn, sn));
246    ARRIVE_EXEC_PROC.await();
247
248    RESUME_REPORT = new CountDownLatch(1);
249    ARRIVE_REPORT = new CountDownLatch(1);
250
251    // kill the region server
252    ServerName serverToKill = SERVER_TO_KILL;
253    UTIL.getMiniHBaseCluster().stopRegionServer(serverToKill);
254    RESUME_EXEC_PROC.countDown();
255
256    // wait until we are going to open the region on a new rs
257    ARRIVE_REPORT.await();
258
259    // resume the isServerOnline check, to let the rs procedure
260    RESUME_IS_SERVER_ONLINE.countDown();
261
262    // before HBASE-20811 the state could become OPEN, and this is why later the region will be
263    // assigned to two regionservers.
264    for (int i = 0; i < 15; i++) {
265      if (rsn.getState() == RegionState.State.OPEN) {
266        break;
267      }
268      Thread.sleep(1000);
269    }
270
271    // resume the old report
272    RESUME_REPORT.countDown();
273
274    // wait a bit to let the region to be online, it is not easy to write a condition for this so
275    // just sleep a while.
276    Thread.sleep(10000);
277
278    // confirm that the region is only on one rs
279    int count = 0;
280    for (RegionServerThread t : UTIL.getMiniHBaseCluster().getRegionServerThreads()) {
281      if (!t.getRegionServer().getRegions(NAME).isEmpty()) {
282        LOG.info("{} is on {}", region, t.getRegionServer().getServerName());
283        count++;
284      }
285    }
286    assertEquals(1, count);
287  }
288}