001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.assertEquals;
021
022import java.io.IOException;
023import java.net.ConnectException;
024import java.util.List;
025import java.util.concurrent.CopyOnWriteArrayList;
026import java.util.concurrent.CountDownLatch;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HBaseTestingUtil;
030import org.apache.hadoop.hbase.PleaseHoldException;
031import org.apache.hadoop.hbase.ServerName;
032import org.apache.hadoop.hbase.SingleProcessHBaseCluster.MiniHBaseClusterRegionServer;
033import org.apache.hadoop.hbase.StartTestingClusterOption;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.master.HMaster;
037import org.apache.hadoop.hbase.master.MasterServices;
038import org.apache.hadoop.hbase.master.RegionPlan;
039import org.apache.hadoop.hbase.master.RegionServerList;
040import org.apache.hadoop.hbase.master.RegionState;
041import org.apache.hadoop.hbase.master.ServerManager;
042import org.apache.hadoop.hbase.master.region.MasterRegion;
043import org.apache.hadoop.hbase.regionserver.HRegionServer;
044import org.apache.hadoop.hbase.regionserver.RSRpcServices;
045import org.apache.hadoop.hbase.testclassification.LargeTests;
046import org.apache.hadoop.hbase.testclassification.MasterTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
049import org.junit.AfterClass;
050import org.junit.BeforeClass;
051import org.junit.ClassRule;
052import org.junit.Test;
053import org.junit.experimental.categories.Category;
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056
057import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
058import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
059
060import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
061import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresRequest;
062import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse;
063import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
064import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
065import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
066import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
067
068/**
069 * Testcase for HBASE-21811.
070 */
071@Category({ MasterTests.class, LargeTests.class })
072public class TestWakeUpUnexpectedProcedure {
073
074  @ClassRule
075  public static final HBaseClassTestRule CLASS_RULE =
076    HBaseClassTestRule.forClass(TestWakeUpUnexpectedProcedure.class);
077
078  private static final Logger LOG = LoggerFactory.getLogger(TestWakeUpUnexpectedProcedure.class);
079
080  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
081
082  private static TableName NAME = TableName.valueOf("Assign");
083
084  private static final List<ServerName> EXCLUDE_SERVERS = new CopyOnWriteArrayList<>();
085
086  private static byte[] CF = Bytes.toBytes("cf");
087
088  private static volatile ServerName SERVER_TO_KILL;
089
090  private static volatile CountDownLatch ARRIVE_EXEC_PROC;
091
092  private static volatile CountDownLatch RESUME_EXEC_PROC;
093
094  private static volatile CountDownLatch RESUME_IS_SERVER_ONLINE;
095
096  private static volatile CountDownLatch ARRIVE_REPORT;
097
098  private static volatile CountDownLatch RESUME_REPORT;
099
100  private static final class RSRpcServicesForTest extends RSRpcServices {
101
102    public RSRpcServicesForTest(HRegionServer rs) throws IOException {
103      super(rs);
104    }
105
106    @Override
107    public ExecuteProceduresResponse executeProcedures(RpcController controller,
108      ExecuteProceduresRequest request) throws ServiceException {
109      if (request.getOpenRegionCount() > 0) {
110        if (ARRIVE_EXEC_PROC != null) {
111          SERVER_TO_KILL = getServer().getServerName();
112          ARRIVE_EXEC_PROC.countDown();
113          ARRIVE_EXEC_PROC = null;
114          try {
115            RESUME_EXEC_PROC.await();
116          } catch (InterruptedException e) {
117            throw new RuntimeException(e);
118          }
119          throw new ServiceException(new ConnectException("Inject error"));
120        }
121      }
122      return super.executeProcedures(controller, request);
123    }
124  }
125
126  public static final class RSForTest extends MiniHBaseClusterRegionServer {
127
128    public RSForTest(Configuration conf) throws IOException, InterruptedException {
129      super(conf);
130    }
131
132    @Override
133    protected RSRpcServices createRpcServices() throws IOException {
134      return new RSRpcServicesForTest(this);
135    }
136  }
137
138  private static final class AMForTest extends AssignmentManager {
139
140    public AMForTest(MasterServices master, MasterRegion masterRegion) {
141      super(master, masterRegion);
142    }
143
144    @Override
145    public ReportRegionStateTransitionResponse reportRegionStateTransition(
146      ReportRegionStateTransitionRequest req) throws PleaseHoldException {
147      RegionStateTransition rst = req.getTransition(0);
148      if (
149        rst.getTransitionCode() == TransitionCode.OPENED
150          && ProtobufUtil.toTableName(rst.getRegionInfo(0).getTableName()).equals(NAME)
151      ) {
152        CountDownLatch arrive = ARRIVE_REPORT;
153        if (ARRIVE_REPORT != null) {
154          ARRIVE_REPORT = null;
155          arrive.countDown();
156          // so we will choose another rs next time
157          EXCLUDE_SERVERS.add(ProtobufUtil.toServerName(req.getServer()));
158          try {
159            RESUME_REPORT.await();
160          } catch (InterruptedException e) {
161            throw new RuntimeException();
162          }
163        }
164      }
165      return super.reportRegionStateTransition(req);
166    }
167  }
168
169  private static final class SMForTest extends ServerManager {
170
171    public SMForTest(MasterServices master, RegionServerList storage) {
172      super(master, storage);
173    }
174
175    @Override
176    public boolean isServerOnline(ServerName serverName) {
177      ServerName toKill = SERVER_TO_KILL;
178      if (toKill != null && toKill.equals(serverName)) {
179        for (StackTraceElement ele : new Exception().getStackTrace()) {
180          // halt it is called from RSProcedureDispatcher, to delay the remoteCallFailed.
181          if ("scheduleForRetry".equals(ele.getMethodName())) {
182            if (RESUME_IS_SERVER_ONLINE != null) {
183              try {
184                RESUME_IS_SERVER_ONLINE.await();
185              } catch (InterruptedException e) {
186                throw new RuntimeException(e);
187              }
188            }
189            break;
190          }
191        }
192      }
193      return super.isServerOnline(serverName);
194    }
195
196    @Override
197    public List<ServerName> createDestinationServersList() {
198      return super.createDestinationServersList(EXCLUDE_SERVERS);
199    }
200  }
201
202  public static final class HMasterForTest extends HMaster {
203
204    public HMasterForTest(Configuration conf) throws IOException {
205      super(conf);
206    }
207
208    @Override
209    protected AssignmentManager createAssignmentManager(MasterServices master,
210      MasterRegion masterRegion) {
211      return new AMForTest(master, masterRegion);
212    }
213
214    @Override
215    protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
216      throws IOException {
217      setupClusterConnection();
218      return new SMForTest(master, storage);
219    }
220  }
221
222  @BeforeClass
223  public static void setUp() throws Exception {
224    UTIL.startMiniCluster(StartTestingClusterOption.builder().numMasters(1)
225      .masterClass(HMasterForTest.class).numRegionServers(3).rsClass(RSForTest.class).build());
226    UTIL.createTable(NAME, CF);
227    // Here the test region must not be hosted on the same rs with meta region.
228    // We have 3 RSes and only two regions(meta and the test region), so they will not likely to be
229    // hosted on the same RS.
230    UTIL.waitTableAvailable(NAME);
231    UTIL.getAdmin().balancerSwitch(false, true);
232  }
233
234  @AfterClass
235  public static void tearDown() throws Exception {
236    UTIL.shutdownMiniCluster();
237  }
238
239  @SuppressWarnings("FutureReturnValueIgnored")
240  @Test
241  public void test() throws Exception {
242    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
243    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
244    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
245
246    ServerName sn = rsn.getRegionLocation();
247    RESUME_EXEC_PROC = new CountDownLatch(1);
248    ARRIVE_EXEC_PROC = new CountDownLatch(1);
249    RESUME_IS_SERVER_ONLINE = new CountDownLatch(1);
250
251    // reopen the region, and halt the executeProcedures method at RS side
252    am.moveAsync(new RegionPlan(region, sn, sn));
253    ARRIVE_EXEC_PROC.await();
254
255    RESUME_REPORT = new CountDownLatch(1);
256    ARRIVE_REPORT = new CountDownLatch(1);
257
258    // kill the region server
259    ServerName serverToKill = SERVER_TO_KILL;
260    UTIL.getMiniHBaseCluster().stopRegionServer(serverToKill);
261    RESUME_EXEC_PROC.countDown();
262
263    // wait until we are going to open the region on a new rs
264    ARRIVE_REPORT.await();
265
266    // resume the isServerOnline check, to let the rs procedure
267    RESUME_IS_SERVER_ONLINE.countDown();
268
269    // before HBASE-20811 the state could become OPEN, and this is why later the region will be
270    // assigned to two regionservers.
271    for (int i = 0; i < 15; i++) {
272      if (rsn.getState() == RegionState.State.OPEN) {
273        break;
274      }
275      Thread.sleep(1000);
276    }
277
278    // resume the old report
279    RESUME_REPORT.countDown();
280
281    // wait a bit to let the region to be online, it is not easy to write a condition for this so
282    // just sleep a while.
283    Thread.sleep(10000);
284
285    // confirm that the region is only on one rs
286    int count = 0;
287    for (RegionServerThread t : UTIL.getMiniHBaseCluster().getRegionServerThreads()) {
288      if (!t.getRegionServer().getRegions(NAME).isEmpty()) {
289        LOG.info("{} is on {}", region, t.getRegionServer().getServerName());
290        count++;
291      }
292    }
293    assertEquals(1, count);
294  }
295}