001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.assertEquals;
021
022import java.io.IOException;
023import java.net.ConnectException;
024import java.util.List;
025import java.util.concurrent.CopyOnWriteArrayList;
026import java.util.concurrent.CountDownLatch;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HBaseTestingUtility;
030import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
031import org.apache.hadoop.hbase.PleaseHoldException;
032import org.apache.hadoop.hbase.ServerName;
033import org.apache.hadoop.hbase.StartMiniClusterOption;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.master.HMaster;
037import org.apache.hadoop.hbase.master.MasterServices;
038import org.apache.hadoop.hbase.master.RegionPlan;
039import org.apache.hadoop.hbase.master.RegionState;
040import org.apache.hadoop.hbase.master.ServerManager;
041import org.apache.hadoop.hbase.regionserver.HRegionServer;
042import org.apache.hadoop.hbase.regionserver.RSRpcServices;
043import org.apache.hadoop.hbase.testclassification.LargeTests;
044import org.apache.hadoop.hbase.testclassification.MasterTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
047import org.apache.zookeeper.KeeperException;
048import org.junit.AfterClass;
049import org.junit.BeforeClass;
050import org.junit.ClassRule;
051import org.junit.Test;
052import org.junit.experimental.categories.Category;
053import org.slf4j.Logger;
054import org.slf4j.LoggerFactory;
055
056import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
057import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
058
059import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
060import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresRequest;
061import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse;
062import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
063import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
064import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
065import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
066
067/**
068 * Testcase for HBASE-21811.
069 */
070@Category({ MasterTests.class, LargeTests.class })
071public class TestWakeUpUnexpectedProcedure {
072
073  @ClassRule
074  public static final HBaseClassTestRule CLASS_RULE =
075    HBaseClassTestRule.forClass(TestWakeUpUnexpectedProcedure.class);
076
077  private static final Logger LOG = LoggerFactory.getLogger(TestWakeUpUnexpectedProcedure.class);
078
079  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
080
081  private static TableName NAME = TableName.valueOf("Assign");
082
083  private static final List<ServerName> EXCLUDE_SERVERS = new CopyOnWriteArrayList<>();
084
085  private static byte[] CF = Bytes.toBytes("cf");
086
087  private static volatile ServerName SERVER_TO_KILL;
088
089  private static volatile CountDownLatch ARRIVE_EXEC_PROC;
090
091  private static volatile CountDownLatch RESUME_EXEC_PROC;
092
093  private static volatile CountDownLatch RESUME_IS_SERVER_ONLINE;
094
095  private static volatile CountDownLatch ARRIVE_REPORT;
096
097  private static volatile CountDownLatch RESUME_REPORT;
098
099  private static final class RSRpcServicesForTest extends RSRpcServices {
100
101    public RSRpcServicesForTest(HRegionServer rs) throws IOException {
102      super(rs);
103    }
104
105    @Override
106    public ExecuteProceduresResponse executeProcedures(RpcController controller,
107        ExecuteProceduresRequest request) throws ServiceException {
108      if (request.getOpenRegionCount() > 0) {
109        if (ARRIVE_EXEC_PROC != null) {
110          SERVER_TO_KILL = regionServer.getServerName();
111          ARRIVE_EXEC_PROC.countDown();
112          ARRIVE_EXEC_PROC = null;
113          try {
114            RESUME_EXEC_PROC.await();
115          } catch (InterruptedException e) {
116            throw new RuntimeException(e);
117          }
118          throw new ServiceException(new ConnectException("Inject error"));
119        }
120      }
121      return super.executeProcedures(controller, request);
122    }
123  }
124
125  public static final class RSForTest extends MiniHBaseClusterRegionServer {
126
127    public RSForTest(Configuration conf) throws IOException, InterruptedException {
128      super(conf);
129    }
130
131    @Override
132    protected RSRpcServices createRpcServices() throws IOException {
133      return new RSRpcServicesForTest(this);
134    }
135  }
136
137  private static final class AMForTest extends AssignmentManager {
138
139    public AMForTest(MasterServices master) {
140      super(master);
141    }
142
143    @Override
144    public ReportRegionStateTransitionResponse reportRegionStateTransition(
145        ReportRegionStateTransitionRequest req) throws PleaseHoldException {
146      RegionStateTransition rst = req.getTransition(0);
147      if (rst.getTransitionCode() == TransitionCode.OPENED &&
148        ProtobufUtil.toTableName(rst.getRegionInfo(0).getTableName()).equals(NAME)) {
149        CountDownLatch arrive = ARRIVE_REPORT;
150        if (ARRIVE_REPORT != null) {
151          ARRIVE_REPORT = null;
152          arrive.countDown();
153          // so we will choose another rs next time
154          EXCLUDE_SERVERS.add(ProtobufUtil.toServerName(req.getServer()));
155          try {
156            RESUME_REPORT.await();
157          } catch (InterruptedException e) {
158            throw new RuntimeException();
159          }
160        }
161      }
162      return super.reportRegionStateTransition(req);
163    }
164  }
165
166  private static final class SMForTest extends ServerManager {
167
168    public SMForTest(MasterServices master) {
169      super(master);
170    }
171
172    @Override
173    public boolean isServerOnline(ServerName serverName) {
174      ServerName toKill = SERVER_TO_KILL;
175      if (toKill != null && toKill.equals(serverName)) {
176        for (StackTraceElement ele : new Exception().getStackTrace()) {
177          // halt it is called from RSProcedureDispatcher, to delay the remoteCallFailed.
178          if ("scheduleForRetry".equals(ele.getMethodName())) {
179            if (RESUME_IS_SERVER_ONLINE != null) {
180              try {
181                RESUME_IS_SERVER_ONLINE.await();
182              } catch (InterruptedException e) {
183                throw new RuntimeException(e);
184              }
185            }
186            break;
187          }
188        }
189      }
190      return super.isServerOnline(serverName);
191    }
192
193    @Override
194    public List<ServerName> createDestinationServersList() {
195      return super.createDestinationServersList(EXCLUDE_SERVERS);
196    }
197  }
198
199  public static final class HMasterForTest extends HMaster {
200
201    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
202      super(conf);
203    }
204
205    @Override
206    protected AssignmentManager createAssignmentManager(MasterServices master) {
207      return new AMForTest(master);
208    }
209
210    @Override
211    protected ServerManager createServerManager(MasterServices master) throws IOException {
212      setupClusterConnection();
213      return new SMForTest(master);
214    }
215  }
216
217  @BeforeClass
218  public static void setUp() throws Exception {
219    UTIL.startMiniCluster(StartMiniClusterOption.builder().numMasters(1)
220      .masterClass(HMasterForTest.class).numRegionServers(3).rsClass(RSForTest.class).build());
221    UTIL.createTable(NAME, CF);
222    // Here the test region must not be hosted on the same rs with meta region.
223    // We have 3 RSes and only two regions(meta and the test region), so they will not likely to be
224    // hosted on the same RS.
225    UTIL.waitTableAvailable(NAME);
226    UTIL.getAdmin().balancerSwitch(false, true);
227  }
228
229  @AfterClass
230  public static void tearDown() throws Exception {
231    UTIL.shutdownMiniCluster();
232  }
233
234  @Test
235  public void test() throws Exception {
236    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
237    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
238    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
239
240    ServerName sn = rsn.getRegionLocation();
241    RESUME_EXEC_PROC = new CountDownLatch(1);
242    ARRIVE_EXEC_PROC = new CountDownLatch(1);
243    RESUME_IS_SERVER_ONLINE = new CountDownLatch(1);
244
245    // reopen the region, and halt the executeProcedures method at RS side
246    am.moveAsync(new RegionPlan(region, sn, sn));
247    ARRIVE_EXEC_PROC.await();
248
249    RESUME_REPORT = new CountDownLatch(1);
250    ARRIVE_REPORT = new CountDownLatch(1);
251
252    // kill the region server
253    ServerName serverToKill = SERVER_TO_KILL;
254    UTIL.getMiniHBaseCluster().stopRegionServer(serverToKill);
255    RESUME_EXEC_PROC.countDown();
256
257    // wait until we are going to open the region on a new rs
258    ARRIVE_REPORT.await();
259
260    // resume the isServerOnline check, to let the rs procedure
261    RESUME_IS_SERVER_ONLINE.countDown();
262
263    // before HBASE-20811 the state could become OPEN, and this is why later the region will be
264    // assigned to two regionservers.
265    for (int i = 0; i < 15; i++) {
266      if (rsn.getState() == RegionState.State.OPEN) {
267        break;
268      }
269      Thread.sleep(1000);
270    }
271
272    // resume the old report
273    RESUME_REPORT.countDown();
274
275    // wait a bit to let the region to be online, it is not easy to write a condition for this so
276    // just sleep a while.
277    Thread.sleep(10000);
278
279    // confirm that the region is only on one rs
280    int count = 0;
281    for (RegionServerThread t : UTIL.getMiniHBaseCluster().getRegionServerThreads()) {
282      if (!t.getRegionServer().getRegions(NAME).isEmpty()) {
283        LOG.info("{} is on {}", region, t.getRegionServer().getServerName());
284        count++;
285      }
286    }
287    assertEquals(1, count);
288  }
289}