001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.assertEquals;
021
022import java.io.IOException;
023import java.net.ConnectException;
024import java.util.List;
025import java.util.concurrent.CopyOnWriteArrayList;
026import java.util.concurrent.CountDownLatch;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HBaseTestingUtil;
030import org.apache.hadoop.hbase.PleaseHoldException;
031import org.apache.hadoop.hbase.ServerName;
032import org.apache.hadoop.hbase.SingleProcessHBaseCluster.MiniHBaseClusterRegionServer;
033import org.apache.hadoop.hbase.StartTestingClusterOption;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.master.HMaster;
037import org.apache.hadoop.hbase.master.MasterServices;
038import org.apache.hadoop.hbase.master.RegionPlan;
039import org.apache.hadoop.hbase.master.RegionState;
040import org.apache.hadoop.hbase.master.ServerManager;
041import org.apache.hadoop.hbase.master.region.MasterRegion;
042import org.apache.hadoop.hbase.regionserver.HRegionServer;
043import org.apache.hadoop.hbase.regionserver.RSRpcServices;
044import org.apache.hadoop.hbase.testclassification.LargeTests;
045import org.apache.hadoop.hbase.testclassification.MasterTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
048import org.junit.AfterClass;
049import org.junit.BeforeClass;
050import org.junit.ClassRule;
051import org.junit.Test;
052import org.junit.experimental.categories.Category;
053import org.slf4j.Logger;
054import org.slf4j.LoggerFactory;
055
056import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
057import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
058
059import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
060import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresRequest;
061import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse;
062import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
063import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
064import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
065import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
066
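/**
 * Testcase for HBASE-21811. Stops a region server while an open-region request sent to it is
 * still in flight, and verifies that the test region ends up hosted by exactly one region server.
 */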
070@Category({ MasterTests.class, LargeTests.class })
071public class TestWakeUpUnexpectedProcedure {
072
073  @ClassRule
074  public static final HBaseClassTestRule CLASS_RULE =
075    HBaseClassTestRule.forClass(TestWakeUpUnexpectedProcedure.class);
076
077  private static final Logger LOG = LoggerFactory.getLogger(TestWakeUpUnexpectedProcedure.class);
078
079  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
080
081  private static TableName NAME = TableName.valueOf("Assign");
082
083  private static final List<ServerName> EXCLUDE_SERVERS = new CopyOnWriteArrayList<>();
084
085  private static byte[] CF = Bytes.toBytes("cf");
086
087  private static volatile ServerName SERVER_TO_KILL;
088
089  private static volatile CountDownLatch ARRIVE_EXEC_PROC;
090
091  private static volatile CountDownLatch RESUME_EXEC_PROC;
092
093  private static volatile CountDownLatch RESUME_IS_SERVER_ONLINE;
094
095  private static volatile CountDownLatch ARRIVE_REPORT;
096
097  private static volatile CountDownLatch RESUME_REPORT;
098
099  private static final class RSRpcServicesForTest extends RSRpcServices {
100
101    public RSRpcServicesForTest(HRegionServer rs) throws IOException {
102      super(rs);
103    }
104
105    @Override
106    public ExecuteProceduresResponse executeProcedures(RpcController controller,
107        ExecuteProceduresRequest request) throws ServiceException {
108      if (request.getOpenRegionCount() > 0) {
109        if (ARRIVE_EXEC_PROC != null) {
110          SERVER_TO_KILL = getServer().getServerName();
111          ARRIVE_EXEC_PROC.countDown();
112          ARRIVE_EXEC_PROC = null;
113          try {
114            RESUME_EXEC_PROC.await();
115          } catch (InterruptedException e) {
116            throw new RuntimeException(e);
117          }
118          throw new ServiceException(new ConnectException("Inject error"));
119        }
120      }
121      return super.executeProcedures(controller, request);
122    }
123  }
124
125  public static final class RSForTest extends MiniHBaseClusterRegionServer {
126
127    public RSForTest(Configuration conf) throws IOException, InterruptedException {
128      super(conf);
129    }
130
131    @Override
132    protected RSRpcServices createRpcServices() throws IOException {
133      return new RSRpcServicesForTest(this);
134    }
135  }
136
137  private static final class AMForTest extends AssignmentManager {
138
139    public AMForTest(MasterServices master, MasterRegion masterRegion) {
140      super(master, masterRegion);
141    }
142
143    @Override
144    public ReportRegionStateTransitionResponse reportRegionStateTransition(
145        ReportRegionStateTransitionRequest req) throws PleaseHoldException {
146      RegionStateTransition rst = req.getTransition(0);
147      if (rst.getTransitionCode() == TransitionCode.OPENED &&
148        ProtobufUtil.toTableName(rst.getRegionInfo(0).getTableName()).equals(NAME)) {
149        CountDownLatch arrive = ARRIVE_REPORT;
150        if (ARRIVE_REPORT != null) {
151          ARRIVE_REPORT = null;
152          arrive.countDown();
153          // so we will choose another rs next time
154          EXCLUDE_SERVERS.add(ProtobufUtil.toServerName(req.getServer()));
155          try {
156            RESUME_REPORT.await();
157          } catch (InterruptedException e) {
158            throw new RuntimeException();
159          }
160        }
161      }
162      return super.reportRegionStateTransition(req);
163    }
164  }
165
166  private static final class SMForTest extends ServerManager {
167
168    public SMForTest(MasterServices master) {
169      super(master);
170    }
171
172    @Override
173    public boolean isServerOnline(ServerName serverName) {
174      ServerName toKill = SERVER_TO_KILL;
175      if (toKill != null && toKill.equals(serverName)) {
176        for (StackTraceElement ele : new Exception().getStackTrace()) {
177          // halt it is called from RSProcedureDispatcher, to delay the remoteCallFailed.
178          if ("scheduleForRetry".equals(ele.getMethodName())) {
179            if (RESUME_IS_SERVER_ONLINE != null) {
180              try {
181                RESUME_IS_SERVER_ONLINE.await();
182              } catch (InterruptedException e) {
183                throw new RuntimeException(e);
184              }
185            }
186            break;
187          }
188        }
189      }
190      return super.isServerOnline(serverName);
191    }
192
193    @Override
194    public List<ServerName> createDestinationServersList() {
195      return super.createDestinationServersList(EXCLUDE_SERVERS);
196    }
197  }
198
199  public static final class HMasterForTest extends HMaster {
200
201    public HMasterForTest(Configuration conf) throws IOException {
202      super(conf);
203    }
204
205    @Override
206    protected AssignmentManager createAssignmentManager(MasterServices master,
207      MasterRegion masterRegion) {
208      return new AMForTest(master, masterRegion);
209    }
210
211    @Override
212    protected ServerManager createServerManager(MasterServices master) throws IOException {
213      setupClusterConnection();
214      return new SMForTest(master);
215    }
216  }
217
218  @BeforeClass
219  public static void setUp() throws Exception {
220    UTIL.startMiniCluster(StartTestingClusterOption.builder().numMasters(1)
221      .masterClass(HMasterForTest.class).numRegionServers(3).rsClass(RSForTest.class).build());
222    UTIL.createTable(NAME, CF);
223    // Here the test region must not be hosted on the same rs with meta region.
224    // We have 3 RSes and only two regions(meta and the test region), so they will not likely to be
225    // hosted on the same RS.
226    UTIL.waitTableAvailable(NAME);
227    UTIL.getAdmin().balancerSwitch(false, true);
228  }
229
230  @AfterClass
231  public static void tearDown() throws Exception {
232    UTIL.shutdownMiniCluster();
233  }
234
235  @Test
236  public void test() throws Exception {
237    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
238    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
239    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
240
241    ServerName sn = rsn.getRegionLocation();
242    RESUME_EXEC_PROC = new CountDownLatch(1);
243    ARRIVE_EXEC_PROC = new CountDownLatch(1);
244    RESUME_IS_SERVER_ONLINE = new CountDownLatch(1);
245
246    // reopen the region, and halt the executeProcedures method at RS side
247    am.moveAsync(new RegionPlan(region, sn, sn));
248    ARRIVE_EXEC_PROC.await();
249
250    RESUME_REPORT = new CountDownLatch(1);
251    ARRIVE_REPORT = new CountDownLatch(1);
252
253    // kill the region server
254    ServerName serverToKill = SERVER_TO_KILL;
255    UTIL.getMiniHBaseCluster().stopRegionServer(serverToKill);
256    RESUME_EXEC_PROC.countDown();
257
258    // wait until we are going to open the region on a new rs
259    ARRIVE_REPORT.await();
260
261    // resume the isServerOnline check, to let the rs procedure
262    RESUME_IS_SERVER_ONLINE.countDown();
263
264    // before HBASE-20811 the state could become OPEN, and this is why later the region will be
265    // assigned to two regionservers.
266    for (int i = 0; i < 15; i++) {
267      if (rsn.getState() == RegionState.State.OPEN) {
268        break;
269      }
270      Thread.sleep(1000);
271    }
272
273    // resume the old report
274    RESUME_REPORT.countDown();
275
276    // wait a bit to let the region to be online, it is not easy to write a condition for this so
277    // just sleep a while.
278    Thread.sleep(10000);
279
280    // confirm that the region is only on one rs
281    int count = 0;
282    for (RegionServerThread t : UTIL.getMiniHBaseCluster().getRegionServerThreads()) {
283      if (!t.getRegionServer().getRegions(NAME).isEmpty()) {
284        LOG.info("{} is on {}", region, t.getRegionServer().getServerName());
285        count++;
286      }
287    }
288    assertEquals(1, count);
289  }
290}