001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
import static org.junit.Assert.assertNotEquals;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.RegionServerList;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.region.MasterRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
058
059@Category({ MasterTests.class, MediumTests.class })
060public class TestReportRegionStateTransitionFromDeadServer {
061
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE =
064    HBaseClassTestRule.forClass(TestReportRegionStateTransitionFromDeadServer.class);
065
066  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();
067
068  private static CountDownLatch ARRIVE_GET_REGIONS;
069  private static CountDownLatch RESUME_GET_REGIONS;
070  private static CountDownLatch ARRIVE_REPORT;
071  private static CountDownLatch RESUME_REPORT;
072
073  private static final class ServerManagerForTest extends ServerManager {
074
075    public ServerManagerForTest(MasterServices master, RegionServerList storage) {
076      super(master, storage);
077    }
078
079    @Override
080    public List<ServerName> createDestinationServersList() {
081      return super.createDestinationServersList(EXCLUDE_SERVERS);
082    }
083  }
084
085  private static final class AssignmentManagerForTest extends AssignmentManager {
086
087    public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) {
088      super(master, masterRegion);
089    }
090
091    @Override
092    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
093      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
094      if (ARRIVE_GET_REGIONS != null) {
095        ARRIVE_GET_REGIONS.countDown();
096        try {
097          RESUME_GET_REGIONS.await();
098        } catch (InterruptedException e) {
099        }
100      }
101      return regions;
102    }
103
104    @Override
105    public ReportRegionStateTransitionResponse reportRegionStateTransition(
106      ReportRegionStateTransitionRequest req) throws PleaseHoldException {
107      if (
108        ARRIVE_REPORT != null && req.getTransitionList().stream()
109          .allMatch(t -> !ProtobufUtil.toRegionInfo(t.getRegionInfo(0)).isMetaRegion())
110      ) {
111        ARRIVE_REPORT.countDown();
112        try {
113          RESUME_REPORT.await();
114        } catch (InterruptedException e) {
115        }
116      }
117      return super.reportRegionStateTransition(req);
118    }
119  }
120
121  public static final class HMasterForTest extends HMaster {
122
123    public HMasterForTest(Configuration conf) throws IOException {
124      super(conf);
125    }
126
127    @Override
128    protected AssignmentManager createAssignmentManager(MasterServices master,
129      MasterRegion masterRegion) {
130      return new AssignmentManagerForTest(master, masterRegion);
131    }
132
133    @Override
134    protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
135      throws IOException {
136      setupClusterConnection();
137      return new ServerManagerForTest(master, storage);
138    }
139  }
140
141  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
142
143  private static TableName NAME = TableName.valueOf("Report");
144
145  private static byte[] CF = Bytes.toBytes("cf");
146
147  @BeforeClass
148  public static void setUp() throws Exception {
149    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
150    UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000);
151    UTIL.startMiniCluster(3);
152    UTIL.getAdmin().balancerSwitch(false, true);
153    UTIL.createTable(NAME, CF);
154    UTIL.waitTableAvailable(NAME);
155  }
156
157  @AfterClass
158  public static void tearDown() throws Exception {
159    UTIL.shutdownMiniCluster();
160  }
161
162  @Test
163  public void test() throws HBaseIOException, InterruptedException, ExecutionException {
164    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
165    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
166    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
167
168    // move from rs0 to rs1, and then kill rs0. Later add rs1 to exclude servers, and at last verify
169    // that the region should not be on rs1 and rs2 both.
170    HRegionServer rs0 = UTIL.getMiniHBaseCluster().getRegionServer(rsn.getRegionLocation());
171    HRegionServer rs1 = UTIL.getOtherRegionServer(rs0);
172    HRegionServer rs2 = UTIL.getMiniHBaseCluster().getRegionServerThreads().stream()
173      .map(t -> t.getRegionServer()).filter(rs -> rs != rs0 && rs != rs1).findAny().get();
174
175    RESUME_REPORT = new CountDownLatch(1);
176    ARRIVE_REPORT = new CountDownLatch(1);
177    Future<?> future =
178      am.moveAsync(new RegionPlan(region, rs0.getServerName(), rs1.getServerName()));
179    ARRIVE_REPORT.await();
180
181    RESUME_GET_REGIONS = new CountDownLatch(1);
182    ARRIVE_GET_REGIONS = new CountDownLatch(1);
183    rs0.abort("For testing!");
184
185    ARRIVE_GET_REGIONS.await();
186    RESUME_REPORT.countDown();
187
188    try {
189      future.get(15, TimeUnit.SECONDS);
190    } catch (TimeoutException e) {
191      // after the fix in HBASE-21508 we will get this exception as the TRSP can not be finished any
192      // more before SCP interrupts it. It's OK.
193    }
194
195    EXCLUDE_SERVERS.add(rs1.getServerName());
196    RESUME_GET_REGIONS.countDown();
197    // wait until there are no running procedures, no SCP and no TRSP
198    UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor()
199      .getActiveProcIds().isEmpty());
200    boolean onRS1 = !rs1.getRegions(NAME).isEmpty();
201    boolean onRS2 = !rs2.getRegions(NAME).isEmpty();
202    assertNotEquals(
203      "should either be on rs1 or rs2, but onRS1 is " + onRS1 + " and on RS2 is " + onRS2, onRS1,
204      onRS2);
205  }
206}