001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.assertNotEquals;
021
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.List;
025import java.util.concurrent.CountDownLatch;
026import java.util.concurrent.ExecutionException;
027import java.util.concurrent.Future;
028import java.util.concurrent.TimeUnit;
029import java.util.concurrent.TimeoutException;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseIOException;
033import org.apache.hadoop.hbase.HBaseTestingUtility;
034import org.apache.hadoop.hbase.HConstants;
035import org.apache.hadoop.hbase.PleaseHoldException;
036import org.apache.hadoop.hbase.ServerName;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.client.RegionInfo;
039import org.apache.hadoop.hbase.master.HMaster;
040import org.apache.hadoop.hbase.master.MasterServices;
041import org.apache.hadoop.hbase.master.RegionPlan;
042import org.apache.hadoop.hbase.master.ServerManager;
043import org.apache.hadoop.hbase.regionserver.HRegionServer;
044import org.apache.hadoop.hbase.testclassification.MasterTests;
045import org.apache.hadoop.hbase.testclassification.MediumTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.zookeeper.KeeperException;
048import org.junit.AfterClass;
049import org.junit.BeforeClass;
050import org.junit.ClassRule;
051import org.junit.Test;
052import org.junit.experimental.categories.Category;
053
054import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
055import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
056import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
057
058@Category({ MasterTests.class, MediumTests.class })
059public class TestReportRegionStateTransitionFromDeadServer {
060
061  @ClassRule
062  public static final HBaseClassTestRule CLASS_RULE =
063    HBaseClassTestRule.forClass(TestReportRegionStateTransitionFromDeadServer.class);
064
065  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();
066
067  private static CountDownLatch ARRIVE_GET_REGIONS;
068  private static CountDownLatch RESUME_GET_REGIONS;
069  private static CountDownLatch ARRIVE_REPORT;
070  private static CountDownLatch RESUME_REPORT;
071
072  private static final class ServerManagerForTest extends ServerManager {
073
074    public ServerManagerForTest(MasterServices master) {
075      super(master);
076    }
077
078    @Override
079    public List<ServerName> createDestinationServersList() {
080      return super.createDestinationServersList(EXCLUDE_SERVERS);
081    }
082  }
083
084  private static final class AssignmentManagerForTest extends AssignmentManager {
085
086    public AssignmentManagerForTest(MasterServices master) {
087      super(master);
088    }
089
090    @Override
091    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
092      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
093      if (ARRIVE_GET_REGIONS != null) {
094        ARRIVE_GET_REGIONS.countDown();
095        try {
096          RESUME_GET_REGIONS.await();
097        } catch (InterruptedException e) {
098        }
099      }
100      return regions;
101    }
102
103    @Override
104    public ReportRegionStateTransitionResponse reportRegionStateTransition(
105        ReportRegionStateTransitionRequest req) throws PleaseHoldException {
106      if (ARRIVE_REPORT != null && req.getTransitionList().stream()
107        .allMatch(t -> !ProtobufUtil.toRegionInfo(t.getRegionInfo(0)).isMetaRegion())) {
108        ARRIVE_REPORT.countDown();
109        try {
110          RESUME_REPORT.await();
111        } catch (InterruptedException e) {
112        }
113      }
114      return super.reportRegionStateTransition(req);
115    }
116  }
117
118  public static final class HMasterForTest extends HMaster {
119
120    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
121      super(conf);
122    }
123
124    @Override
125    protected AssignmentManager createAssignmentManager(MasterServices master) {
126      return new AssignmentManagerForTest(master);
127    }
128
129    @Override
130    protected ServerManager createServerManager(MasterServices master) throws IOException {
131      setupClusterConnection();
132      return new ServerManagerForTest(master);
133    }
134  }
135
136  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
137
138  private static TableName NAME = TableName.valueOf("Report");
139
140  private static byte[] CF = Bytes.toBytes("cf");
141
142  @BeforeClass
143  public static void setUp() throws Exception {
144    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
145    UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000);
146    UTIL.startMiniCluster(3);
147    UTIL.getAdmin().balancerSwitch(false, true);
148    UTIL.createTable(NAME, CF);
149    UTIL.waitTableAvailable(NAME);
150  }
151
152  @AfterClass
153  public static void tearDown() throws Exception {
154    UTIL.shutdownMiniCluster();
155  }
156
157  @Test
158  public void test() throws HBaseIOException, InterruptedException, ExecutionException {
159    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
160    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
161    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
162
163    // move from rs0 to rs1, and then kill rs0. Later add rs1 to exclude servers, and at last verify
164    // that the region should not be on rs1 and rs2 both.
165    HRegionServer rs0 = UTIL.getMiniHBaseCluster().getRegionServer(rsn.getRegionLocation());
166    HRegionServer rs1 = UTIL.getOtherRegionServer(rs0);
167    HRegionServer rs2 = UTIL.getMiniHBaseCluster().getRegionServerThreads().stream()
168      .map(t -> t.getRegionServer()).filter(rs -> rs != rs0 && rs != rs1).findAny().get();
169
170    RESUME_REPORT = new CountDownLatch(1);
171    ARRIVE_REPORT = new CountDownLatch(1);
172    Future<?> future =
173      am.moveAsync(new RegionPlan(region, rs0.getServerName(), rs1.getServerName()));
174    ARRIVE_REPORT.await();
175
176    RESUME_GET_REGIONS = new CountDownLatch(1);
177    ARRIVE_GET_REGIONS = new CountDownLatch(1);
178    rs0.abort("For testing!");
179
180    ARRIVE_GET_REGIONS.await();
181    RESUME_REPORT.countDown();
182
183    try {
184      future.get(15, TimeUnit.SECONDS);
185    } catch (TimeoutException e) {
186      // after the fix in HBASE-21508 we will get this exception as the TRSP can not be finished any
187      // more before SCP interrupts it. It's OK.
188    }
189
190    EXCLUDE_SERVERS.add(rs1.getServerName());
191    RESUME_GET_REGIONS.countDown();
192    // wait until there are no running procedures, no SCP and no TRSP
193    UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor()
194      .getActiveProcIds().isEmpty());
195    boolean onRS1 = !rs1.getRegions(NAME).isEmpty();
196    boolean onRS2 = !rs2.getRegions(NAME).isEmpty();
197    assertNotEquals(
198      "should either be on rs1 or rs2, but onRS1 is " + onRS1 + " and on RS2 is " + onRS2, onRS1,
199      onRS2);
200  }
201}