001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.assertNotEquals;
021
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.List;
025import java.util.concurrent.CountDownLatch;
026import java.util.concurrent.ExecutionException;
027import java.util.concurrent.Future;
028import java.util.concurrent.TimeUnit;
029import java.util.concurrent.TimeoutException;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseIOException;
033import org.apache.hadoop.hbase.HBaseTestingUtility;
034import org.apache.hadoop.hbase.HConstants;
035import org.apache.hadoop.hbase.PleaseHoldException;
036import org.apache.hadoop.hbase.ServerName;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.client.RegionInfo;
039import org.apache.hadoop.hbase.master.HMaster;
040import org.apache.hadoop.hbase.master.MasterServices;
041import org.apache.hadoop.hbase.master.RegionPlan;
042import org.apache.hadoop.hbase.master.ServerManager;
043import org.apache.hadoop.hbase.regionserver.HRegionServer;
044import org.apache.hadoop.hbase.testclassification.MasterTests;
045import org.apache.hadoop.hbase.testclassification.MediumTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.junit.AfterClass;
048import org.junit.BeforeClass;
049import org.junit.ClassRule;
050import org.junit.Test;
051import org.junit.experimental.categories.Category;
052
053import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
054import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
055import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
056
057@Category({ MasterTests.class, MediumTests.class })
058public class TestReportRegionStateTransitionFromDeadServer {
059
060  @ClassRule
061  public static final HBaseClassTestRule CLASS_RULE =
062    HBaseClassTestRule.forClass(TestReportRegionStateTransitionFromDeadServer.class);
063
064  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();
065
066  private static CountDownLatch ARRIVE_GET_REGIONS;
067  private static CountDownLatch RESUME_GET_REGIONS;
068  private static CountDownLatch ARRIVE_REPORT;
069  private static CountDownLatch RESUME_REPORT;
070
071  private static final class ServerManagerForTest extends ServerManager {
072
073    public ServerManagerForTest(MasterServices master) {
074      super(master);
075    }
076
077    @Override
078    public List<ServerName> createDestinationServersList() {
079      return super.createDestinationServersList(EXCLUDE_SERVERS);
080    }
081  }
082
083  private static final class AssignmentManagerForTest extends AssignmentManager {
084
085    public AssignmentManagerForTest(MasterServices master) {
086      super(master);
087    }
088
089    @Override
090    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
091      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
092      if (ARRIVE_GET_REGIONS != null) {
093        ARRIVE_GET_REGIONS.countDown();
094        try {
095          RESUME_GET_REGIONS.await();
096        } catch (InterruptedException e) {
097        }
098      }
099      return regions;
100    }
101
102    @Override
103    public ReportRegionStateTransitionResponse reportRegionStateTransition(
104        ReportRegionStateTransitionRequest req) throws PleaseHoldException {
105      if (ARRIVE_REPORT != null && req.getTransitionList().stream()
106        .allMatch(t -> !ProtobufUtil.toRegionInfo(t.getRegionInfo(0)).isMetaRegion())) {
107        ARRIVE_REPORT.countDown();
108        try {
109          RESUME_REPORT.await();
110        } catch (InterruptedException e) {
111        }
112      }
113      return super.reportRegionStateTransition(req);
114    }
115  }
116
117  public static final class HMasterForTest extends HMaster {
118
119    public HMasterForTest(Configuration conf) throws IOException {
120      super(conf);
121    }
122
123    @Override
124    protected AssignmentManager createAssignmentManager(MasterServices master) {
125      return new AssignmentManagerForTest(master);
126    }
127
128    @Override
129    protected ServerManager createServerManager(MasterServices master) throws IOException {
130      setupClusterConnection();
131      return new ServerManagerForTest(master);
132    }
133  }
134
135  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
136
137  private static TableName NAME = TableName.valueOf("Report");
138
139  private static byte[] CF = Bytes.toBytes("cf");
140
141  @BeforeClass
142  public static void setUp() throws Exception {
143    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
144    UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000);
145    UTIL.startMiniCluster(3);
146    UTIL.getAdmin().balancerSwitch(false, true);
147    UTIL.createTable(NAME, CF);
148    UTIL.waitTableAvailable(NAME);
149  }
150
151  @AfterClass
152  public static void tearDown() throws Exception {
153    UTIL.shutdownMiniCluster();
154  }
155
156  @Test
157  public void test() throws HBaseIOException, InterruptedException, ExecutionException {
158    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
159    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
160    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
161
162    // move from rs0 to rs1, and then kill rs0. Later add rs1 to exclude servers, and at last verify
163    // that the region should not be on rs1 and rs2 both.
164    HRegionServer rs0 = UTIL.getMiniHBaseCluster().getRegionServer(rsn.getRegionLocation());
165    HRegionServer rs1 = UTIL.getOtherRegionServer(rs0);
166    HRegionServer rs2 = UTIL.getMiniHBaseCluster().getRegionServerThreads().stream()
167      .map(t -> t.getRegionServer()).filter(rs -> rs != rs0 && rs != rs1).findAny().get();
168
169    RESUME_REPORT = new CountDownLatch(1);
170    ARRIVE_REPORT = new CountDownLatch(1);
171    Future<?> future =
172      am.moveAsync(new RegionPlan(region, rs0.getServerName(), rs1.getServerName()));
173    ARRIVE_REPORT.await();
174
175    RESUME_GET_REGIONS = new CountDownLatch(1);
176    ARRIVE_GET_REGIONS = new CountDownLatch(1);
177    rs0.abort("For testing!");
178
179    ARRIVE_GET_REGIONS.await();
180    RESUME_REPORT.countDown();
181
182    try {
183      future.get(15, TimeUnit.SECONDS);
184    } catch (TimeoutException e) {
185      // after the fix in HBASE-21508 we will get this exception as the TRSP can not be finished any
186      // more before SCP interrupts it. It's OK.
187    }
188
189    EXCLUDE_SERVERS.add(rs1.getServerName());
190    RESUME_GET_REGIONS.countDown();
191    // wait until there are no running procedures, no SCP and no TRSP
192    UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor()
193      .getActiveProcIds().isEmpty());
194    boolean onRS1 = !rs1.getRegions(NAME).isEmpty();
195    boolean onRS2 = !rs2.getRegions(NAME).isEmpty();
196    assertNotEquals(
197      "should either be on rs1 or rs2, but onRS1 is " + onRS1 + " and on RS2 is " + onRS2, onRS1,
198      onRS2);
199  }
200}