001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.jupiter.api.Assertions.assertNotEquals;
021
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.List;
025import java.util.concurrent.CountDownLatch;
026import java.util.concurrent.ExecutionException;
027import java.util.concurrent.Future;
028import java.util.concurrent.TimeUnit;
029import java.util.concurrent.TimeoutException;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.hbase.HBaseIOException;
032import org.apache.hadoop.hbase.HBaseTestingUtil;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.PleaseHoldException;
035import org.apache.hadoop.hbase.ServerName;
036import org.apache.hadoop.hbase.TableName;
037import org.apache.hadoop.hbase.client.RegionInfo;
038import org.apache.hadoop.hbase.master.HMaster;
039import org.apache.hadoop.hbase.master.MasterServices;
040import org.apache.hadoop.hbase.master.RegionPlan;
041import org.apache.hadoop.hbase.master.RegionServerList;
042import org.apache.hadoop.hbase.master.ServerManager;
043import org.apache.hadoop.hbase.master.region.MasterRegion;
044import org.apache.hadoop.hbase.regionserver.HRegionServer;
045import org.apache.hadoop.hbase.testclassification.MasterTests;
046import org.apache.hadoop.hbase.testclassification.MediumTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.junit.jupiter.api.AfterAll;
049import org.junit.jupiter.api.BeforeAll;
050import org.junit.jupiter.api.Tag;
051import org.junit.jupiter.api.Test;
052
053import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
054import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
055import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
056
057@Tag(MasterTests.TAG)
058@Tag(MediumTests.TAG)
059public class TestReportRegionStateTransitionFromDeadServer {
060
061  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();
062
063  private static CountDownLatch ARRIVE_GET_REGIONS;
064  private static CountDownLatch RESUME_GET_REGIONS;
065  private static CountDownLatch ARRIVE_REPORT;
066  private static CountDownLatch RESUME_REPORT;
067
068  private static final class ServerManagerForTest extends ServerManager {
069
070    public ServerManagerForTest(MasterServices master, RegionServerList storage) {
071      super(master, storage);
072    }
073
074    @Override
075    public List<ServerName> createDestinationServersList() {
076      return super.createDestinationServersList(EXCLUDE_SERVERS);
077    }
078  }
079
080  private static final class AssignmentManagerForTest extends AssignmentManager {
081
082    public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) {
083      super(master, masterRegion);
084    }
085
086    @Override
087    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
088      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
089      if (ARRIVE_GET_REGIONS != null) {
090        ARRIVE_GET_REGIONS.countDown();
091        try {
092          RESUME_GET_REGIONS.await();
093        } catch (InterruptedException e) {
094        }
095      }
096      return regions;
097    }
098
099    @Override
100    public ReportRegionStateTransitionResponse reportRegionStateTransition(
101      ReportRegionStateTransitionRequest req) throws PleaseHoldException {
102      if (
103        ARRIVE_REPORT != null && req.getTransitionList().stream()
104          .allMatch(t -> !ProtobufUtil.toRegionInfo(t.getRegionInfo(0)).isMetaRegion())
105      ) {
106        ARRIVE_REPORT.countDown();
107        try {
108          RESUME_REPORT.await();
109        } catch (InterruptedException e) {
110        }
111      }
112      return super.reportRegionStateTransition(req);
113    }
114  }
115
116  public static final class HMasterForTest extends HMaster {
117
118    public HMasterForTest(Configuration conf) throws IOException {
119      super(conf);
120    }
121
122    @Override
123    protected AssignmentManager createAssignmentManager(MasterServices master,
124      MasterRegion masterRegion) {
125      return new AssignmentManagerForTest(master, masterRegion);
126    }
127
128    @Override
129    protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
130      throws IOException {
131      setupClusterConnection();
132      return new ServerManagerForTest(master, storage);
133    }
134  }
135
136  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
137
138  private static TableName NAME = TableName.valueOf("Report");
139
140  private static byte[] CF = Bytes.toBytes("cf");
141
142  @BeforeAll
143  public static void setUp() throws Exception {
144    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
145    UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000);
146    UTIL.startMiniCluster(3);
147    UTIL.getAdmin().balancerSwitch(false, true);
148    UTIL.createTable(NAME, CF);
149    UTIL.waitTableAvailable(NAME);
150  }
151
152  @AfterAll
153  public static void tearDown() throws Exception {
154    UTIL.shutdownMiniCluster();
155  }
156
157  @Test
158  public void test() throws HBaseIOException, InterruptedException, ExecutionException {
159    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
160    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
161    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
162
163    // move from rs0 to rs1, and then kill rs0. Later add rs1 to exclude servers, and at last verify
164    // that the region should not be on rs1 and rs2 both.
165    HRegionServer rs0 = UTIL.getMiniHBaseCluster().getRegionServer(rsn.getRegionLocation());
166    HRegionServer rs1 = UTIL.getOtherRegionServer(rs0);
167    HRegionServer rs2 = UTIL.getMiniHBaseCluster().getRegionServerThreads().stream()
168      .map(t -> t.getRegionServer()).filter(rs -> rs != rs0 && rs != rs1).findAny().get();
169
170    RESUME_REPORT = new CountDownLatch(1);
171    ARRIVE_REPORT = new CountDownLatch(1);
172    Future<?> future =
173      am.moveAsync(new RegionPlan(region, rs0.getServerName(), rs1.getServerName()));
174    ARRIVE_REPORT.await();
175
176    RESUME_GET_REGIONS = new CountDownLatch(1);
177    ARRIVE_GET_REGIONS = new CountDownLatch(1);
178    rs0.abort("For testing!");
179
180    ARRIVE_GET_REGIONS.await();
181    RESUME_REPORT.countDown();
182
183    try {
184      future.get(15, TimeUnit.SECONDS);
185    } catch (TimeoutException e) {
186      // after the fix in HBASE-21508 we will get this exception as the TRSP can not be finished any
187      // more before SCP interrupts it. It's OK.
188    }
189
190    EXCLUDE_SERVERS.add(rs1.getServerName());
191    RESUME_GET_REGIONS.countDown();
192    // wait until there are no running procedures, no SCP and no TRSP
193    UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor()
194      .getActiveProcIds().isEmpty());
195    boolean onRS1 = !rs1.getRegions(NAME).isEmpty();
196    boolean onRS2 = !rs2.getRegions(NAME).isEmpty();
197    assertNotEquals(onRS2, onRS1,
198      "should either be on rs1 or rs2, but onRS1 is " + onRS1 + " and on RS2 is " + onRS2);
199  }
200}