001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.jupiter.api.Assertions.assertNotNull;
021import static org.junit.jupiter.api.Assertions.assertNull;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.List;
026import java.util.concurrent.CountDownLatch;
027import java.util.concurrent.Future;
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.hbase.HBaseTestingUtil;
030import org.apache.hadoop.hbase.PleaseHoldException;
031import org.apache.hadoop.hbase.ServerName;
032import org.apache.hadoop.hbase.StartTestingClusterOption;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.client.RegionInfo;
035import org.apache.hadoop.hbase.master.HMaster;
036import org.apache.hadoop.hbase.master.MasterServices;
037import org.apache.hadoop.hbase.master.RegionPlan;
038import org.apache.hadoop.hbase.master.RegionServerList;
039import org.apache.hadoop.hbase.master.ServerManager;
040import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
041import org.apache.hadoop.hbase.master.region.MasterRegion;
042import org.apache.hadoop.hbase.regionserver.HRegionServer;
043import org.apache.hadoop.hbase.testclassification.MasterTests;
044import org.apache.hadoop.hbase.testclassification.MediumTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.apache.hadoop.hbase.util.IdLock;
047import org.junit.jupiter.api.AfterAll;
048import org.junit.jupiter.api.BeforeAll;
049import org.junit.jupiter.api.Tag;
050import org.junit.jupiter.api.Test;
051
052import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
053
054import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
055import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
056import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
057
058/**
059 * Testcase for HBASE-22365.
060 */
061@Tag(MasterTests.TAG)
062@Tag(MediumTests.TAG)
063public class TestSCPGetRegionsRace {
064
065  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();
066
067  private static final class ServerManagerForTest extends ServerManager {
068
069    public ServerManagerForTest(MasterServices master, RegionServerList storage) {
070      super(master, storage);
071    }
072
073    @Override
074    public List<ServerName> createDestinationServersList() {
075      return super.createDestinationServersList(EXCLUDE_SERVERS);
076    }
077  }
078
079  private static CountDownLatch ARRIVE_REPORT;
080
081  private static CountDownLatch RESUME_REPORT;
082
083  private static CountDownLatch ARRIVE_GET;
084
085  private static CountDownLatch RESUME_GET;
086
087  private static final class AssignmentManagerForTest extends AssignmentManager {
088
089    public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) {
090      super(master, masterRegion);
091    }
092
093    @Override
094    public ReportRegionStateTransitionResponse reportRegionStateTransition(
095      ReportRegionStateTransitionRequest req) throws PleaseHoldException {
096      if (req.getTransition(0).getTransitionCode() == TransitionCode.CLOSED) {
097        if (ARRIVE_REPORT != null) {
098          ARRIVE_REPORT.countDown();
099          try {
100            RESUME_REPORT.await();
101            RESUME_REPORT = null;
102          } catch (InterruptedException e) {
103            throw new RuntimeException(e);
104          }
105        }
106      }
107      return super.reportRegionStateTransition(req);
108    }
109
110    @Override
111    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
112      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
113      if (ARRIVE_GET != null) {
114        ARRIVE_GET.countDown();
115        try {
116          RESUME_GET.await();
117          RESUME_GET = null;
118        } catch (InterruptedException e) {
119          throw new RuntimeException(e);
120        }
121      }
122      return regions;
123    }
124
125  }
126
127  public static final class HMasterForTest extends HMaster {
128
129    public HMasterForTest(Configuration conf) throws IOException {
130      super(conf);
131    }
132
133    @Override
134    protected AssignmentManager createAssignmentManager(MasterServices master,
135      MasterRegion masterRegion) {
136      return new AssignmentManagerForTest(master, masterRegion);
137    }
138
139    @Override
140    protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
141      throws IOException {
142      setupClusterConnection();
143      return new ServerManagerForTest(master, storage);
144    }
145  }
146
147  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
148
149  private static TableName NAME = TableName.valueOf("Assign");
150
151  private static byte[] CF = Bytes.toBytes("cf");
152
153  @BeforeAll
154  public static void setUp() throws Exception {
155    UTIL.startMiniCluster(StartTestingClusterOption.builder().masterClass(HMasterForTest.class)
156      .numMasters(1).numRegionServers(3).build());
157    UTIL.createTable(NAME, CF);
158    UTIL.waitTableAvailable(NAME);
159    UTIL.getAdmin().balancerSwitch(false, true);
160  }
161
162  @AfterAll
163  public static void tearDown() throws Exception {
164    UTIL.shutdownMiniCluster();
165  }
166
167  @Test
168  public void test() throws Exception {
169    RegionInfo region =
170      Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(NAME)).getRegionInfo();
171    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
172    AssignmentManager am = master.getAssignmentManager();
173    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
174    ServerName source = rsn.getRegionLocation();
175    ServerName dest =
176      UTIL.getAdmin().getRegionServers().stream().filter(sn -> !sn.equals(source)).findAny().get();
177
178    ARRIVE_REPORT = new CountDownLatch(1);
179    RESUME_REPORT = new CountDownLatch(1);
180
181    Future<?> future = am.moveAsync(new RegionPlan(region, source, dest));
182
183    ARRIVE_REPORT.await();
184    ARRIVE_REPORT = null;
185    // let's get procedure lock to stop the TRSP
186    IdLock procExecutionLock = master.getMasterProcedureExecutor().getProcExecutionLock();
187    long procId = master.getProcedures().stream()
188      .filter(p -> p instanceof RegionRemoteProcedureBase).findAny().get().getProcId();
189    IdLock.Entry lockEntry = procExecutionLock.getLockEntry(procId);
190    RESUME_REPORT.countDown();
191
192    // kill the source region server
193    ARRIVE_GET = new CountDownLatch(1);
194    RESUME_GET = new CountDownLatch(1);
195    UTIL.getMiniHBaseCluster().killRegionServer(source);
196
197    // wait until we try to get the region list of the region server
198    ARRIVE_GET.await();
199    ARRIVE_GET = null;
200    // release the procedure lock and let the TRSP to finish
201    procExecutionLock.releaseLockEntry(lockEntry);
202    future.get();
203
204    // resume the SCP
205    EXCLUDE_SERVERS.add(dest);
206    RESUME_GET.countDown();
207    // wait until there are no SCPs and TRSPs
208    UTIL.waitFor(60000, () -> master.getProcedures().stream().allMatch(p -> p.isFinished()
209      || (!(p instanceof ServerCrashProcedure) && !(p instanceof TransitRegionStateProcedure))));
210
211    // assert the region is only on the dest server.
212    HRegionServer rs = UTIL.getMiniHBaseCluster().getRegionServer(dest);
213    assertNotNull(rs.getRegion(region.getEncodedName()));
214    assertNull(UTIL.getOtherRegionServer(rs).getRegion(region.getEncodedName()));
215  }
216}