001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.assertNotNull;
021import static org.junit.Assert.assertNull;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.List;
026import java.util.concurrent.CountDownLatch;
027import java.util.concurrent.Future;
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.hbase.HBaseClassTestRule;
030import org.apache.hadoop.hbase.HBaseTestingUtil;
031import org.apache.hadoop.hbase.PleaseHoldException;
032import org.apache.hadoop.hbase.ServerName;
033import org.apache.hadoop.hbase.StartTestingClusterOption;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.master.HMaster;
037import org.apache.hadoop.hbase.master.MasterServices;
038import org.apache.hadoop.hbase.master.RegionPlan;
039import org.apache.hadoop.hbase.master.RegionServerList;
040import org.apache.hadoop.hbase.master.ServerManager;
041import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
042import org.apache.hadoop.hbase.master.region.MasterRegion;
043import org.apache.hadoop.hbase.regionserver.HRegionServer;
044import org.apache.hadoop.hbase.testclassification.MasterTests;
045import org.apache.hadoop.hbase.testclassification.MediumTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.hbase.util.IdLock;
048import org.junit.AfterClass;
049import org.junit.BeforeClass;
050import org.junit.ClassRule;
051import org.junit.Test;
052import org.junit.experimental.categories.Category;
053
054import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
055
056import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
057import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
058import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
059
060/**
061 * Testcase for HBASE-22365.
062 */
063@Category({ MasterTests.class, MediumTests.class })
064public class TestSCPGetRegionsRace {
065
066  @ClassRule
067  public static final HBaseClassTestRule CLASS_RULE =
068    HBaseClassTestRule.forClass(TestSCPGetRegionsRace.class);
069
070  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();
071
072  private static final class ServerManagerForTest extends ServerManager {
073
074    public ServerManagerForTest(MasterServices master, RegionServerList storage) {
075      super(master, storage);
076    }
077
078    @Override
079    public List<ServerName> createDestinationServersList() {
080      return super.createDestinationServersList(EXCLUDE_SERVERS);
081    }
082  }
083
084  private static CountDownLatch ARRIVE_REPORT;
085
086  private static CountDownLatch RESUME_REPORT;
087
088  private static CountDownLatch ARRIVE_GET;
089
090  private static CountDownLatch RESUME_GET;
091
092  private static final class AssignmentManagerForTest extends AssignmentManager {
093
094    public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) {
095      super(master, masterRegion);
096    }
097
098    @Override
099    public ReportRegionStateTransitionResponse reportRegionStateTransition(
100      ReportRegionStateTransitionRequest req) throws PleaseHoldException {
101      if (req.getTransition(0).getTransitionCode() == TransitionCode.CLOSED) {
102        if (ARRIVE_REPORT != null) {
103          ARRIVE_REPORT.countDown();
104          try {
105            RESUME_REPORT.await();
106            RESUME_REPORT = null;
107          } catch (InterruptedException e) {
108            throw new RuntimeException(e);
109          }
110        }
111      }
112      return super.reportRegionStateTransition(req);
113    }
114
115    @Override
116    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
117      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
118      if (ARRIVE_GET != null) {
119        ARRIVE_GET.countDown();
120        try {
121          RESUME_GET.await();
122          RESUME_GET = null;
123        } catch (InterruptedException e) {
124          throw new RuntimeException(e);
125        }
126      }
127      return regions;
128    }
129
130  }
131
132  public static final class HMasterForTest extends HMaster {
133
134    public HMasterForTest(Configuration conf) throws IOException {
135      super(conf);
136    }
137
138    @Override
139    protected AssignmentManager createAssignmentManager(MasterServices master,
140      MasterRegion masterRegion) {
141      return new AssignmentManagerForTest(master, masterRegion);
142    }
143
144    @Override
145    protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
146      throws IOException {
147      setupClusterConnection();
148      return new ServerManagerForTest(master, storage);
149    }
150  }
151
152  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
153
154  private static TableName NAME = TableName.valueOf("Assign");
155
156  private static byte[] CF = Bytes.toBytes("cf");
157
158  @BeforeClass
159  public static void setUp() throws Exception {
160    UTIL.startMiniCluster(StartTestingClusterOption.builder().masterClass(HMasterForTest.class)
161      .numMasters(1).numRegionServers(3).build());
162    UTIL.createTable(NAME, CF);
163    UTIL.waitTableAvailable(NAME);
164    UTIL.getAdmin().balancerSwitch(false, true);
165  }
166
167  @AfterClass
168  public static void tearDown() throws Exception {
169    UTIL.shutdownMiniCluster();
170  }
171
172  @Test
173  public void test() throws Exception {
174    RegionInfo region =
175      Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(NAME)).getRegionInfo();
176    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
177    AssignmentManager am = master.getAssignmentManager();
178    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
179    ServerName source = rsn.getRegionLocation();
180    ServerName dest =
181      UTIL.getAdmin().getRegionServers().stream().filter(sn -> !sn.equals(source)).findAny().get();
182
183    ARRIVE_REPORT = new CountDownLatch(1);
184    RESUME_REPORT = new CountDownLatch(1);
185
186    Future<?> future = am.moveAsync(new RegionPlan(region, source, dest));
187
188    ARRIVE_REPORT.await();
189    ARRIVE_REPORT = null;
190    // let's get procedure lock to stop the TRSP
191    IdLock procExecutionLock = master.getMasterProcedureExecutor().getProcExecutionLock();
192    long procId = master.getProcedures().stream()
193      .filter(p -> p instanceof RegionRemoteProcedureBase).findAny().get().getProcId();
194    IdLock.Entry lockEntry = procExecutionLock.getLockEntry(procId);
195    RESUME_REPORT.countDown();
196
197    // kill the source region server
198    ARRIVE_GET = new CountDownLatch(1);
199    RESUME_GET = new CountDownLatch(1);
200    UTIL.getMiniHBaseCluster().killRegionServer(source);
201
202    // wait until we try to get the region list of the region server
203    ARRIVE_GET.await();
204    ARRIVE_GET = null;
205    // release the procedure lock and let the TRSP to finish
206    procExecutionLock.releaseLockEntry(lockEntry);
207    future.get();
208
209    // resume the SCP
210    EXCLUDE_SERVERS.add(dest);
211    RESUME_GET.countDown();
212    // wait until there are no SCPs and TRSPs
213    UTIL.waitFor(60000, () -> master.getProcedures().stream().allMatch(p -> p.isFinished()
214      || (!(p instanceof ServerCrashProcedure) && !(p instanceof TransitRegionStateProcedure))));
215
216    // assert the region is only on the dest server.
217    HRegionServer rs = UTIL.getMiniHBaseCluster().getRegionServer(dest);
218    assertNotNull(rs.getRegion(region.getEncodedName()));
219    assertNull(UTIL.getOtherRegionServer(rs).getRegion(region.getEncodedName()));
220  }
221}