001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.assertNotNull;
021import static org.junit.Assert.assertNull;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.List;
026import java.util.concurrent.CountDownLatch;
027import java.util.concurrent.Future;
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.hbase.HBaseClassTestRule;
030import org.apache.hadoop.hbase.HBaseTestingUtil;
031import org.apache.hadoop.hbase.PleaseHoldException;
032import org.apache.hadoop.hbase.ServerName;
033import org.apache.hadoop.hbase.StartTestingClusterOption;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.master.HMaster;
037import org.apache.hadoop.hbase.master.MasterServices;
038import org.apache.hadoop.hbase.master.RegionPlan;
039import org.apache.hadoop.hbase.master.ServerManager;
040import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
041import org.apache.hadoop.hbase.master.region.MasterRegion;
042import org.apache.hadoop.hbase.regionserver.HRegionServer;
043import org.apache.hadoop.hbase.testclassification.MasterTests;
044import org.apache.hadoop.hbase.testclassification.MediumTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.apache.hadoop.hbase.util.IdLock;
047import org.junit.AfterClass;
048import org.junit.BeforeClass;
049import org.junit.ClassRule;
050import org.junit.Test;
051import org.junit.experimental.categories.Category;
052
053import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
054
055import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
056import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
057import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
058
059/**
060 * Testcase for HBASE-22365.
061 */
062@Category({ MasterTests.class, MediumTests.class })
063public class TestSCPGetRegionsRace {
064
065  @ClassRule
066  public static final HBaseClassTestRule CLASS_RULE =
067    HBaseClassTestRule.forClass(TestSCPGetRegionsRace.class);
068
069  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();
070
071  private static final class ServerManagerForTest extends ServerManager {
072
073    public ServerManagerForTest(MasterServices master) {
074      super(master);
075    }
076
077    @Override
078    public List<ServerName> createDestinationServersList() {
079      return super.createDestinationServersList(EXCLUDE_SERVERS);
080    }
081  }
082
083  private static CountDownLatch ARRIVE_REPORT;
084
085  private static CountDownLatch RESUME_REPORT;
086
087  private static CountDownLatch ARRIVE_GET;
088
089  private static CountDownLatch RESUME_GET;
090
091  private static final class AssignmentManagerForTest extends AssignmentManager {
092
093    public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) {
094      super(master, masterRegion);
095    }
096
097    @Override
098    public ReportRegionStateTransitionResponse reportRegionStateTransition(
099        ReportRegionStateTransitionRequest req) throws PleaseHoldException {
100      if (req.getTransition(0).getTransitionCode() == TransitionCode.CLOSED) {
101        if (ARRIVE_REPORT != null) {
102          ARRIVE_REPORT.countDown();
103          try {
104            RESUME_REPORT.await();
105            RESUME_REPORT = null;
106          } catch (InterruptedException e) {
107            throw new RuntimeException(e);
108          }
109        }
110      }
111      return super.reportRegionStateTransition(req);
112    }
113
114    @Override
115    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
116      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
117      if (ARRIVE_GET != null) {
118        ARRIVE_GET.countDown();
119        try {
120          RESUME_GET.await();
121          RESUME_GET = null;
122        } catch (InterruptedException e) {
123          throw new RuntimeException(e);
124        }
125      }
126      return regions;
127    }
128
129  }
130
131  public static final class HMasterForTest extends HMaster {
132
133    public HMasterForTest(Configuration conf) throws IOException {
134      super(conf);
135    }
136
137    @Override
138    protected AssignmentManager createAssignmentManager(MasterServices master,
139      MasterRegion masterRegion) {
140      return new AssignmentManagerForTest(master, masterRegion);
141    }
142
143    @Override
144    protected ServerManager createServerManager(MasterServices master) throws IOException {
145      setupClusterConnection();
146      return new ServerManagerForTest(master);
147    }
148  }
149
150  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
151
152  private static TableName NAME = TableName.valueOf("Assign");
153
154  private static byte[] CF = Bytes.toBytes("cf");
155
156  @BeforeClass
157  public static void setUp() throws Exception {
158    UTIL.startMiniCluster(StartTestingClusterOption.builder().masterClass(HMasterForTest.class)
159      .numMasters(1).numRegionServers(3).build());
160    UTIL.createTable(NAME, CF);
161    UTIL.waitTableAvailable(NAME);
162    UTIL.getAdmin().balancerSwitch(false, true);
163  }
164
165  @AfterClass
166  public static void tearDown() throws Exception {
167    UTIL.shutdownMiniCluster();
168  }
169
170  @Test
171  public void test() throws Exception {
172    RegionInfo region =
173      Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(NAME)).getRegionInfo();
174    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
175    AssignmentManager am = master.getAssignmentManager();
176    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
177    ServerName source = rsn.getRegionLocation();
178    ServerName dest =
179      UTIL.getAdmin().getRegionServers().stream().filter(sn -> !sn.equals(source)).findAny().get();
180
181    ARRIVE_REPORT = new CountDownLatch(1);
182    RESUME_REPORT = new CountDownLatch(1);
183
184    Future<?> future = am.moveAsync(new RegionPlan(region, source, dest));
185
186    ARRIVE_REPORT.await();
187    ARRIVE_REPORT = null;
188    // let's get procedure lock to stop the TRSP
189    IdLock procExecutionLock = master.getMasterProcedureExecutor().getProcExecutionLock();
190    long procId = master.getProcedures().stream()
191      .filter(p -> p instanceof RegionRemoteProcedureBase).findAny().get().getProcId();
192    IdLock.Entry lockEntry = procExecutionLock.getLockEntry(procId);
193    RESUME_REPORT.countDown();
194
195    // kill the source region server
196    ARRIVE_GET = new CountDownLatch(1);
197    RESUME_GET = new CountDownLatch(1);
198    UTIL.getMiniHBaseCluster().killRegionServer(source);
199
200    // wait until we try to get the region list of the region server
201    ARRIVE_GET.await();
202    ARRIVE_GET = null;
203    // release the procedure lock and let the TRSP to finish
204    procExecutionLock.releaseLockEntry(lockEntry);
205    future.get();
206
207    // resume the SCP
208    EXCLUDE_SERVERS.add(dest);
209    RESUME_GET.countDown();
210    // wait until there are no SCPs and TRSPs
211    UTIL.waitFor(60000, () -> master.getProcedures().stream().allMatch(p -> p.isFinished() ||
212      (!(p instanceof ServerCrashProcedure) && !(p instanceof TransitRegionStateProcedure))));
213
214    // assert the region is only on the dest server.
215    HRegionServer rs = UTIL.getMiniHBaseCluster().getRegionServer(dest);
216    assertNotNull(rs.getRegion(region.getEncodedName()));
217    assertNull(UTIL.getOtherRegionServer(rs).getRegion(region.getEncodedName()));
218  }
219}