001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.assertNotNull;
021import static org.junit.Assert.assertNull;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.List;
026import java.util.concurrent.CountDownLatch;
027import java.util.concurrent.Future;
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.hbase.HBaseClassTestRule;
030import org.apache.hadoop.hbase.HBaseTestingUtility;
031import org.apache.hadoop.hbase.PleaseHoldException;
032import org.apache.hadoop.hbase.ServerName;
033import org.apache.hadoop.hbase.StartMiniClusterOption;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.master.HMaster;
037import org.apache.hadoop.hbase.master.MasterServices;
038import org.apache.hadoop.hbase.master.RegionPlan;
039import org.apache.hadoop.hbase.master.ServerManager;
040import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
041import org.apache.hadoop.hbase.regionserver.HRegionServer;
042import org.apache.hadoop.hbase.testclassification.MasterTests;
043import org.apache.hadoop.hbase.testclassification.MediumTests;
044import org.apache.hadoop.hbase.util.Bytes;
045import org.apache.hadoop.hbase.util.IdLock;
046import org.junit.AfterClass;
047import org.junit.BeforeClass;
048import org.junit.ClassRule;
049import org.junit.Test;
050import org.junit.experimental.categories.Category;
051
052import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
053
054import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
055import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
056import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
057
058/**
059 * Testcase for HBASE-22365.
060 */
061@Category({ MasterTests.class, MediumTests.class })
062public class TestSCPGetRegionsRace {
063
064  @ClassRule
065  public static final HBaseClassTestRule CLASS_RULE =
066    HBaseClassTestRule.forClass(TestSCPGetRegionsRace.class);
067
068  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();
069
070  private static final class ServerManagerForTest extends ServerManager {
071
072    public ServerManagerForTest(MasterServices master) {
073      super(master);
074    }
075
076    @Override
077    public List<ServerName> createDestinationServersList() {
078      return super.createDestinationServersList(EXCLUDE_SERVERS);
079    }
080  }
081
082  private static CountDownLatch ARRIVE_REPORT;
083
084  private static CountDownLatch RESUME_REPORT;
085
086  private static CountDownLatch ARRIVE_GET;
087
088  private static CountDownLatch RESUME_GET;
089
090  private static final class AssignmentManagerForTest extends AssignmentManager {
091
092    public AssignmentManagerForTest(MasterServices master) {
093      super(master);
094    }
095
096    @Override
097    public ReportRegionStateTransitionResponse reportRegionStateTransition(
098        ReportRegionStateTransitionRequest req) throws PleaseHoldException {
099      if (req.getTransition(0).getTransitionCode() == TransitionCode.CLOSED) {
100        if (ARRIVE_REPORT != null) {
101          ARRIVE_REPORT.countDown();
102          try {
103            RESUME_REPORT.await();
104            RESUME_REPORT = null;
105          } catch (InterruptedException e) {
106            throw new RuntimeException(e);
107          }
108        }
109      }
110      return super.reportRegionStateTransition(req);
111    }
112
113    @Override
114    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
115      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
116      if (ARRIVE_GET != null) {
117        ARRIVE_GET.countDown();
118        try {
119          RESUME_GET.await();
120          RESUME_GET = null;
121        } catch (InterruptedException e) {
122          throw new RuntimeException(e);
123        }
124      }
125      return regions;
126    }
127
128  }
129
130  public static final class HMasterForTest extends HMaster {
131
132    public HMasterForTest(Configuration conf) throws IOException {
133      super(conf);
134    }
135
136    @Override
137    protected AssignmentManager createAssignmentManager(MasterServices master) {
138      return new AssignmentManagerForTest(master);
139    }
140
141    @Override
142    protected ServerManager createServerManager(MasterServices master) throws IOException {
143      setupClusterConnection();
144      return new ServerManagerForTest(master);
145    }
146  }
147
148  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
149
150  private static TableName NAME = TableName.valueOf("Assign");
151
152  private static byte[] CF = Bytes.toBytes("cf");
153
154  @BeforeClass
155  public static void setUp() throws Exception {
156    UTIL.startMiniCluster(StartMiniClusterOption.builder().masterClass(HMasterForTest.class)
157      .numMasters(1).numRegionServers(3).build());
158    UTIL.createTable(NAME, CF);
159    UTIL.waitTableAvailable(NAME);
160    UTIL.getAdmin().balancerSwitch(false, true);
161  }
162
163  @AfterClass
164  public static void tearDown() throws Exception {
165    UTIL.shutdownMiniCluster();
166  }
167
168  @Test
169  public void test() throws Exception {
170    RegionInfo region =
171      Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(NAME)).getRegionInfo();
172    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
173    AssignmentManager am = master.getAssignmentManager();
174    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
175    ServerName source = rsn.getRegionLocation();
176    ServerName dest =
177      UTIL.getAdmin().getRegionServers().stream().filter(sn -> !sn.equals(source)).findAny().get();
178
179    ARRIVE_REPORT = new CountDownLatch(1);
180    RESUME_REPORT = new CountDownLatch(1);
181
182    Future<?> future = am.moveAsync(new RegionPlan(region, source, dest));
183
184    ARRIVE_REPORT.await();
185    ARRIVE_REPORT = null;
186    // let's get procedure lock to stop the TRSP
187    IdLock procExecutionLock = master.getMasterProcedureExecutor().getProcExecutionLock();
188    long procId = master.getProcedures().stream()
189      .filter(p -> p instanceof RegionRemoteProcedureBase).findAny().get().getProcId();
190    IdLock.Entry lockEntry = procExecutionLock.getLockEntry(procId);
191    RESUME_REPORT.countDown();
192
193    // kill the source region server
194    ARRIVE_GET = new CountDownLatch(1);
195    RESUME_GET = new CountDownLatch(1);
196    UTIL.getMiniHBaseCluster().killRegionServer(source);
197
198    // wait until we try to get the region list of the region server
199    ARRIVE_GET.await();
200    ARRIVE_GET = null;
201    // release the procedure lock and let the TRSP to finish
202    procExecutionLock.releaseLockEntry(lockEntry);
203    future.get();
204
205    // resume the SCP
206    EXCLUDE_SERVERS.add(dest);
207    RESUME_GET.countDown();
208    // wait until there are no SCPs and TRSPs
209    UTIL.waitFor(60000, () -> master.getProcedures().stream().allMatch(p -> p.isFinished() ||
210      (!(p instanceof ServerCrashProcedure) && !(p instanceof TransitRegionStateProcedure))));
211
212    // assert the region is only on the dest server.
213    HRegionServer rs = UTIL.getMiniHBaseCluster().getRegionServer(dest);
214    assertNotNull(rs.getRegion(region.getEncodedName()));
215    assertNull(UTIL.getOtherRegionServer(rs).getRegion(region.getEncodedName()));
216  }
217}