001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED_VALUE;
021import static org.junit.jupiter.api.Assertions.assertEquals;
022
023import java.io.IOException;
024import java.util.Set;
025import java.util.concurrent.CountDownLatch;
026import java.util.concurrent.Future;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseTestingUtil;
029import org.apache.hadoop.hbase.HConstants;
030import org.apache.hadoop.hbase.PleaseHoldException;
031import org.apache.hadoop.hbase.ServerName;
032import org.apache.hadoop.hbase.TableName;
033import org.apache.hadoop.hbase.client.Put;
034import org.apache.hadoop.hbase.client.RegionInfo;
035import org.apache.hadoop.hbase.client.Table;
036import org.apache.hadoop.hbase.master.HMaster;
037import org.apache.hadoop.hbase.master.MasterServices;
038import org.apache.hadoop.hbase.master.RegionPlan;
039import org.apache.hadoop.hbase.master.RegionState;
040import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
041import org.apache.hadoop.hbase.master.region.MasterRegion;
042import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
043import org.apache.hadoop.hbase.testclassification.MasterTests;
044import org.apache.hadoop.hbase.testclassification.MediumTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.apache.hadoop.hbase.util.IdLock;
047import org.junit.jupiter.api.AfterAll;
048import org.junit.jupiter.api.BeforeAll;
049import org.junit.jupiter.api.Tag;
050import org.junit.jupiter.api.Test;
051
052import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
053import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
054
055@Tag(MasterTests.TAG)
056@Tag(MediumTests.TAG)
057public class TestReportOnlineRegionsRace {
058
059  private static volatile CountDownLatch ARRIVE_RS_REPORT;
060  private static volatile CountDownLatch RESUME_RS_REPORT;
061  private static volatile CountDownLatch FINISH_RS_REPORT;
062
063  private static volatile CountDownLatch RESUME_REPORT_STATE;
064
065  private static final class AssignmentManagerForTest extends AssignmentManager {
066
067    public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) {
068      super(master, masterRegion);
069    }
070
071    @Override
072    public void reportOnlineRegions(ServerName serverName, Set<byte[]> regionNames) {
073      if (ARRIVE_RS_REPORT != null) {
074        ARRIVE_RS_REPORT.countDown();
075        try {
076          RESUME_RS_REPORT.await();
077        } catch (InterruptedException e) {
078          throw new RuntimeException(e);
079        }
080      }
081      super.reportOnlineRegions(serverName, regionNames);
082      if (FINISH_RS_REPORT != null) {
083        FINISH_RS_REPORT.countDown();
084      }
085    }
086
087    @Override
088    public ReportRegionStateTransitionResponse reportRegionStateTransition(
089      ReportRegionStateTransitionRequest req) throws PleaseHoldException {
090      if (RESUME_REPORT_STATE != null) {
091        try {
092          RESUME_REPORT_STATE.await();
093        } catch (InterruptedException e) {
094          throw new RuntimeException(e);
095        }
096      }
097      return super.reportRegionStateTransition(req);
098    }
099
100  }
101
102  public static final class HMasterForTest extends HMaster {
103
104    public HMasterForTest(Configuration conf) throws IOException {
105      super(conf);
106    }
107
108    @Override
109    protected AssignmentManager createAssignmentManager(MasterServices master,
110      MasterRegion masterRegion) {
111      return new AssignmentManagerForTest(master, masterRegion);
112    }
113  }
114
115  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
116
117  private static TableName NAME = TableName.valueOf("Race");
118
119  private static byte[] CF = Bytes.toBytes("cf");
120
121  @BeforeAll
122  public static void setUp() throws Exception {
123    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
124    UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000);
125    UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT,
126      HConstants.DEFAULT_REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT);
127    UTIL.startMiniCluster(1);
128    UTIL.createTable(NAME, CF);
129    UTIL.waitTableAvailable(NAME);
130  }
131
132  @AfterAll
133  public static void tearDown() throws Exception {
134    UTIL.shutdownMiniCluster();
135  }
136
137  @Test
138  public void testRace() throws Exception {
139    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
140    ProcedureExecutor<MasterProcedureEnv> procExec =
141      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
142    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
143    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
144
145    // halt a regionServerReport
146    RESUME_RS_REPORT = new CountDownLatch(1);
147    ARRIVE_RS_REPORT = new CountDownLatch(1);
148    FINISH_RS_REPORT = new CountDownLatch(1);
149
150    ARRIVE_RS_REPORT.await();
151
152    // schedule a TRSP to REOPEN the region
153    RESUME_REPORT_STATE = new CountDownLatch(1);
154    Future<byte[]> future =
155      am.moveAsync(new RegionPlan(region, rsn.getRegionLocation(), rsn.getRegionLocation()));
156    TransitRegionStateProcedure proc =
157      procExec.getProcedures().stream().filter(p -> p instanceof TransitRegionStateProcedure)
158        .filter(p -> !p.isFinished()).map(p -> (TransitRegionStateProcedure) p).findAny().get();
159    IdLock procExecLock = procExec.getProcExecutionLock();
160    // a CloseRegionProcedure and then the OpenRegionProcedure we want to block
161    IdLock.Entry lockEntry = procExecLock.getLockEntry(proc.getProcId() + 2);
162    // resume the reportRegionStateTransition to finish the CloseRegionProcedure
163    RESUME_REPORT_STATE.countDown();
164    // wait until we schedule the OpenRegionProcedure
165    UTIL.waitFor(10000,
166      () -> proc.getCurrentStateId() == REGION_STATE_TRANSITION_CONFIRM_OPENED_VALUE);
167    // the region should be in OPENING state
168    assertEquals(RegionState.State.OPENING, rsn.getState());
169    // resume the region server report
170    RESUME_RS_REPORT.countDown();
171    // wait until it finishes, it will find that the region is opened on the rs
172    FINISH_RS_REPORT.await();
173    // let the OpenRegionProcedure go
174    procExecLock.releaseLockEntry(lockEntry);
175    // wait until the TRSP is done
176    future.get();
177
178    // confirm that the region can still be write, i.e, the regionServerReport method should not
179    // change the region state to OPEN
180    try (Table table = UTIL.getConnection().getTableBuilder(NAME, null).setWriteRpcTimeout(1000)
181      .setOperationTimeout(2000).build()) {
182      table.put(
183        new Put(Bytes.toBytes("key")).addColumn(CF, Bytes.toBytes("cq"), Bytes.toBytes("val")));
184    }
185  }
186}