001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED_VALUE;
021import static org.junit.Assert.assertEquals;
022
023import java.io.IOException;
024import java.util.Set;
025import java.util.concurrent.CountDownLatch;
026import java.util.concurrent.Future;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HBaseTestingUtility;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.PleaseHoldException;
032import org.apache.hadoop.hbase.ServerName;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.client.Put;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.client.Table;
037import org.apache.hadoop.hbase.master.HMaster;
038import org.apache.hadoop.hbase.master.MasterServices;
039import org.apache.hadoop.hbase.master.RegionPlan;
040import org.apache.hadoop.hbase.master.RegionState;
041import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
042import org.apache.hadoop.hbase.master.region.MasterRegion;
043import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
044import org.apache.hadoop.hbase.testclassification.MasterTests;
045import org.apache.hadoop.hbase.testclassification.MediumTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.hbase.util.IdLock;
048import org.junit.AfterClass;
049import org.junit.BeforeClass;
050import org.junit.ClassRule;
051import org.junit.Test;
052import org.junit.experimental.categories.Category;
053
054import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
055import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
056
057@Category({ MasterTests.class, MediumTests.class })
058public class TestReportOnlineRegionsRace {
059
060  @ClassRule
061  public static final HBaseClassTestRule CLASS_RULE =
062    HBaseClassTestRule.forClass(TestReportOnlineRegionsRace.class);
063
064  private static volatile CountDownLatch ARRIVE_RS_REPORT;
065  private static volatile CountDownLatch RESUME_RS_REPORT;
066  private static volatile CountDownLatch FINISH_RS_REPORT;
067
068  private static volatile CountDownLatch RESUME_REPORT_STATE;
069
070  private static final class AssignmentManagerForTest extends AssignmentManager {
071
072    public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) {
073      super(master, masterRegion);
074    }
075
076    @Override
077    public void reportOnlineRegions(ServerName serverName, Set<byte[]> regionNames) {
078      if (ARRIVE_RS_REPORT != null) {
079        ARRIVE_RS_REPORT.countDown();
080        try {
081          RESUME_RS_REPORT.await();
082        } catch (InterruptedException e) {
083          throw new RuntimeException(e);
084        }
085      }
086      super.reportOnlineRegions(serverName, regionNames);
087      if (FINISH_RS_REPORT != null) {
088        FINISH_RS_REPORT.countDown();
089      }
090    }
091
092    @Override
093    public ReportRegionStateTransitionResponse reportRegionStateTransition(
094      ReportRegionStateTransitionRequest req) throws PleaseHoldException {
095      if (RESUME_REPORT_STATE != null) {
096        try {
097          RESUME_REPORT_STATE.await();
098        } catch (InterruptedException e) {
099          throw new RuntimeException(e);
100        }
101      }
102      return super.reportRegionStateTransition(req);
103    }
104
105  }
106
107  public static final class HMasterForTest extends HMaster {
108
109    public HMasterForTest(Configuration conf) throws IOException {
110      super(conf);
111    }
112
113    @Override
114    protected AssignmentManager createAssignmentManager(MasterServices master,
115      MasterRegion masterRegion) {
116      return new AssignmentManagerForTest(master, masterRegion);
117    }
118  }
119
120  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
121
122  private static TableName NAME = TableName.valueOf("Race");
123
124  private static byte[] CF = Bytes.toBytes("cf");
125
126  @BeforeClass
127  public static void setUp() throws Exception {
128    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
129    UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000);
130    UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT,
131      HConstants.DEFAULT_REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT);
132    UTIL.startMiniCluster(1);
133    UTIL.createTable(NAME, CF);
134    UTIL.waitTableAvailable(NAME);
135  }
136
137  @AfterClass
138  public static void tearDown() throws Exception {
139    UTIL.shutdownMiniCluster();
140  }
141
142  @Test
143  public void testRace() throws Exception {
144    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
145    ProcedureExecutor<MasterProcedureEnv> procExec =
146      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
147    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
148    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
149
150    // halt a regionServerReport
151    RESUME_RS_REPORT = new CountDownLatch(1);
152    ARRIVE_RS_REPORT = new CountDownLatch(1);
153    FINISH_RS_REPORT = new CountDownLatch(1);
154
155    ARRIVE_RS_REPORT.await();
156
157    // schedule a TRSP to REOPEN the region
158    RESUME_REPORT_STATE = new CountDownLatch(1);
159    Future<byte[]> future =
160      am.moveAsync(new RegionPlan(region, rsn.getRegionLocation(), rsn.getRegionLocation()));
161    TransitRegionStateProcedure proc =
162      procExec.getProcedures().stream().filter(p -> p instanceof TransitRegionStateProcedure)
163        .filter(p -> !p.isFinished()).map(p -> (TransitRegionStateProcedure) p).findAny().get();
164    IdLock procExecLock = procExec.getProcExecutionLock();
165    // a CloseRegionProcedure and then the OpenRegionProcedure we want to block
166    IdLock.Entry lockEntry = procExecLock.getLockEntry(proc.getProcId() + 2);
167    // resume the reportRegionStateTransition to finish the CloseRegionProcedure
168    RESUME_REPORT_STATE.countDown();
169    // wait until we schedule the OpenRegionProcedure
170    UTIL.waitFor(10000,
171      () -> proc.getCurrentStateId() == REGION_STATE_TRANSITION_CONFIRM_OPENED_VALUE);
172    // the region should be in OPENING state
173    assertEquals(RegionState.State.OPENING, rsn.getState());
174    // resume the region server report
175    RESUME_RS_REPORT.countDown();
176    // wait until it finishes, it will find that the region is opened on the rs
177    FINISH_RS_REPORT.await();
178    // let the OpenRegionProcedure go
179    procExecLock.releaseLockEntry(lockEntry);
180    // wait until the TRSP is done
181    future.get();
182
183    // confirm that the region can still be write, i.e, the regionServerReport method should not
184    // change the region state to OPEN
185    try (Table table = UTIL.getConnection().getTableBuilder(NAME, null).setWriteRpcTimeout(1000)
186      .setOperationTimeout(2000).build()) {
187      table.put(
188        new Put(Bytes.toBytes("key")).addColumn(CF, Bytes.toBytes("cq"), Bytes.toBytes("val")));
189    }
190  }
191}