001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.concurrent.CountDownLatch;
022import java.util.concurrent.Future;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.HBaseClassTestRule;
025import org.apache.hadoop.hbase.HBaseTestingUtil;
026import org.apache.hadoop.hbase.HConstants;
027import org.apache.hadoop.hbase.ServerName;
028import org.apache.hadoop.hbase.TableName;
029import org.apache.hadoop.hbase.client.RegionInfo;
030import org.apache.hadoop.hbase.master.HMaster;
031import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
032import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
033import org.apache.hadoop.hbase.procedure2.Procedure;
034import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
035import org.apache.hadoop.hbase.testclassification.MasterTests;
036import org.apache.hadoop.hbase.testclassification.MediumTests;
037import org.apache.hadoop.hbase.util.Bytes;
038import org.apache.zookeeper.KeeperException;
039import org.junit.AfterClass;
040import org.junit.BeforeClass;
041import org.junit.ClassRule;
042import org.junit.Test;
043import org.junit.experimental.categories.Category;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
048
049/**
050 * Testcase for HBASE-28522.
051 * <p>
052 * We used to have a test with the same name but in a different package for HBASE-23636, where DTP
053 * would hold the exclusive lock all the time and reset TRSPs which had been attached to
054 * RegionStateNodes, so we needed special logic in SCP to deal with it.
055 * <p>
056 * After HBASE-28522, DTP will not reset TRSPs any more, so SCP does not need to take care of this
057 * special case, thus we removed the special logic in SCP and also the UT for HBASE-23636 is not
058 * valid any more, so we just removed the old one and introduced a new one with the same name here.
059 */
060@Category({ MasterTests.class, MediumTests.class })
061public class TestRaceBetweenSCPAndDTP {
062
063  @ClassRule
064  public static final HBaseClassTestRule CLASS_RULE =
065    HBaseClassTestRule.forClass(TestRaceBetweenSCPAndDTP.class);
066
067  private static final Logger LOG = LoggerFactory.getLogger(TestRaceBetweenSCPAndDTP.class);
068
069  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
070
071  private static TableName NAME = TableName.valueOf("Race");
072
073  private static byte[] CF = Bytes.toBytes("cf");
074
075  private static CountDownLatch ARRIVE_GET_REPLICATION_PEER_MANAGER;
076
077  private static CountDownLatch RESUME_GET_REPLICATION_PEER_MANAGER;
078
079  public static final class HMasterForTest extends HMaster {
080
081    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
082      super(conf);
083    }
084
085    @Override
086    public ReplicationPeerManager getReplicationPeerManager() {
087      if (ARRIVE_GET_REPLICATION_PEER_MANAGER != null) {
088        ARRIVE_GET_REPLICATION_PEER_MANAGER.countDown();
089        ARRIVE_GET_REPLICATION_PEER_MANAGER = null;
090        try {
091          RESUME_GET_REPLICATION_PEER_MANAGER.await();
092        } catch (InterruptedException e) {
093        }
094      }
095      return super.getReplicationPeerManager();
096    }
097  }
098
099  @BeforeClass
100  public static void setUp() throws Exception {
101    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
102    UTIL.startMiniCluster(2);
103    UTIL.createTable(NAME, CF);
104    UTIL.waitTableAvailable(NAME);
105    UTIL.getAdmin().balancerSwitch(false, true);
106  }
107
108  @AfterClass
109  public static void tearDown() throws Exception {
110    UTIL.shutdownMiniCluster();
111  }
112
113  private boolean wasExecuted(Procedure<?> proc) {
114    // RUNNABLE is not enough to make sure that the DTP has acquired the table lock, as we will set
115    // procedure to RUNNABLE first and then acquire the execution lock
116    return proc.wasExecuted() || proc.getState() == ProcedureState.WAITING_TIMEOUT
117      || proc.getState() == ProcedureState.WAITING;
118  }
119
120  @Test
121  public void testRace() throws Exception {
122    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
123    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
124    ServerName sn = am.getRegionStates().getRegionState(region).getServerName();
125    LOG.info("ServerName={}, region={}", sn, region);
126
127    ARRIVE_GET_REPLICATION_PEER_MANAGER = new CountDownLatch(1);
128    RESUME_GET_REPLICATION_PEER_MANAGER = new CountDownLatch(1);
129    // Assign to local variable because this static gets set to null in above running thread and
130    // so NPE.
131    CountDownLatch cdl = ARRIVE_GET_REPLICATION_PEER_MANAGER;
132    UTIL.getMiniHBaseCluster().stopRegionServer(sn);
133    cdl.await();
134
135    Future<?> future = UTIL.getAdmin().disableTableAsync(NAME);
136    ProcedureExecutor<?> procExec =
137      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
138    // make sure the DTP has been executed
139    UTIL.waitFor(60000,
140      () -> procExec.getProcedures().stream().filter(p -> p instanceof DisableTableProcedure)
141        .map(p -> (DisableTableProcedure) p).filter(p -> p.getTableName().equals(NAME))
142        .anyMatch(this::wasExecuted));
143    RESUME_GET_REPLICATION_PEER_MANAGER.countDown();
144
145    // make sure the DTP can finish
146    future.get();
147
148    // also make sure all SCPs are finished
149    UTIL.waitFor(60000, () -> procExec.getProcedures().stream()
150      .filter(p -> p instanceof ServerCrashProcedure).allMatch(Procedure::isFinished));
151  }
152}