001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.concurrent.CountDownLatch;
022import java.util.concurrent.Future;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.HBaseTestingUtil;
025import org.apache.hadoop.hbase.HConstants;
026import org.apache.hadoop.hbase.ServerName;
027import org.apache.hadoop.hbase.TableName;
028import org.apache.hadoop.hbase.client.RegionInfo;
029import org.apache.hadoop.hbase.master.HMaster;
030import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
031import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
032import org.apache.hadoop.hbase.procedure2.Procedure;
033import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
034import org.apache.hadoop.hbase.testclassification.MasterTests;
035import org.apache.hadoop.hbase.testclassification.MediumTests;
036import org.apache.hadoop.hbase.util.Bytes;
037import org.apache.zookeeper.KeeperException;
038import org.junit.jupiter.api.AfterAll;
039import org.junit.jupiter.api.BeforeAll;
040import org.junit.jupiter.api.Tag;
041import org.junit.jupiter.api.Test;
042import org.slf4j.Logger;
043import org.slf4j.LoggerFactory;
044
045import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
046
047/**
048 * Testcase for HBASE-28522.
049 * <p>
050 * We used to have test with the same name but in different package for HBASE-23636, where DTP will
051 * hold the exclusive lock all the time, and it will reset TRSPs which has been attached to
052 * RegionStateNodes, so we need special logic in SCP to deal with it.
053 * <p>
054 * After HBASE-28522, DTP will not reset TRSPs any more, so SCP does not need to take care of this
055 * special case, thues we removed the special logic in SCP and also the UT for HBASE-22636 is not
056 * valid any more, so we just removed the old one and introduce a new one with the same name here.
057 */
058@Tag(MasterTests.TAG)
059@Tag(MediumTests.TAG)
060public class TestRaceBetweenSCPAndDTP {
061
062  private static final Logger LOG = LoggerFactory.getLogger(TestRaceBetweenSCPAndDTP.class);
063
064  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
065
066  private static TableName NAME = TableName.valueOf("Race");
067
068  private static byte[] CF = Bytes.toBytes("cf");
069
070  private static CountDownLatch ARRIVE_GET_REPLICATION_PEER_MANAGER;
071
072  private static CountDownLatch RESUME_GET_REPLICATION_PEER_MANAGER;
073
074  public static final class HMasterForTest extends HMaster {
075
076    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
077      super(conf);
078    }
079
080    @Override
081    public ReplicationPeerManager getReplicationPeerManager() {
082      if (ARRIVE_GET_REPLICATION_PEER_MANAGER != null) {
083        ARRIVE_GET_REPLICATION_PEER_MANAGER.countDown();
084        ARRIVE_GET_REPLICATION_PEER_MANAGER = null;
085        try {
086          RESUME_GET_REPLICATION_PEER_MANAGER.await();
087        } catch (InterruptedException e) {
088        }
089      }
090      return super.getReplicationPeerManager();
091    }
092  }
093
094  @BeforeAll
095  public static void setUp() throws Exception {
096    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
097    UTIL.startMiniCluster(2);
098    UTIL.createTable(NAME, CF);
099    UTIL.waitTableAvailable(NAME);
100    UTIL.getAdmin().balancerSwitch(false, true);
101  }
102
103  @AfterAll
104  public static void tearDown() throws Exception {
105    UTIL.shutdownMiniCluster();
106  }
107
108  private boolean wasExecuted(Procedure<?> proc) {
109    // RUNNABLE is not enough to make sure that the DTP has acquired the table lock, as we will set
110    // procedure to RUNNABLE first and then acquire the execution lock
111    return proc.wasExecuted() || proc.getState() == ProcedureState.WAITING_TIMEOUT
112      || proc.getState() == ProcedureState.WAITING;
113  }
114
115  @Test
116  public void testRace() throws Exception {
117    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
118    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
119    ServerName sn = am.getRegionStates().getRegionState(region).getServerName();
120    LOG.info("ServerName={}, region={}", sn, region);
121
122    ARRIVE_GET_REPLICATION_PEER_MANAGER = new CountDownLatch(1);
123    RESUME_GET_REPLICATION_PEER_MANAGER = new CountDownLatch(1);
124    // Assign to local variable because this static gets set to null in above running thread and
125    // so NPE.
126    CountDownLatch cdl = ARRIVE_GET_REPLICATION_PEER_MANAGER;
127    UTIL.getMiniHBaseCluster().stopRegionServer(sn);
128    cdl.await();
129
130    Future<?> future = UTIL.getAdmin().disableTableAsync(NAME);
131    ProcedureExecutor<?> procExec =
132      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
133    // make sure the DTP has been executed
134    UTIL.waitFor(60000,
135      () -> procExec.getProcedures().stream().filter(p -> p instanceof DisableTableProcedure)
136        .map(p -> (DisableTableProcedure) p).filter(p -> p.getTableName().equals(NAME))
137        .anyMatch(this::wasExecuted));
138    RESUME_GET_REPLICATION_PEER_MANAGER.countDown();
139
140    // make sure the DTP can finish
141    future.get();
142
143    // also make sure all SCPs are finished
144    UTIL.waitFor(60000, () -> procExec.getProcedures().stream()
145      .filter(p -> p instanceof ServerCrashProcedure).allMatch(Procedure::isFinished));
146  }
147}