001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import java.io.IOException;
021import java.util.concurrent.CountDownLatch;
022import org.apache.hadoop.hbase.HBaseClassTestRule;
023import org.apache.hadoop.hbase.HBaseTestingUtility;
024import org.apache.hadoop.hbase.ProcedureTestUtil;
025import org.apache.hadoop.hbase.ServerName;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.client.Put;
028import org.apache.hadoop.hbase.client.RegionInfo;
029import org.apache.hadoop.hbase.client.Table;
030import org.apache.hadoop.hbase.master.HMaster;
031import org.apache.hadoop.hbase.master.ServerManager;
032import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
033import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
034import org.apache.hadoop.hbase.master.procedure.ServerProcedureInterface;
035import org.apache.hadoop.hbase.procedure2.Procedure;
036import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
037import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
038import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
039import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
040import org.apache.hadoop.hbase.regionserver.HRegionServer;
041import org.apache.hadoop.hbase.testclassification.MasterTests;
042import org.apache.hadoop.hbase.testclassification.MediumTests;
043import org.apache.hadoop.hbase.util.Bytes;
044import org.junit.AfterClass;
045import org.junit.BeforeClass;
046import org.junit.ClassRule;
047import org.junit.Test;
048import org.junit.experimental.categories.Category;
049
050/**
051 * Confirm that we will do backoff when retrying on closing a region, to avoid consuming all the
052 * CPUs.
053 */
054@Category({ MasterTests.class, MediumTests.class })
055public class TestCloseRegionWhileRSCrash {
056
057  @ClassRule
058  public static final HBaseClassTestRule CLASS_RULE =
059    HBaseClassTestRule.forClass(TestCloseRegionWhileRSCrash.class);
060
061  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
062
063  private static TableName TABLE_NAME = TableName.valueOf("Backoff");
064
065  private static byte[] CF = Bytes.toBytes("cf");
066
067  private static CountDownLatch ARRIVE = new CountDownLatch(1);
068
069  private static CountDownLatch RESUME = new CountDownLatch(1);
070
071  public static final class DummyServerProcedure extends Procedure<MasterProcedureEnv>
072      implements ServerProcedureInterface {
073
074    private ServerName serverName;
075
076    public DummyServerProcedure() {
077    }
078
079    public DummyServerProcedure(ServerName serverName) {
080      this.serverName = serverName;
081    }
082
083    @Override
084    public ServerName getServerName() {
085      return serverName;
086    }
087
088    @Override
089    public boolean hasMetaTableRegion() {
090      return false;
091    }
092
093    @Override
094    public ServerOperationType getServerOperationType() {
095      return ServerOperationType.CRASH_HANDLER;
096    }
097
098    @Override
099    protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
100        throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
101      ARRIVE.countDown();
102      RESUME.await();
103      return null;
104    }
105
106    @Override
107    protected LockState acquireLock(final MasterProcedureEnv env) {
108      if (env.getProcedureScheduler().waitServerExclusiveLock(this, getServerName())) {
109        return LockState.LOCK_EVENT_WAIT;
110      }
111      return LockState.LOCK_ACQUIRED;
112    }
113
114    @Override
115    protected void releaseLock(final MasterProcedureEnv env) {
116      env.getProcedureScheduler().wakeServerExclusiveLock(this, getServerName());
117    }
118
119    @Override
120    protected boolean holdLock(MasterProcedureEnv env) {
121      return true;
122    }
123
124    @Override
125    protected void rollback(MasterProcedureEnv env) throws IOException, InterruptedException {
126    }
127
128    @Override
129    protected boolean abort(MasterProcedureEnv env) {
130      return false;
131    }
132
133    @Override
134    protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
135
136    }
137
138    @Override
139    protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
140    }
141  }
142
143  @BeforeClass
144  public static void setUp() throws Exception {
145    UTIL.getConfiguration().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
146    UTIL.startMiniCluster(3);
147    UTIL.createTable(TABLE_NAME, CF);
148    UTIL.getAdmin().balancerSwitch(false, true);
149    HRegionServer srcRs = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
150    if (!srcRs.getRegions(TableName.META_TABLE_NAME).isEmpty()) {
151      RegionInfo metaRegion = srcRs.getRegions(TableName.META_TABLE_NAME).get(0).getRegionInfo();
152      HRegionServer dstRs = UTIL.getOtherRegionServer(srcRs);
153      UTIL.getAdmin().move(metaRegion.getEncodedNameAsBytes(), dstRs.getServerName());
154      UTIL.waitFor(30000, () -> !dstRs.getRegions(TableName.META_TABLE_NAME).isEmpty());
155    }
156  }
157
158  @AfterClass
159  public static void tearDown() throws Exception {
160    UTIL.shutdownMiniCluster();
161  }
162
163  @Test
164  public void testRetryBackoff() throws IOException, InterruptedException {
165    HRegionServer srcRs = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
166    RegionInfo region = srcRs.getRegions(TABLE_NAME).get(0).getRegionInfo();
167    HRegionServer dstRs = UTIL.getOtherRegionServer(srcRs);
168    ProcedureExecutor<MasterProcedureEnv> procExec =
169      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
170    procExec.submitProcedure(new DummyServerProcedure(srcRs.getServerName()));
171    ARRIVE.await();
172    UTIL.getMiniHBaseCluster().killRegionServer(srcRs.getServerName());
173    UTIL.waitFor(30000,
174      () -> procExec.getProcedures().stream().anyMatch(p -> p instanceof ServerCrashProcedure));
175    Thread t = new Thread(() -> {
176      try {
177        UTIL.getAdmin().move(region.getEncodedNameAsBytes(), dstRs.getServerName());
178      } catch (IOException e) {
179      }
180    });
181    t.start();
182    // wait until we enter the WAITING_TIMEOUT state
183    ProcedureTestUtil.waitUntilProcedureWaitingTimeout(UTIL, TransitRegionStateProcedure.class,
184      30000);
185    // wait until the timeout value increase three times
186    ProcedureTestUtil.waitUntilProcedureTimeoutIncrease(UTIL, TransitRegionStateProcedure.class, 3);
187    // close connection to make sure that we can not finish the TRSP
188    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
189    master.getConnection().close();
190    RESUME.countDown();
191    UTIL.waitFor(30000, () -> !master.isAlive());
192    // here we start a new master
193    UTIL.getMiniHBaseCluster().startMaster();
194    t.join();
195    // Make sure that the region is online, it may not on the original target server, as we will set
196    // forceNewPlan to true if there is a server crash
197    try (Table table = UTIL.getConnection().getTable(TABLE_NAME)) {
198      table.put(new Put(Bytes.toBytes(1)).addColumn(CF, Bytes.toBytes("cq"), Bytes.toBytes(1)));
199    }
200  }
201}