001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertNotEquals;
022import static org.junit.jupiter.api.Assertions.assertTrue;
023
024import java.io.IOException;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.hbase.HBaseTestingUtil;
027import org.apache.hadoop.hbase.ServerName;
028import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
029import org.apache.hadoop.hbase.SingleProcessHBaseCluster.MiniHBaseClusterRegionServer;
030import org.apache.hadoop.hbase.StartTestingClusterOption;
031import org.apache.hadoop.hbase.Waiter;
032import org.apache.hadoop.hbase.client.RegionInfoBuilder;
033import org.apache.hadoop.hbase.master.assignment.RegionStates;
034import org.apache.hadoop.hbase.regionserver.HRegionServer;
035import org.apache.hadoop.hbase.testclassification.MediumTests;
036import org.apache.hadoop.hbase.util.Bytes;
037import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
038import org.apache.hadoop.hbase.zookeeper.ZKUtil;
039import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
040import org.apache.zookeeper.KeeperException;
041import org.junit.jupiter.api.AfterAll;
042import org.junit.jupiter.api.BeforeAll;
043import org.junit.jupiter.api.Tag;
044import org.junit.jupiter.api.Test;
045import org.slf4j.Logger;
046import org.slf4j.LoggerFactory;
047
048/**
049 * Tests handling of meta-carrying region server failover.
050 */
051@Tag(MediumTests.TAG)
052public class TestMetaShutdownHandler {
053  private static final Logger LOG = LoggerFactory.getLogger(TestMetaShutdownHandler.class);
054
055  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
056  final static Configuration conf = TEST_UTIL.getConfiguration();
057
058  @BeforeAll
059  public static void setUpBeforeClass() throws Exception {
060    StartTestingClusterOption option = StartTestingClusterOption.builder().numRegionServers(3)
061      .rsClass(MyRegionServer.class).numDataNodes(3).build();
062    TEST_UTIL.startMiniCluster(option);
063  }
064
065  @AfterAll
066  public static void tearDownAfterClass() throws Exception {
067    TEST_UTIL.shutdownMiniCluster();
068  }
069
070  /**
071   * This test will test the expire handling of a meta-carrying region server. After
072   * HBaseMiniCluster is up, we will delete the ephemeral node of the meta-carrying region server,
073   * which will trigger the expire of this region server on the master. On the other hand, we will
074   * slow down the abort process on the region server so that it is still up during the master SSH.
075   * We will check that the master SSH is still successfully done.
076   */
077  @Test
078  public void testExpireMetaRegionServer() throws Exception {
079    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
080    HMaster master = cluster.getMaster();
081    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
082    ServerName metaServerName =
083      regionStates.getRegionServerOfRegion(RegionInfoBuilder.FIRST_META_REGIONINFO);
084    if (
085      master.getServerName().equals(metaServerName) || metaServerName == null
086        || !metaServerName.equals(cluster.getServerHoldingMeta())
087    ) {
088      // Move meta off master
089      metaServerName =
090        cluster.getLiveRegionServerThreads().get(0).getRegionServer().getServerName();
091      master.move(RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
092        Bytes.toBytes(metaServerName.getServerName()));
093      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
094      metaServerName =
095        regionStates.getRegionServerOfRegion(RegionInfoBuilder.FIRST_META_REGIONINFO);
096    }
097    RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
098    assertEquals(metaState.getState(), RegionState.State.OPEN, "Wrong state for meta!");
099    assertNotEquals(master.getServerName(), metaServerName, "Meta is on master!");
100    HRegionServer metaRegionServer = cluster.getRegionServer(metaServerName);
101
102    // Delete the ephemeral node of the meta-carrying region server.
103    // This is trigger the expire of this region server on the master.
104    String rsEphemeralNodePath = ZNodePaths.joinZNode(master.getZooKeeper().getZNodePaths().rsZNode,
105      metaServerName.toString());
106    ZKUtil.deleteNode(master.getZooKeeper(), rsEphemeralNodePath);
107    LOG.info("Deleted the znode for the RegionServer hosting hbase:meta; waiting on SSH");
108    // Wait for SSH to finish
109    final ServerManager serverManager = master.getServerManager();
110    final ServerName priorMetaServerName = metaServerName;
111    TEST_UTIL.waitFor(60000, 100, () -> metaRegionServer.isStopped());
112    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
113      @Override
114      public boolean evaluate() throws Exception {
115        return !serverManager.isServerOnline(priorMetaServerName)
116          && !serverManager.areDeadServersInProgress();
117      }
118    });
119    LOG.info("Past wait on RIT");
120    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
121    // Now, make sure meta is assigned
122    assertTrue(regionStates.isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO),
123      "Meta should be assigned");
124    // Now, make sure meta is registered in zk
125    metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
126    assertEquals(RegionState.State.OPEN, metaState.getState(), "Meta should not be in transition");
127    assertEquals(metaState.getServerName(),
128      regionStates.getRegionServerOfRegion(RegionInfoBuilder.FIRST_META_REGIONINFO),
129      "Meta should be assigned");
130    assertNotEquals(metaServerName, metaState.getServerName(),
131      "Meta should be assigned on a different server");
132  }
133
134  public static class MyRegionServer extends MiniHBaseClusterRegionServer {
135
136    public MyRegionServer(Configuration conf)
137      throws IOException, KeeperException, InterruptedException {
138      super(conf);
139    }
140
141    @Override
142    public void abort(String reason, Throwable cause) {
143      // sleep to slow down the region server abort
144      try {
145        Thread.sleep(30 * 1000);
146      } catch (InterruptedException e) {
147        return;
148      }
149      super.abort(reason, cause);
150    }
151  }
152}