001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertNotEquals;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.hbase.HBaseClassTestRule;
027import org.apache.hadoop.hbase.HBaseTestingUtil;
028import org.apache.hadoop.hbase.ServerName;
029import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
030import org.apache.hadoop.hbase.SingleProcessHBaseCluster.MiniHBaseClusterRegionServer;
031import org.apache.hadoop.hbase.StartTestingClusterOption;
032import org.apache.hadoop.hbase.Waiter;
033import org.apache.hadoop.hbase.client.RegionInfoBuilder;
034import org.apache.hadoop.hbase.master.assignment.RegionStates;
035import org.apache.hadoop.hbase.regionserver.HRegionServer;
036import org.apache.hadoop.hbase.testclassification.MediumTests;
037import org.apache.hadoop.hbase.util.Bytes;
038import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
039import org.apache.hadoop.hbase.zookeeper.ZKUtil;
040import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
041import org.apache.zookeeper.KeeperException;
042import org.junit.AfterClass;
043import org.junit.BeforeClass;
044import org.junit.ClassRule;
045import org.junit.Test;
046import org.junit.experimental.categories.Category;
047import org.slf4j.Logger;
048import org.slf4j.LoggerFactory;
049
050/**
051 * Tests handling of meta-carrying region server failover.
052 */
053@Category(MediumTests.class)
054public class TestMetaShutdownHandler {
055  private static final Logger LOG = LoggerFactory.getLogger(TestMetaShutdownHandler.class);
056  @ClassRule
057  public static final HBaseClassTestRule CLASS_RULE =
058    HBaseClassTestRule.forClass(TestMetaShutdownHandler.class);
059
060  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
061  final static Configuration conf = TEST_UTIL.getConfiguration();
062
063  @BeforeClass
064  public static void setUpBeforeClass() throws Exception {
065    StartTestingClusterOption option = StartTestingClusterOption.builder().numRegionServers(3)
066      .rsClass(MyRegionServer.class).numDataNodes(3).build();
067    TEST_UTIL.startMiniCluster(option);
068  }
069
070  @AfterClass
071  public static void tearDownAfterClass() throws Exception {
072    TEST_UTIL.shutdownMiniCluster();
073  }
074
075  /**
076   * This test will test the expire handling of a meta-carrying region server. After
077   * HBaseMiniCluster is up, we will delete the ephemeral node of the meta-carrying region server,
078   * which will trigger the expire of this region server on the master. On the other hand, we will
079   * slow down the abort process on the region server so that it is still up during the master SSH.
080   * We will check that the master SSH is still successfully done.
081   */
082  @Test
083  public void testExpireMetaRegionServer() throws Exception {
084    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
085    HMaster master = cluster.getMaster();
086    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
087    ServerName metaServerName =
088      regionStates.getRegionServerOfRegion(RegionInfoBuilder.FIRST_META_REGIONINFO);
089    if (
090      master.getServerName().equals(metaServerName) || metaServerName == null
091        || !metaServerName.equals(cluster.getServerHoldingMeta())
092    ) {
093      // Move meta off master
094      metaServerName =
095        cluster.getLiveRegionServerThreads().get(0).getRegionServer().getServerName();
096      master.move(RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
097        Bytes.toBytes(metaServerName.getServerName()));
098      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
099      metaServerName =
100        regionStates.getRegionServerOfRegion(RegionInfoBuilder.FIRST_META_REGIONINFO);
101    }
102    RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
103    assertEquals("Wrong state for meta!", RegionState.State.OPEN, metaState.getState());
104    assertNotEquals("Meta is on master!", metaServerName, master.getServerName());
105    HRegionServer metaRegionServer = cluster.getRegionServer(metaServerName);
106
107    // Delete the ephemeral node of the meta-carrying region server.
108    // This is trigger the expire of this region server on the master.
109    String rsEphemeralNodePath = ZNodePaths.joinZNode(master.getZooKeeper().getZNodePaths().rsZNode,
110      metaServerName.toString());
111    ZKUtil.deleteNode(master.getZooKeeper(), rsEphemeralNodePath);
112    LOG.info("Deleted the znode for the RegionServer hosting hbase:meta; waiting on SSH");
113    // Wait for SSH to finish
114    final ServerManager serverManager = master.getServerManager();
115    final ServerName priorMetaServerName = metaServerName;
116    TEST_UTIL.waitFor(60000, 100, () -> metaRegionServer.isStopped());
117    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
118      @Override
119      public boolean evaluate() throws Exception {
120        return !serverManager.isServerOnline(priorMetaServerName)
121          && !serverManager.areDeadServersInProgress();
122      }
123    });
124    LOG.info("Past wait on RIT");
125    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
126    // Now, make sure meta is assigned
127    assertTrue("Meta should be assigned",
128      regionStates.isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO));
129    // Now, make sure meta is registered in zk
130    metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
131    assertEquals("Meta should not be in transition", RegionState.State.OPEN, metaState.getState());
132    assertEquals("Meta should be assigned", metaState.getServerName(),
133      regionStates.getRegionServerOfRegion(RegionInfoBuilder.FIRST_META_REGIONINFO));
134    assertNotEquals("Meta should be assigned on a different server", metaState.getServerName(),
135      metaServerName);
136  }
137
138  public static class MyRegionServer extends MiniHBaseClusterRegionServer {
139
140    public MyRegionServer(Configuration conf)
141      throws IOException, KeeperException, InterruptedException {
142      super(conf);
143    }
144
145    @Override
146    public void abort(String reason, Throwable cause) {
147      // sleep to slow down the region server abort
148      try {
149        Thread.sleep(30 * 1000);
150      } catch (InterruptedException e) {
151        return;
152      }
153      super.abort(reason, cause);
154    }
155  }
156}