/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

049/**
050 * Tests handling of meta-carrying region server failover.
051 */
052@Category(MediumTests.class)
053public class TestMetaShutdownHandler {
054  private static final Logger LOG = LoggerFactory.getLogger(TestMetaShutdownHandler.class);
055  @ClassRule
056  public static final HBaseClassTestRule CLASS_RULE =
057      HBaseClassTestRule.forClass(TestMetaShutdownHandler.class);
058
059  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
060  final static Configuration conf = TEST_UTIL.getConfiguration();
061
062  @BeforeClass
063  public static void setUpBeforeClass() throws Exception {
064    TEST_UTIL.startMiniCluster(2, 4, null, null, MyRegionServer.class);
065  }
066
067  @AfterClass
068  public static void tearDownAfterClass() throws Exception {
069    TEST_UTIL.shutdownMiniCluster();
070  }
071
072  /**
073   * This test will test the expire handling of a meta-carrying
074   * region server.
075   * After HBaseMiniCluster is up, we will delete the ephemeral
076   * node of the meta-carrying region server, which will trigger
077   * the expire of this region server on the master.
078   * On the other hand, we will slow down the abort process on
079   * the region server so that it is still up during the master SSH.
080   * We will check that the master SSH is still successfully done.
081   */
082  @Test
083  public void testExpireMetaRegionServer() throws Exception {
084    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
085    HMaster master = cluster.getMaster();
086    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
087    ServerName metaServerName = regionStates.getRegionServerOfRegion(
088      HRegionInfo.FIRST_META_REGIONINFO);
089    if (master.getServerName().equals(metaServerName) || metaServerName == null
090        || !metaServerName.equals(cluster.getServerHoldingMeta())) {
091      // Move meta off master
092      metaServerName =
093          cluster.getLiveRegionServerThreads().get(0).getRegionServer().getServerName();
094      master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
095        Bytes.toBytes(metaServerName.getServerName()));
096      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
097      metaServerName = regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
098    }
099    RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
100    assertEquals("Wrong state for meta!", RegionState.State.OPEN, metaState.getState());
101    assertNotEquals("Meta is on master!", metaServerName, master.getServerName());
102
103    // Delete the ephemeral node of the meta-carrying region server.
104    // This is trigger the expire of this region server on the master.
105    String rsEphemeralNodePath =
106        ZNodePaths.joinZNode(master.getZooKeeper().znodePaths.rsZNode, metaServerName.toString());
107    ZKUtil.deleteNode(master.getZooKeeper(), rsEphemeralNodePath);
108    TEST_UTIL.decrementMinRegionServerCount();
109    LOG.info("Deleted the znode for the RegionServer hosting hbase:meta; waiting on SSH");
110    // Wait for SSH to finish
111    final ServerManager serverManager = master.getServerManager();
112    final ServerName priorMetaServerName = metaServerName;
113    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
114      @Override
115      public boolean evaluate() throws Exception {
116        return !serverManager.isServerOnline(priorMetaServerName)
117            && !serverManager.areDeadServersInProgress();
118      }
119    });
120    LOG.info("Past wait on RIT");
121    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
122    // Now, make sure meta is assigned
123    assertTrue("Meta should be assigned",
124      regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
125    // Now, make sure meta is registered in zk
126    metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
127    assertEquals("Meta should not be in transition", RegionState.State.OPEN,
128        metaState.getState());
129    assertEquals("Meta should be assigned", metaState.getServerName(),
130      regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
131    assertNotEquals("Meta should be assigned on a different server",
132      metaState.getServerName(), metaServerName);
133  }
134
135  /**
136   * Master should be able to recover from any unexpected state of meta-region-server znode
137   */
138  @Test
139  public void testMetaAssignmentFailure() throws Exception {
140    final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
141    HMaster master = cluster.getMaster();
142    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
143    ServerName metaServerName =
144        regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
145    if (master.getServerName().equals(metaServerName) || metaServerName == null
146        || !metaServerName.equals(cluster.getServerHoldingMeta())) {
147      metaServerName =
148          cluster.getLiveRegionServerThreads().get(0).getRegionServer().getServerName();
149      master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
150        Bytes.toBytes(metaServerName.getServerName()));
151      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
152      metaServerName = regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
153    }
154    RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
155    assertEquals("Wrong state for meta!", RegionState.State.OPEN, metaState.getState());
156    assertNotEquals("Meta is on master!", metaServerName, master.getServerName());
157    // Setting meta state to incorrect state OPENING, to see if master restarts or standby node can
158    // recover it
159    MetaTableLocator.setMetaLocation(master.getZooKeeper(), metaServerName,
160      RegionState.State.OPENING);
161    master.abort("Abort to test whether standby assign the meta OPENING region");
162    AssignmentTestingUtil.killRs(TEST_UTIL, metaServerName);
163    final HMaster oldMaster = master;
164    TEST_UTIL.decrementMinRegionServerCount();
165    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
166      @Override
167      public boolean evaluate() throws Exception {
168        // test that standby master should be able to recover meta
169        return cluster.getMaster() != null && cluster.getMaster().isInitialized()
170            && oldMaster != cluster.getMaster();
171      }
172    });
173    master = cluster.getMaster();
174    // Now, make sure meta is assigned
175    assertTrue("Meta should be assigned", master.getAssignmentManager().getRegionStates()
176        .isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
177    // Now, make sure meta is registered in zk as well
178    metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
179    assertEquals("Meta should not be in transition", RegionState.State.OPEN, metaState.getState());
180    assertEquals("Meta should be assigned", metaState.getServerName(), master.getAssignmentManager()
181        .getRegionStates().getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
182  }
183
184  public static class MyRegionServer extends MiniHBaseClusterRegionServer {
185
186    public MyRegionServer(Configuration conf) throws IOException, KeeperException,
187        InterruptedException {
188      super(conf);
189    }
190
191    @Override
192    public void abort(String reason, Throwable cause) {
193      // sleep to slow down the region server abort
194      try {
195        Thread.sleep(30*1000);
196      } catch (InterruptedException e) {
197        return;
198      }
199      super.abort(reason, cause);
200    }
201  }
202}