001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertNotEquals; 022import static org.junit.Assert.assertTrue; 023 024import java.io.IOException; 025import org.apache.hadoop.conf.Configuration; 026import org.apache.hadoop.hbase.HBaseClassTestRule; 027import org.apache.hadoop.hbase.HBaseTestingUtility; 028import org.apache.hadoop.hbase.HRegionInfo; 029import org.apache.hadoop.hbase.MiniHBaseCluster; 030import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer; 031import org.apache.hadoop.hbase.ServerName; 032import org.apache.hadoop.hbase.StartMiniClusterOption; 033import org.apache.hadoop.hbase.Waiter; 034import org.apache.hadoop.hbase.master.assignment.RegionStates; 035import org.apache.hadoop.hbase.testclassification.MediumTests; 036import org.apache.hadoop.hbase.util.Bytes; 037import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 038import org.apache.hadoop.hbase.zookeeper.ZKUtil; 039import org.apache.hadoop.hbase.zookeeper.ZNodePaths; 040import org.apache.zookeeper.KeeperException; 041import org.junit.AfterClass; 042import org.junit.BeforeClass; 043import org.junit.ClassRule; 044import org.junit.Test; 045import org.junit.experimental.categories.Category; 046import org.slf4j.Logger; 047import org.slf4j.LoggerFactory; 048 049/** 050 * Tests handling of meta-carrying region server failover. 051 */ 052@Category(MediumTests.class) 053public class TestMetaShutdownHandler { 054 private static final Logger LOG = LoggerFactory.getLogger(TestMetaShutdownHandler.class); 055 @ClassRule 056 public static final HBaseClassTestRule CLASS_RULE = 057 HBaseClassTestRule.forClass(TestMetaShutdownHandler.class); 058 059 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 060 final static Configuration conf = TEST_UTIL.getConfiguration(); 061 062 @BeforeClass 063 public static void setUpBeforeClass() throws Exception { 064 StartMiniClusterOption option = StartMiniClusterOption.builder() 065 .numRegionServers(3).rsClass(MyRegionServer.class).numDataNodes(3).build(); 066 TEST_UTIL.startMiniCluster(option); 067 } 068 069 @AfterClass 070 public static void tearDownAfterClass() throws Exception { 071 TEST_UTIL.shutdownMiniCluster(); 072 } 073 074 /** 075 * This test will test the expire handling of a meta-carrying 076 * region server. 077 * After HBaseMiniCluster is up, we will delete the ephemeral 078 * node of the meta-carrying region server, which will trigger 079 * the expire of this region server on the master. 080 * On the other hand, we will slow down the abort process on 081 * the region server so that it is still up during the master SSH. 082 * We will check that the master SSH is still successfully done. 083 */ 084 @Test 085 public void testExpireMetaRegionServer() throws Exception { 086 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 087 HMaster master = cluster.getMaster(); 088 RegionStates regionStates = master.getAssignmentManager().getRegionStates(); 089 ServerName metaServerName = regionStates.getRegionServerOfRegion( 090 HRegionInfo.FIRST_META_REGIONINFO); 091 if (master.getServerName().equals(metaServerName) || metaServerName == null 092 || !metaServerName.equals(cluster.getServerHoldingMeta())) { 093 // Move meta off master 094 metaServerName = 095 cluster.getLiveRegionServerThreads().get(0).getRegionServer().getServerName(); 096 master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), 097 Bytes.toBytes(metaServerName.getServerName())); 098 TEST_UTIL.waitUntilNoRegionsInTransition(60000); 099 metaServerName = regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO); 100 } 101 RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper()); 102 assertEquals("Wrong state for meta!", RegionState.State.OPEN, metaState.getState()); 103 assertNotEquals("Meta is on master!", metaServerName, master.getServerName()); 104 105 // Delete the ephemeral node of the meta-carrying region server. 106 // This is trigger the expire of this region server on the master. 107 String rsEphemeralNodePath = 108 ZNodePaths.joinZNode(master.getZooKeeper().getZNodePaths().rsZNode, 109 metaServerName.toString()); 110 ZKUtil.deleteNode(master.getZooKeeper(), rsEphemeralNodePath); 111 LOG.info("Deleted the znode for the RegionServer hosting hbase:meta; waiting on SSH"); 112 // Wait for SSH to finish 113 final ServerManager serverManager = master.getServerManager(); 114 final ServerName priorMetaServerName = metaServerName; 115 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() { 116 @Override 117 public boolean evaluate() throws Exception { 118 return !serverManager.isServerOnline(priorMetaServerName) 119 && !serverManager.areDeadServersInProgress(); 120 } 121 }); 122 LOG.info("Past wait on RIT"); 123 TEST_UTIL.waitUntilNoRegionsInTransition(60000); 124 // Now, make sure meta is assigned 125 assertTrue("Meta should be assigned", 126 regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO)); 127 // Now, make sure meta is registered in zk 128 metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper()); 129 assertEquals("Meta should not be in transition", RegionState.State.OPEN, 130 metaState.getState()); 131 assertEquals("Meta should be assigned", metaState.getServerName(), 132 regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO)); 133 assertNotEquals("Meta should be assigned on a different server", 134 metaState.getServerName(), metaServerName); 135 } 136 137 public static class MyRegionServer extends MiniHBaseClusterRegionServer { 138 139 public MyRegionServer(Configuration conf) throws IOException, KeeperException, 140 InterruptedException { 141 super(conf); 142 } 143 144 @Override 145 public void abort(String reason, Throwable cause) { 146 // sleep to slow down the region server abort 147 try { 148 Thread.sleep(30*1000); 149 } catch (InterruptedException e) { 150 return; 151 } 152 super.abort(reason, cause); 153 } 154 } 155}