/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests handling of meta-carrying region server failover.
 * <p>
 * Both tests share one mini cluster whose region servers are {@link MyRegionServer} instances,
 * i.e. region servers whose {@code abort} is artificially delayed.
 */
@Category(MediumTests.class)
public class TestMetaShutdownHandler {
  private static final Logger LOG = LoggerFactory.getLogger(TestMetaShutdownHandler.class);

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestMetaShutdownHandler.class);

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  static final Configuration conf = TEST_UTIL.getConfiguration();

  /** How long {@link MyRegionServer#abort(String, Throwable)} stalls before really aborting. */
  private static final long ABORT_DELAY_MS = 30 * 1000;

  /**
   * Starts the shared mini cluster using {@link MyRegionServer} as the region server class.
   * NOTE(review): the leading arguments are presumably "2 masters, 4 region servers"; the
   * standby-master test below relies on a second master being available.
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster(2, 4, null, null, MyRegionServer.class);
  }

  /** Shuts the shared mini cluster down. */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Returns the server currently carrying hbase:meta, first moving meta onto the first live
   * region server when the recorded location is unknown, is the master itself, or disagrees with
   * what the cluster reports as the server holding meta.
   * @param cluster      the running mini cluster
   * @param master       the active master
   * @param regionStates the active master's region states
   * @return the meta location as recorded in {@code regionStates} once any move has settled
   */
  private static ServerName ensureMetaOffMaster(MiniHBaseCluster cluster, HMaster master,
    RegionStates regionStates) throws Exception {
    ServerName metaServerName =
      regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
    if (master.getServerName().equals(metaServerName) || metaServerName == null
      || !metaServerName.equals(cluster.getServerHoldingMeta())) {
      // Move meta off master
      metaServerName =
        cluster.getLiveRegionServerThreads().get(0).getRegionServer().getServerName();
      master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
        Bytes.toBytes(metaServerName.getServerName()));
      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
      // Re-read the location: trust what the master recorded, not what we asked for.
      metaServerName = regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
    }
    return metaServerName;
  }

  /**
   * This test will test the expire handling of a meta-carrying
   * region server.
   * After HBaseMiniCluster is up, we will delete the ephemeral
   * node of the meta-carrying region server, which will trigger
   * the expire of this region server on the master.
   * On the other hand, we will slow down the abort process on
   * the region server so that it is still up during the master SSH.
   * We will check that the master SSH is still successfully done.
   */
  @Test
  public void testExpireMetaRegionServer() throws Exception {
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    HMaster master = cluster.getMaster();
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    ServerName metaServerName = ensureMetaOffMaster(cluster, master, regionStates);

    RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
    assertEquals("Wrong state for meta!", RegionState.State.OPEN, metaState.getState());
    // JUnit signature is (message, unexpected, actual).
    assertNotEquals("Meta is on master!", master.getServerName(), metaServerName);

    // Delete the ephemeral node of the meta-carrying region server.
    // This will trigger the expire of this region server on the master.
    String rsEphemeralNodePath =
      ZNodePaths.joinZNode(master.getZooKeeper().znodePaths.rsZNode, metaServerName.toString());
    ZKUtil.deleteNode(master.getZooKeeper(), rsEphemeralNodePath);
    TEST_UTIL.decrementMinRegionServerCount();
    LOG.info("Deleted the znode for the RegionServer hosting hbase:meta; waiting on SSH");

    // Wait for SSH to finish: the old meta server is no longer online and no dead server is
    // still being processed.
    final ServerManager serverManager = master.getServerManager();
    final ServerName priorMetaServerName = metaServerName;
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return !serverManager.isServerOnline(priorMetaServerName)
          && !serverManager.areDeadServersInProgress();
      }
    });
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    LOG.info("Past wait on RIT");

    // Now, make sure meta is assigned
    assertTrue("Meta should be assigned",
      regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
    // Now, make sure meta is registered in zk
    metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
    assertEquals("Meta should not be in transition", RegionState.State.OPEN,
      metaState.getState());
    assertEquals("Meta should be assigned", metaState.getServerName(),
      regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
    assertNotEquals("Meta should be assigned on a different server",
      metaServerName, metaState.getServerName());
  }

  /**
   * Master should be able to recover from any unexpected state of meta-region-server znode.
   * <p>
   * The test writes state OPENING for meta into ZooKeeper, aborts the active master, kills the
   * region server that carried meta, and then waits for a different, initialized master to show
   * up. Meta must end up online and registered as OPEN in ZooKeeper.
   */
  @Test
  public void testMetaAssignmentFailure() throws Exception {
    final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    HMaster master = cluster.getMaster();
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    ServerName metaServerName = ensureMetaOffMaster(cluster, master, regionStates);

    RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
    assertEquals("Wrong state for meta!", RegionState.State.OPEN, metaState.getState());
    // JUnit signature is (message, unexpected, actual).
    assertNotEquals("Meta is on master!", master.getServerName(), metaServerName);

    // Setting meta state to incorrect state OPENING, to see if master restarts or standby node can
    // recover it
    MetaTableLocator.setMetaLocation(master.getZooKeeper(), metaServerName,
      RegionState.State.OPENING);
    master.abort("Abort to test whether standby assign the meta OPENING region");
    AssignmentTestingUtil.killRs(TEST_UTIL, metaServerName);
    final HMaster oldMaster = master;
    TEST_UTIL.decrementMinRegionServerCount();
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        // test that standby master should be able to recover meta
        // Snapshot once so all three checks look at the same master instance even if the
        // active master changes while this predicate is being evaluated.
        HMaster current = cluster.getMaster();
        return current != null && current.isInitialized() && oldMaster != current;
      }
    });
    master = cluster.getMaster();
    RegionStates newRegionStates = master.getAssignmentManager().getRegionStates();

    // Now, make sure meta is assigned
    assertTrue("Meta should be assigned",
      newRegionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
    // Now, make sure meta is registered in zk as well
    metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
    assertEquals("Meta should not be in transition", RegionState.State.OPEN,
      metaState.getState());
    assertEquals("Meta should be assigned", metaState.getServerName(),
      newRegionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
  }

  /**
   * Region server whose {@link #abort(String, Throwable)} is delayed, so that it is still up
   * while the master processes its expiration.
   */
  public static class MyRegionServer extends MiniHBaseClusterRegionServer {

    public MyRegionServer(Configuration conf) throws IOException, KeeperException,
      InterruptedException {
      super(conf);
    }

    /**
     * Sleeps for {@code ABORT_DELAY_MS} and then delegates to the real abort. If the sleep is
     * interrupted the abort is skipped, and the interrupt status is restored so the calling
     * thread can still observe it.
     */
    @Override
    public void abort(String reason, Throwable cause) {
      // sleep to slow down the region server abort
      try {
        Thread.sleep(ABORT_DELAY_MS);
      } catch (InterruptedException e) {
        // Do not swallow the interrupt; hand it back to the caller's thread.
        Thread.currentThread().interrupt();
        return;
      }
      super.abort(reason, cause);
    }
  }
}