001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import static org.junit.jupiter.api.Assertions.assertEquals; 021import static org.junit.jupiter.api.Assertions.assertNotEquals; 022import static org.junit.jupiter.api.Assertions.assertTrue; 023 024import java.io.IOException; 025import org.apache.hadoop.conf.Configuration; 026import org.apache.hadoop.hbase.HBaseTestingUtil; 027import org.apache.hadoop.hbase.ServerName; 028import org.apache.hadoop.hbase.SingleProcessHBaseCluster; 029import org.apache.hadoop.hbase.SingleProcessHBaseCluster.MiniHBaseClusterRegionServer; 030import org.apache.hadoop.hbase.StartTestingClusterOption; 031import org.apache.hadoop.hbase.Waiter; 032import org.apache.hadoop.hbase.client.RegionInfoBuilder; 033import org.apache.hadoop.hbase.master.assignment.RegionStates; 034import org.apache.hadoop.hbase.regionserver.HRegionServer; 035import org.apache.hadoop.hbase.testclassification.MediumTests; 036import org.apache.hadoop.hbase.util.Bytes; 037import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 038import org.apache.hadoop.hbase.zookeeper.ZKUtil; 039import org.apache.hadoop.hbase.zookeeper.ZNodePaths; 040import org.apache.zookeeper.KeeperException; 041import org.junit.jupiter.api.AfterAll; 042import org.junit.jupiter.api.BeforeAll; 043import org.junit.jupiter.api.Tag; 044import org.junit.jupiter.api.Test; 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047 048/** 049 * Tests handling of meta-carrying region server failover. 050 */ 051@Tag(MediumTests.TAG) 052public class TestMetaShutdownHandler { 053 private static final Logger LOG = LoggerFactory.getLogger(TestMetaShutdownHandler.class); 054 055 private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 056 final static Configuration conf = TEST_UTIL.getConfiguration(); 057 058 @BeforeAll 059 public static void setUpBeforeClass() throws Exception { 060 StartTestingClusterOption option = StartTestingClusterOption.builder().numRegionServers(3) 061 .rsClass(MyRegionServer.class).numDataNodes(3).build(); 062 TEST_UTIL.startMiniCluster(option); 063 } 064 065 @AfterAll 066 public static void tearDownAfterClass() throws Exception { 067 TEST_UTIL.shutdownMiniCluster(); 068 } 069 070 /** 071 * This test will test the expire handling of a meta-carrying region server. After 072 * HBaseMiniCluster is up, we will delete the ephemeral node of the meta-carrying region server, 073 * which will trigger the expire of this region server on the master. On the other hand, we will 074 * slow down the abort process on the region server so that it is still up during the master SSH. 075 * We will check that the master SSH is still successfully done. 076 */ 077 @Test 078 public void testExpireMetaRegionServer() throws Exception { 079 SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 080 HMaster master = cluster.getMaster(); 081 RegionStates regionStates = master.getAssignmentManager().getRegionStates(); 082 ServerName metaServerName = 083 regionStates.getRegionServerOfRegion(RegionInfoBuilder.FIRST_META_REGIONINFO); 084 if ( 085 master.getServerName().equals(metaServerName) || metaServerName == null 086 || !metaServerName.equals(cluster.getServerHoldingMeta()) 087 ) { 088 // Move meta off master 089 metaServerName = 090 cluster.getLiveRegionServerThreads().get(0).getRegionServer().getServerName(); 091 master.move(RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), 092 Bytes.toBytes(metaServerName.getServerName())); 093 TEST_UTIL.waitUntilNoRegionsInTransition(60000); 094 metaServerName = 095 regionStates.getRegionServerOfRegion(RegionInfoBuilder.FIRST_META_REGIONINFO); 096 } 097 RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper()); 098 assertEquals(metaState.getState(), RegionState.State.OPEN, "Wrong state for meta!"); 099 assertNotEquals(master.getServerName(), metaServerName, "Meta is on master!"); 100 HRegionServer metaRegionServer = cluster.getRegionServer(metaServerName); 101 102 // Delete the ephemeral node of the meta-carrying region server. 103 // This is trigger the expire of this region server on the master. 104 String rsEphemeralNodePath = ZNodePaths.joinZNode(master.getZooKeeper().getZNodePaths().rsZNode, 105 metaServerName.toString()); 106 ZKUtil.deleteNode(master.getZooKeeper(), rsEphemeralNodePath); 107 LOG.info("Deleted the znode for the RegionServer hosting hbase:meta; waiting on SSH"); 108 // Wait for SSH to finish 109 final ServerManager serverManager = master.getServerManager(); 110 final ServerName priorMetaServerName = metaServerName; 111 TEST_UTIL.waitFor(60000, 100, () -> metaRegionServer.isStopped()); 112 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() { 113 @Override 114 public boolean evaluate() throws Exception { 115 return !serverManager.isServerOnline(priorMetaServerName) 116 && !serverManager.areDeadServersInProgress(); 117 } 118 }); 119 LOG.info("Past wait on RIT"); 120 TEST_UTIL.waitUntilNoRegionsInTransition(60000); 121 // Now, make sure meta is assigned 122 assertTrue(regionStates.isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO), 123 "Meta should be assigned"); 124 // Now, make sure meta is registered in zk 125 metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper()); 126 assertEquals(RegionState.State.OPEN, metaState.getState(), "Meta should not be in transition"); 127 assertEquals(metaState.getServerName(), 128 regionStates.getRegionServerOfRegion(RegionInfoBuilder.FIRST_META_REGIONINFO), 129 "Meta should be assigned"); 130 assertNotEquals(metaServerName, metaState.getServerName(), 131 "Meta should be assigned on a different server"); 132 } 133 134 public static class MyRegionServer extends MiniHBaseClusterRegionServer { 135 136 public MyRegionServer(Configuration conf) 137 throws IOException, KeeperException, InterruptedException { 138 super(conf); 139 } 140 141 @Override 142 public void abort(String reason, Throwable cause) { 143 // sleep to slow down the region server abort 144 try { 145 Thread.sleep(30 * 1000); 146 } catch (InterruptedException e) { 147 return; 148 } 149 super.abort(reason, cause); 150 } 151 } 152}