001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertTrue; 023 024import java.util.List; 025import java.util.Map; 026import org.apache.hadoop.conf.Configuration; 027import org.apache.hadoop.hbase.client.Admin; 028import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 029import org.apache.hadoop.hbase.client.Put; 030import org.apache.hadoop.hbase.client.RegionInfo; 031import org.apache.hadoop.hbase.client.ResultScanner; 032import org.apache.hadoop.hbase.client.Scan; 033import org.apache.hadoop.hbase.client.Table; 034import org.apache.hadoop.hbase.client.TableDescriptor; 035import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 036import org.apache.hadoop.hbase.coordination.ZkSplitLogWorkerCoordination; 037import org.apache.hadoop.hbase.master.HMaster; 038import org.apache.hadoop.hbase.master.LoadBalancer; 039import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer; 040import org.apache.hadoop.hbase.testclassification.LargeTests; 041import org.apache.hadoop.hbase.testclassification.MiscTests; 042import org.apache.hadoop.hbase.util.Bytes; 043import org.apache.hadoop.hbase.util.FSUtils; 044import org.apache.hadoop.hbase.zookeeper.ZKUtil; 045import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 046import org.apache.zookeeper.KeeperException; 047import org.junit.After; 048import org.junit.AfterClass; 049import org.junit.Before; 050import org.junit.BeforeClass; 051import org.junit.ClassRule; 052import org.junit.Rule; 053import org.junit.Test; 054import org.junit.experimental.categories.Category; 055import org.junit.rules.TestName; 056import org.slf4j.Logger; 057import org.slf4j.LoggerFactory; 058 059@Category({MiscTests.class, LargeTests.class}) 060public class TestZooKeeper { 061 062 @ClassRule 063 public static final HBaseClassTestRule CLASS_RULE = 064 HBaseClassTestRule.forClass(TestZooKeeper.class); 065 066 private static final Logger LOG = LoggerFactory.getLogger(TestZooKeeper.class); 067 068 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 069 070 @Rule 071 public TestName name = new TestName(); 072 073 @BeforeClass 074 public static void setUpBeforeClass() throws Exception { 075 // Test we can first start the ZK cluster by itself 076 Configuration conf = TEST_UTIL.getConfiguration(); 077 TEST_UTIL.startMiniDFSCluster(2); 078 TEST_UTIL.startMiniZKCluster(); 079 conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000); 080 conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockLoadBalancer.class, 081 LoadBalancer.class); 082 } 083 084 @AfterClass 085 public static void tearDownAfterClass() throws Exception { 086 TEST_UTIL.shutdownMiniCluster(); 087 } 088 089 @Before 090 public void setUp() throws Exception { 091 StartMiniClusterOption option = StartMiniClusterOption.builder() 092 .numMasters(2).numRegionServers(2).build(); 093 TEST_UTIL.startMiniHBaseCluster(option); 094 } 095 096 @After 097 public void after() throws Exception { 098 try { 099 TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(10000); 100 // Some regionserver could fail to delete its znode. 101 // So shutdown could hang. Let's kill them all instead. 102 TEST_UTIL.getHBaseCluster().killAll(); 103 104 // Still need to clean things up 105 TEST_UTIL.shutdownMiniHBaseCluster(); 106 } finally { 107 TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true); 108 ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase"); 109 } 110 } 111 112 @Test 113 public void testRegionServerSessionExpired() throws Exception { 114 LOG.info("Starting " + name.getMethodName()); 115 TEST_UTIL.expireRegionServerSession(0); 116 testSanity(name.getMethodName()); 117 } 118 119 @Test 120 public void testMasterSessionExpired() throws Exception { 121 LOG.info("Starting " + name.getMethodName()); 122 TEST_UTIL.expireMasterSession(); 123 testSanity(name.getMethodName()); 124 } 125 126 /** 127 * Master recovery when the znode already exists. Internally, this 128 * test differs from {@link #testMasterSessionExpired} because here 129 * the master znode will exist in ZK. 130 */ 131 @Test 132 public void testMasterZKSessionRecoveryFailure() throws Exception { 133 LOG.info("Starting " + name.getMethodName()); 134 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 135 HMaster m = cluster.getMaster(); 136 m.abort("Test recovery from zk session expired", 137 new KeeperException.SessionExpiredException()); 138 assertTrue(m.isStopped()); // Master doesn't recover any more 139 testSanity(name.getMethodName()); 140 } 141 142 /** 143 * Make sure we can use the cluster 144 */ 145 private void testSanity(final String testName) throws Exception { 146 String tableName = testName + "_" + System.currentTimeMillis(); 147 TableDescriptor desc = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName)) 148 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build(); 149 LOG.info("Creating table " + tableName); 150 Admin admin = TEST_UTIL.getAdmin(); 151 try { 152 admin.createTable(desc); 153 } finally { 154 admin.close(); 155 } 156 157 Table table = TEST_UTIL.getConnection().getTable(desc.getTableName()); 158 Put put = new Put(Bytes.toBytes("testrow")); 159 put.addColumn(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata")); 160 LOG.info("Putting table " + tableName); 161 table.put(put); 162 table.close(); 163 } 164 165 /** 166 * Tests that the master does not call retainAssignment after recovery from expired zookeeper 167 * session. Without the HBASE-6046 fix master always tries to assign all the user regions by 168 * calling retainAssignment. 169 */ 170 @Test 171 public void testRegionAssignmentAfterMasterRecoveryDueToZKExpiry() throws Exception { 172 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 173 cluster.startRegionServer(); 174 cluster.waitForActiveAndReadyMaster(10000); 175 HMaster m = cluster.getMaster(); 176 final ZKWatcher zkw = m.getZooKeeper(); 177 // now the cluster is up. So assign some regions. 178 try (Admin admin = TEST_UTIL.getAdmin()) { 179 byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"), 180 Bytes.toBytes("c"), Bytes.toBytes("d"), Bytes.toBytes("e"), Bytes.toBytes("f"), 181 Bytes.toBytes("g"), Bytes.toBytes("h"), Bytes.toBytes("i"), Bytes.toBytes("j") }; 182 TableDescriptor htd = 183 TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName())) 184 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build(); 185 admin.createTable(htd, SPLIT_KEYS); 186 TEST_UTIL.waitUntilNoRegionsInTransition(60000); 187 m.getZooKeeper().close(); 188 MockLoadBalancer.retainAssignCalled = false; 189 final int expectedNumOfListeners = countPermanentListeners(zkw); 190 m.abort("Test recovery from zk session expired", 191 new KeeperException.SessionExpiredException()); 192 assertTrue(m.isStopped()); // Master doesn't recover any more 193 // The recovered master should not call retainAssignment, as it is not a 194 // clean startup. 195 assertFalse("Retain assignment should not be called", MockLoadBalancer.retainAssignCalled); 196 // number of listeners should be same as the value before master aborted 197 // wait for new master is initialized 198 cluster.waitForActiveAndReadyMaster(120000); 199 final HMaster newMaster = cluster.getMasterThread().getMaster(); 200 assertEquals(expectedNumOfListeners, countPermanentListeners(newMaster.getZooKeeper())); 201 } 202 } 203 204 /** 205 * Count listeners in zkw excluding listeners, that belongs to workers or other 206 * temporary processes. 207 */ 208 private int countPermanentListeners(ZKWatcher watcher) { 209 return countListeners(watcher, ZkSplitLogWorkerCoordination.class); 210 } 211 212 /** 213 * Count listeners in zkw excluding provided classes 214 */ 215 private int countListeners(ZKWatcher watcher, Class<?>... exclude) { 216 int cnt = 0; 217 for (Object o : watcher.getListeners()) { 218 boolean skip = false; 219 for (Class<?> aClass : exclude) { 220 if (aClass.isAssignableFrom(o.getClass())) { 221 skip = true; 222 break; 223 } 224 } 225 if (!skip) { 226 cnt += 1; 227 } 228 } 229 return cnt; 230 } 231 232 /** 233 * Tests whether the logs are split when master recovers from a expired zookeeper session and an 234 * RS goes down. 235 */ 236 @Test 237 public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws Exception { 238 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 239 cluster.startRegionServer(); 240 TableName tableName = TableName.valueOf(name.getMethodName()); 241 byte[] family = Bytes.toBytes("col"); 242 try (Admin admin = TEST_UTIL.getAdmin()) { 243 byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("1"), Bytes.toBytes("2"), 244 Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") }; 245 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 246 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build(); 247 admin.createTable(htd, SPLIT_KEYS); 248 } 249 TEST_UTIL.waitUntilNoRegionsInTransition(60000); 250 HMaster m = cluster.getMaster(); 251 try (Table table = TEST_UTIL.getConnection().getTable(tableName)) { 252 int numberOfPuts; 253 for (numberOfPuts = 0; numberOfPuts < 6; numberOfPuts++) { 254 Put p = new Put(Bytes.toBytes(numberOfPuts)); 255 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), 256 Bytes.toBytes("value" + numberOfPuts)); 257 table.put(p); 258 } 259 m.abort("Test recovery from zk session expired", 260 new KeeperException.SessionExpiredException()); 261 assertTrue(m.isStopped()); // Master doesn't recover any more 262 cluster.killRegionServer(TEST_UTIL.getRSForFirstRegionInTable(tableName).getServerName()); 263 // Without patch for HBASE-6046 this test case will always timeout 264 // with patch the test case should pass. 265 int numberOfRows = 0; 266 try (ResultScanner scanner = table.getScanner(new Scan())) { 267 while (scanner.next() != null) { 268 numberOfRows++; 269 } 270 } 271 assertEquals("Number of rows should be equal to number of puts.", numberOfPuts, numberOfRows); 272 } 273 } 274 275 static class MockLoadBalancer extends SimpleLoadBalancer { 276 static boolean retainAssignCalled = false; 277 278 @Override 279 public Map<ServerName, List<RegionInfo>> retainAssignment( 280 Map<RegionInfo, ServerName> regions, List<ServerName> servers) throws HBaseIOException { 281 retainAssignCalled = true; 282 return super.retainAssignment(regions, servers); 283 } 284 } 285 286} 287