001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertTrue; 023 024import edu.umd.cs.findbugs.annotations.NonNull; 025import java.util.List; 026import java.util.Map; 027 028import org.apache.hadoop.conf.Configuration; 029import org.apache.hadoop.hbase.client.Admin; 030import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 031import org.apache.hadoop.hbase.client.Put; 032import org.apache.hadoop.hbase.client.RegionInfo; 033import org.apache.hadoop.hbase.client.ResultScanner; 034import org.apache.hadoop.hbase.client.Scan; 035import org.apache.hadoop.hbase.client.Table; 036import org.apache.hadoop.hbase.client.TableDescriptor; 037import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 038import org.apache.hadoop.hbase.coordination.ZkSplitLogWorkerCoordination; 039import org.apache.hadoop.hbase.master.HMaster; 040import org.apache.hadoop.hbase.master.LoadBalancer; 041import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer; 042import org.apache.hadoop.hbase.testclassification.MediumTests; 043import org.apache.hadoop.hbase.testclassification.MiscTests; 044import org.apache.hadoop.hbase.util.Bytes; 045import org.apache.hadoop.hbase.util.CommonFSUtils; 046import org.apache.hadoop.hbase.zookeeper.ZKUtil; 047import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 048import org.apache.zookeeper.KeeperException; 049import org.junit.After; 050import org.junit.AfterClass; 051import org.junit.Before; 052import org.junit.BeforeClass; 053import org.junit.ClassRule; 054import org.junit.Rule; 055import org.junit.Test; 056import org.junit.experimental.categories.Category; 057import org.junit.rules.TestName; 058import org.slf4j.Logger; 059import org.slf4j.LoggerFactory; 060 061@Category({MiscTests.class, MediumTests.class}) 062public class TestZooKeeper { 063 064 @ClassRule 065 public static final HBaseClassTestRule CLASS_RULE = 066 HBaseClassTestRule.forClass(TestZooKeeper.class); 067 068 private static final Logger LOG = LoggerFactory.getLogger(TestZooKeeper.class); 069 070 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 071 072 @Rule 073 public TestName name = new TestName(); 074 075 @BeforeClass 076 public static void setUpBeforeClass() throws Exception { 077 // Test we can first start the ZK cluster by itself 078 Configuration conf = TEST_UTIL.getConfiguration(); 079 // A couple of tests rely on master expiring ZK session, hence killing the only master. So it 080 // makes sense only for ZK registry. Enforcing it. 081 conf.set(HConstants.CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY, 082 HConstants.ZK_CONNECTION_REGISTRY_CLASS); 083 TEST_UTIL.startMiniDFSCluster(2); 084 TEST_UTIL.startMiniZKCluster(); 085 conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000); 086 conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockLoadBalancer.class, 087 LoadBalancer.class); 088 TEST_UTIL.startMiniDFSCluster(2); 089 } 090 091 @AfterClass 092 public static void tearDownAfterClass() throws Exception { 093 TEST_UTIL.shutdownMiniCluster(); 094 } 095 096 @Before 097 public void setUp() throws Exception { 098 StartMiniClusterOption option = StartMiniClusterOption.builder() 099 .numMasters(2).numRegionServers(2).build(); 100 TEST_UTIL.startMiniHBaseCluster(option); 101 } 102 103 @After 104 public void after() throws Exception { 105 try { 106 TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(10000); 107 // Some regionserver could fail to delete its znode. 108 // So shutdown could hang. Let's kill them all instead. 109 TEST_UTIL.getHBaseCluster().killAll(); 110 111 // Still need to clean things up 112 TEST_UTIL.shutdownMiniHBaseCluster(); 113 } finally { 114 TEST_UTIL.getTestFileSystem().delete(CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration()), 115 true); 116 ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase"); 117 } 118 } 119 120 @Test 121 public void testRegionServerSessionExpired() throws Exception { 122 LOG.info("Starting " + name.getMethodName()); 123 TEST_UTIL.expireRegionServerSession(0); 124 testSanity(name.getMethodName()); 125 } 126 127 @Test 128 public void testMasterSessionExpired() throws Exception { 129 LOG.info("Starting " + name.getMethodName()); 130 TEST_UTIL.expireMasterSession(); 131 testSanity(name.getMethodName()); 132 } 133 134 /** 135 * Master recovery when the znode already exists. Internally, this 136 * test differs from {@link #testMasterSessionExpired} because here 137 * the master znode will exist in ZK. 138 */ 139 @Test 140 public void testMasterZKSessionRecoveryFailure() throws Exception { 141 LOG.info("Starting " + name.getMethodName()); 142 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 143 HMaster m = cluster.getMaster(); 144 m.abort("Test recovery from zk session expired", 145 new KeeperException.SessionExpiredException()); 146 assertTrue(m.isStopped()); // Master doesn't recover any more 147 testSanity(name.getMethodName()); 148 } 149 150 /** 151 * Make sure we can use the cluster 152 */ 153 private void testSanity(final String testName) throws Exception { 154 String tableName = testName + "_" + System.currentTimeMillis(); 155 TableDescriptor desc = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName)) 156 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build(); 157 LOG.info("Creating table " + tableName); 158 Admin admin = TEST_UTIL.getAdmin(); 159 try { 160 admin.createTable(desc); 161 } finally { 162 admin.close(); 163 } 164 165 Table table = TEST_UTIL.getConnection().getTable(desc.getTableName()); 166 Put put = new Put(Bytes.toBytes("testrow")); 167 put.addColumn(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata")); 168 LOG.info("Putting table " + tableName); 169 table.put(put); 170 table.close(); 171 } 172 173 /** 174 * Tests that the master does not call retainAssignment after recovery from expired zookeeper 175 * session. Without the HBASE-6046 fix master always tries to assign all the user regions by 176 * calling retainAssignment. 177 */ 178 @Test 179 public void testRegionAssignmentAfterMasterRecoveryDueToZKExpiry() throws Exception { 180 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 181 cluster.startRegionServer(); 182 cluster.waitForActiveAndReadyMaster(10000); 183 HMaster m = cluster.getMaster(); 184 final ZKWatcher zkw = m.getZooKeeper(); 185 // now the cluster is up. So assign some regions. 186 try (Admin admin = TEST_UTIL.getAdmin()) { 187 byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"), 188 Bytes.toBytes("c"), Bytes.toBytes("d"), Bytes.toBytes("e"), Bytes.toBytes("f"), 189 Bytes.toBytes("g"), Bytes.toBytes("h"), Bytes.toBytes("i"), Bytes.toBytes("j") }; 190 TableDescriptor htd = 191 TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName())) 192 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build(); 193 admin.createTable(htd, SPLIT_KEYS); 194 TEST_UTIL.waitUntilNoRegionsInTransition(60000); 195 m.getZooKeeper().close(); 196 MockLoadBalancer.retainAssignCalled = false; 197 final int expectedNumOfListeners = countPermanentListeners(zkw); 198 m.abort("Test recovery from zk session expired", 199 new KeeperException.SessionExpiredException()); 200 assertTrue(m.isStopped()); // Master doesn't recover any more 201 // The recovered master should not call retainAssignment, as it is not a 202 // clean startup. 203 assertFalse("Retain assignment should not be called", MockLoadBalancer.retainAssignCalled); 204 // number of listeners should be same as the value before master aborted 205 // wait for new master is initialized 206 cluster.waitForActiveAndReadyMaster(120000); 207 final HMaster newMaster = cluster.getMasterThread().getMaster(); 208 assertEquals(expectedNumOfListeners, countPermanentListeners(newMaster.getZooKeeper())); 209 } 210 } 211 212 /** 213 * Count listeners in zkw excluding listeners, that belongs to workers or other 214 * temporary processes. 215 */ 216 private int countPermanentListeners(ZKWatcher watcher) { 217 return countListeners(watcher, ZkSplitLogWorkerCoordination.class); 218 } 219 220 /** 221 * Count listeners in zkw excluding provided classes 222 */ 223 private int countListeners(ZKWatcher watcher, Class<?>... exclude) { 224 int cnt = 0; 225 for (Object o : watcher.getListeners()) { 226 boolean skip = false; 227 for (Class<?> aClass : exclude) { 228 if (aClass.isAssignableFrom(o.getClass())) { 229 skip = true; 230 break; 231 } 232 } 233 if (!skip) { 234 cnt += 1; 235 } 236 } 237 return cnt; 238 } 239 240 /** 241 * Tests whether the logs are split when master recovers from a expired zookeeper session and an 242 * RS goes down. 243 */ 244 @Test 245 public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws Exception { 246 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); 247 cluster.startRegionServer(); 248 TableName tableName = TableName.valueOf(name.getMethodName()); 249 byte[] family = Bytes.toBytes("col"); 250 try (Admin admin = TEST_UTIL.getAdmin()) { 251 byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("1"), Bytes.toBytes("2"), 252 Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") }; 253 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 254 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build(); 255 admin.createTable(htd, SPLIT_KEYS); 256 } 257 TEST_UTIL.waitUntilNoRegionsInTransition(60000); 258 HMaster m = cluster.getMaster(); 259 try (Table table = TEST_UTIL.getConnection().getTable(tableName)) { 260 int numberOfPuts; 261 for (numberOfPuts = 0; numberOfPuts < 6; numberOfPuts++) { 262 Put p = new Put(Bytes.toBytes(numberOfPuts)); 263 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), 264 Bytes.toBytes("value" + numberOfPuts)); 265 table.put(p); 266 } 267 m.abort("Test recovery from zk session expired", 268 new KeeperException.SessionExpiredException()); 269 assertTrue(m.isStopped()); // Master doesn't recover any more 270 cluster.killRegionServer(TEST_UTIL.getRSForFirstRegionInTable(tableName).getServerName()); 271 // Without patch for HBASE-6046 this test case will always timeout 272 // with patch the test case should pass. 273 int numberOfRows = 0; 274 try (ResultScanner scanner = table.getScanner(new Scan())) { 275 while (scanner.next() != null) { 276 numberOfRows++; 277 } 278 } 279 assertEquals("Number of rows should be equal to number of puts.", numberOfPuts, numberOfRows); 280 } 281 } 282 283 static class MockLoadBalancer extends SimpleLoadBalancer { 284 static boolean retainAssignCalled = false; 285 286 @Override 287 @NonNull 288 public Map<ServerName, List<RegionInfo>> retainAssignment( 289 Map<RegionInfo, ServerName> regions, List<ServerName> servers) throws HBaseIOException { 290 retainAssignCalled = true; 291 return super.retainAssignment(regions, servers); 292 } 293 } 294 295} 296