001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertNotNull; 023import static org.junit.Assert.assertTrue; 024 025import java.io.IOException; 026import java.io.InterruptedIOException; 027import java.util.ArrayList; 028import java.util.List; 029import java.util.concurrent.Semaphore; 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.fs.FileSystem; 032import org.apache.hadoop.hbase.ChoreService; 033import org.apache.hadoop.hbase.CoordinatedStateManager; 034import org.apache.hadoop.hbase.HBaseClassTestRule; 035import org.apache.hadoop.hbase.HBaseTestingUtility; 036import org.apache.hadoop.hbase.Server; 037import org.apache.hadoop.hbase.ServerName; 038import org.apache.hadoop.hbase.client.ClusterConnection; 039import org.apache.hadoop.hbase.client.Connection; 040import org.apache.hadoop.hbase.monitoring.MonitoredTask; 041import org.apache.hadoop.hbase.testclassification.MasterTests; 042import org.apache.hadoop.hbase.testclassification.MediumTests; 043import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker; 044import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; 045import org.apache.hadoop.hbase.zookeeper.ZKListener; 046import org.apache.hadoop.hbase.zookeeper.ZKUtil; 047import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 048import org.apache.hadoop.hbase.zookeeper.ZNodePaths; 049import org.apache.zookeeper.KeeperException; 050import org.junit.AfterClass; 051import org.junit.BeforeClass; 052import org.junit.ClassRule; 053import org.junit.Test; 054import org.junit.experimental.categories.Category; 055import org.mockito.Mockito; 056import org.slf4j.Logger; 057import org.slf4j.LoggerFactory; 058 059/** 060 * Test the {@link ActiveMasterManager}. 061 */ 062@Category({MasterTests.class, MediumTests.class}) 063public class TestActiveMasterManager { 064 065 @ClassRule 066 public static final HBaseClassTestRule CLASS_RULE = 067 HBaseClassTestRule.forClass(TestActiveMasterManager.class); 068 069 private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class); 070 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 071 072 @BeforeClass 073 public static void setUpBeforeClass() throws Exception { 074 TEST_UTIL.startMiniZKCluster(); 075 } 076 077 @AfterClass 078 public static void tearDownAfterClass() throws Exception { 079 TEST_UTIL.shutdownMiniZKCluster(); 080 } 081 082 @Test public void testRestartMaster() throws IOException, KeeperException { 083 try (ZKWatcher zk = new ZKWatcher(TEST_UTIL.getConfiguration(), 084 "testActiveMasterManagerFromZK", null, true)) { 085 try { 086 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 087 ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode); 088 } catch (KeeperException.NoNodeException nne) { 089 } 090 091 // Create the master node with a dummy address 092 ServerName master = ServerName.valueOf("localhost", 1, System.currentTimeMillis()); 093 // Should not have a master yet 094 DummyMaster dummyMaster = new DummyMaster(zk, master); 095 ClusterStatusTracker clusterStatusTracker = 096 dummyMaster.getClusterStatusTracker(); 097 ActiveMasterManager activeMasterManager = 098 dummyMaster.getActiveMasterManager(); 099 assertFalse(activeMasterManager.clusterHasActiveMaster.get()); 100 assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); 101 102 // First test becoming the active master uninterrupted 103 MonitoredTask status = Mockito.mock(MonitoredTask.class); 104 clusterStatusTracker.setClusterUp(); 105 106 activeMasterManager.blockUntilBecomingActiveMaster(100, status); 107 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 108 assertMaster(zk, master); 109 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 110 111 // Now pretend master restart 112 DummyMaster secondDummyMaster = new DummyMaster(zk, master); 113 ActiveMasterManager secondActiveMasterManager = 114 secondDummyMaster.getActiveMasterManager(); 115 assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get()); 116 activeMasterManager.blockUntilBecomingActiveMaster(100, status); 117 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 118 assertMaster(zk, master); 119 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 120 assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get()); 121 } 122 } 123 124 /** 125 * Unit tests that uses ZooKeeper but does not use the master-side methods 126 * but rather acts directly on ZK. 127 * @throws Exception 128 */ 129 @Test 130 public void testActiveMasterManagerFromZK() throws Exception { 131 try (ZKWatcher zk = new ZKWatcher(TEST_UTIL.getConfiguration(), 132 "testActiveMasterManagerFromZK", null, true)) { 133 try { 134 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 135 ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode); 136 } catch (KeeperException.NoNodeException nne) { 137 } 138 139 // Create the master node with a dummy address 140 ServerName firstMasterAddress = 141 ServerName.valueOf("localhost", 1, System.currentTimeMillis()); 142 ServerName secondMasterAddress = 143 ServerName.valueOf("localhost", 2, System.currentTimeMillis()); 144 145 // Should not have a master yet 146 DummyMaster ms1 = new DummyMaster(zk, firstMasterAddress); 147 ActiveMasterManager activeMasterManager = 148 ms1.getActiveMasterManager(); 149 assertFalse(activeMasterManager.clusterHasActiveMaster.get()); 150 assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); 151 152 // First test becoming the active master uninterrupted 153 ClusterStatusTracker clusterStatusTracker = 154 ms1.getClusterStatusTracker(); 155 clusterStatusTracker.setClusterUp(); 156 activeMasterManager.blockUntilBecomingActiveMaster(100, 157 Mockito.mock(MonitoredTask.class)); 158 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 159 assertMaster(zk, firstMasterAddress); 160 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 161 162 // New manager will now try to become the active master in another thread 163 WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress); 164 t.start(); 165 // Wait for this guy to figure out there is another active master 166 // Wait for 1 second at most 167 int sleeps = 0; 168 while (!t.manager.clusterHasActiveMaster.get() && sleeps < 100) { 169 Thread.sleep(10); 170 sleeps++; 171 } 172 173 // Both should see that there is an active master 174 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 175 assertTrue(t.manager.clusterHasActiveMaster.get()); 176 // But secondary one should not be the active master 177 assertFalse(t.isActiveMaster); 178 // Verify the active master ServerName is populated in standby master. 179 assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get()); 180 181 // Close the first server and delete it's master node 182 ms1.stop("stopping first server"); 183 184 // Use a listener to capture when the node is actually deleted 185 NodeDeletionListener listener = new NodeDeletionListener(zk, 186 zk.getZNodePaths().masterAddressZNode); 187 zk.registerListener(listener); 188 189 LOG.info("Deleting master node"); 190 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 191 192 // Wait for the node to be deleted 193 LOG.info("Waiting for active master manager to be notified"); 194 listener.waitForDeletion(); 195 LOG.info("Master node deleted"); 196 197 // Now we expect the secondary manager to have and be the active master 198 // Wait for 1 second at most 199 sleeps = 0; 200 while (!t.isActiveMaster && sleeps < 100) { 201 Thread.sleep(10); 202 sleeps++; 203 } 204 LOG.debug("Slept " + sleeps + " times"); 205 206 assertTrue(t.manager.clusterHasActiveMaster.get()); 207 assertTrue(t.isActiveMaster); 208 assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get()); 209 210 LOG.info("Deleting master node"); 211 212 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 213 } 214 } 215 216 @Test 217 public void testBackupMasterUpdates() throws Exception { 218 Configuration conf = TEST_UTIL.getConfiguration(); 219 try (ZKWatcher zk = new ZKWatcher(conf, "testBackupMasterUpdates", null, true)) { 220 ServerName sn1 = ServerName.valueOf("localhost", 1, -1); 221 DummyMaster master1 = new DummyMaster(zk, sn1); 222 ActiveMasterManager activeMasterManager = master1.getActiveMasterManager(); 223 activeMasterManager.blockUntilBecomingActiveMaster(100, 224 Mockito.mock(MonitoredTask.class)); 225 assertEquals(sn1, activeMasterManager.getActiveMasterServerName().get()); 226 assertEquals(0, activeMasterManager.getBackupMasters().size()); 227 // Add backup masters 228 List<String> backupZNodes = new ArrayList<>(); 229 for (int i = 1; i <= 10; i++) { 230 ServerName backupSn = ServerName.valueOf("localhost", 1000 + i, -1); 231 String backupZn = ZNodePaths.joinZNode( 232 zk.getZNodePaths().backupMasterAddressesZNode, backupSn.toString()); 233 backupZNodes.add(backupZn); 234 MasterAddressTracker.setMasterAddress(zk, backupZn, backupSn, 1234); 235 TEST_UTIL.waitFor(10000, 236 () -> activeMasterManager.getBackupMasters().size() == backupZNodes.size()); 237 } 238 // Remove backup masters 239 int numBackups = backupZNodes.size(); 240 for (String backupZNode: backupZNodes) { 241 ZKUtil.deleteNode(zk, backupZNode); 242 final int currentBackups = --numBackups; 243 TEST_UTIL.waitFor(10000, 244 () -> activeMasterManager.getBackupMasters().size() == currentBackups); 245 } 246 } 247 } 248 249 /** 250 * Assert there is an active master and that it has the specified address. 251 * @param zk single Zookeeper watcher 252 * @param expectedAddress the expected address of the master 253 * @throws KeeperException unexpected Zookeeper exception 254 * @throws IOException if an IO problem is encountered 255 */ 256 private void assertMaster(ZKWatcher zk, ServerName expectedAddress) throws 257 KeeperException, IOException { 258 ServerName readAddress = MasterAddressTracker.getMasterAddress(zk); 259 assertNotNull(readAddress); 260 assertEquals(expectedAddress, readAddress); 261 } 262 263 public static class WaitToBeMasterThread extends Thread { 264 265 ActiveMasterManager manager; 266 DummyMaster dummyMaster; 267 boolean isActiveMaster; 268 269 public WaitToBeMasterThread(ZKWatcher zk, ServerName address) throws InterruptedIOException { 270 this.dummyMaster = new DummyMaster(zk,address); 271 this.manager = this.dummyMaster.getActiveMasterManager(); 272 isActiveMaster = false; 273 } 274 275 @Override 276 public void run() { 277 manager.blockUntilBecomingActiveMaster(100, 278 Mockito.mock(MonitoredTask.class)); 279 LOG.info("Second master has become the active master!"); 280 isActiveMaster = true; 281 } 282 } 283 284 public static class NodeDeletionListener extends ZKListener { 285 private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class); 286 287 private Semaphore lock; 288 private String node; 289 290 public NodeDeletionListener(ZKWatcher watcher, String node) { 291 super(watcher); 292 lock = new Semaphore(0); 293 this.node = node; 294 } 295 296 @Override 297 public void nodeDeleted(String path) { 298 if(path.equals(node)) { 299 LOG.debug("nodeDeleted(" + path + ")"); 300 lock.release(); 301 } 302 } 303 304 public void waitForDeletion() throws InterruptedException { 305 lock.acquire(); 306 } 307 } 308 309 /** 310 * Dummy Master Implementation. 311 */ 312 public static class DummyMaster implements Server { 313 private volatile boolean stopped; 314 private ClusterStatusTracker clusterStatusTracker; 315 private ActiveMasterManager activeMasterManager; 316 317 public DummyMaster(ZKWatcher zk, ServerName master) throws InterruptedIOException { 318 this.clusterStatusTracker = 319 new ClusterStatusTracker(zk, this); 320 clusterStatusTracker.start(); 321 322 this.activeMasterManager = 323 new ActiveMasterManager(zk, master, this); 324 zk.registerListener(activeMasterManager); 325 } 326 327 @Override 328 public void abort(final String msg, final Throwable t) {} 329 330 @Override 331 public boolean isAborted() { 332 return false; 333 } 334 335 @Override 336 public Configuration getConfiguration() { 337 return null; 338 } 339 340 @Override 341 public ZKWatcher getZooKeeper() { 342 return null; 343 } 344 345 @Override 346 public CoordinatedStateManager getCoordinatedStateManager() { 347 return null; 348 } 349 350 @Override 351 public ServerName getServerName() { 352 return null; 353 } 354 355 @Override 356 public boolean isStopped() { 357 return this.stopped; 358 } 359 360 @Override 361 public void stop(String why) { 362 this.stopped = true; 363 } 364 365 @Override 366 public ClusterConnection getConnection() { 367 return null; 368 } 369 370 public ClusterStatusTracker getClusterStatusTracker() { 371 return clusterStatusTracker; 372 } 373 374 public ActiveMasterManager getActiveMasterManager() { 375 return activeMasterManager; 376 } 377 378 @Override 379 public ChoreService getChoreService() { 380 return null; 381 } 382 383 @Override 384 public ClusterConnection getClusterConnection() { 385 // TODO Auto-generated method stub 386 return null; 387 } 388 389 @Override 390 public FileSystem getFileSystem() { 391 return null; 392 } 393 394 @Override 395 public boolean isStopping() { 396 return false; 397 } 398 399 @Override 400 public Connection createConnection(Configuration conf) throws IOException { 401 return null; 402 } 403 } 404}