001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertNotNull; 023import static org.junit.Assert.assertTrue; 024 025import java.io.IOException; 026import java.io.InterruptedIOException; 027import java.util.ArrayList; 028import java.util.List; 029import java.util.concurrent.Semaphore; 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.fs.FileSystem; 032import org.apache.hadoop.hbase.ChoreService; 033import org.apache.hadoop.hbase.CoordinatedStateManager; 034import org.apache.hadoop.hbase.HBaseClassTestRule; 035import org.apache.hadoop.hbase.HBaseTestingUtility; 036import org.apache.hadoop.hbase.Server; 037import org.apache.hadoop.hbase.ServerName; 038import org.apache.hadoop.hbase.client.ClusterConnection; 039import org.apache.hadoop.hbase.client.Connection; 040import org.apache.hadoop.hbase.monitoring.MonitoredTask; 041import org.apache.hadoop.hbase.testclassification.MasterTests; 042import org.apache.hadoop.hbase.testclassification.MediumTests; 043import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 044import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker; 045import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; 046import org.apache.hadoop.hbase.zookeeper.ZKListener; 047import org.apache.hadoop.hbase.zookeeper.ZKUtil; 048import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 049import org.apache.hadoop.hbase.zookeeper.ZNodePaths; 050import org.apache.zookeeper.KeeperException; 051import org.junit.AfterClass; 052import org.junit.BeforeClass; 053import org.junit.ClassRule; 054import org.junit.Test; 055import org.junit.experimental.categories.Category; 056import org.mockito.Mockito; 057import org.slf4j.Logger; 058import org.slf4j.LoggerFactory; 059 060/** 061 * Test the {@link ActiveMasterManager}. 062 */ 063@Category({ MasterTests.class, MediumTests.class }) 064public class TestActiveMasterManager { 065 066 @ClassRule 067 public static final HBaseClassTestRule CLASS_RULE = 068 HBaseClassTestRule.forClass(TestActiveMasterManager.class); 069 070 private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class); 071 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 072 073 @BeforeClass 074 public static void setUpBeforeClass() throws Exception { 075 TEST_UTIL.startMiniZKCluster(); 076 } 077 078 @AfterClass 079 public static void tearDownAfterClass() throws Exception { 080 TEST_UTIL.shutdownMiniZKCluster(); 081 } 082 083 @Test 084 public void testRestartMaster() throws IOException, KeeperException { 085 try (ZKWatcher zk = 086 new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) { 087 try { 088 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 089 ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode); 090 } catch (KeeperException.NoNodeException nne) { 091 } 092 093 // Create the master node with a dummy address 094 ServerName master = ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime()); 095 // Should not have a master yet 096 DummyMaster dummyMaster = new DummyMaster(zk, master); 097 ClusterStatusTracker clusterStatusTracker = dummyMaster.getClusterStatusTracker(); 098 ActiveMasterManager activeMasterManager = dummyMaster.getActiveMasterManager(); 099 assertFalse(activeMasterManager.clusterHasActiveMaster.get()); 100 assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); 101 102 // First test becoming the active master uninterrupted 103 MonitoredTask status = Mockito.mock(MonitoredTask.class); 104 clusterStatusTracker.setClusterUp(); 105 106 activeMasterManager.blockUntilBecomingActiveMaster(100, status); 107 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 108 assertMaster(zk, master); 109 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 110 111 // Now pretend master restart 112 DummyMaster secondDummyMaster = new DummyMaster(zk, master); 113 ActiveMasterManager secondActiveMasterManager = secondDummyMaster.getActiveMasterManager(); 114 assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get()); 115 activeMasterManager.blockUntilBecomingActiveMaster(100, status); 116 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 117 assertMaster(zk, master); 118 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 119 assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get()); 120 } 121 } 122 123 /** 124 * Unit tests that uses ZooKeeper but does not use the master-side methods but rather acts 125 * directly on ZK. n 126 */ 127 @Test 128 public void testActiveMasterManagerFromZK() throws Exception { 129 try (ZKWatcher zk = 130 new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) { 131 try { 132 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 133 ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode); 134 } catch (KeeperException.NoNodeException nne) { 135 } 136 137 // Create the master node with a dummy address 138 ServerName firstMasterAddress = 139 ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime()); 140 ServerName secondMasterAddress = 141 ServerName.valueOf("localhost", 2, EnvironmentEdgeManager.currentTime()); 142 143 // Should not have a master yet 144 DummyMaster ms1 = new DummyMaster(zk, firstMasterAddress); 145 ActiveMasterManager activeMasterManager = ms1.getActiveMasterManager(); 146 assertFalse(activeMasterManager.clusterHasActiveMaster.get()); 147 assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); 148 149 // First test becoming the active master uninterrupted 150 ClusterStatusTracker clusterStatusTracker = ms1.getClusterStatusTracker(); 151 clusterStatusTracker.setClusterUp(); 152 activeMasterManager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class)); 153 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 154 assertMaster(zk, firstMasterAddress); 155 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 156 157 // New manager will now try to become the active master in another thread 158 WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress); 159 t.start(); 160 // Wait for this guy to figure out there is another active master 161 // Wait for 1 second at most 162 int sleeps = 0; 163 while (!t.manager.clusterHasActiveMaster.get() && sleeps < 100) { 164 Thread.sleep(10); 165 sleeps++; 166 } 167 168 // Both should see that there is an active master 169 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 170 assertTrue(t.manager.clusterHasActiveMaster.get()); 171 // But secondary one should not be the active master 172 assertFalse(t.isActiveMaster); 173 // Verify the active master ServerName is populated in standby master. 174 assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get()); 175 176 // Close the first server and delete it's master node 177 ms1.stop("stopping first server"); 178 179 // Use a listener to capture when the node is actually deleted 180 NodeDeletionListener listener = 181 new NodeDeletionListener(zk, zk.getZNodePaths().masterAddressZNode); 182 zk.registerListener(listener); 183 184 LOG.info("Deleting master node"); 185 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 186 187 // Wait for the node to be deleted 188 LOG.info("Waiting for active master manager to be notified"); 189 listener.waitForDeletion(); 190 LOG.info("Master node deleted"); 191 192 // Now we expect the secondary manager to have and be the active master 193 // Wait for 1 second at most 194 sleeps = 0; 195 while (!t.isActiveMaster && sleeps < 100) { 196 Thread.sleep(10); 197 sleeps++; 198 } 199 LOG.debug("Slept " + sleeps + " times"); 200 201 assertTrue(t.manager.clusterHasActiveMaster.get()); 202 assertTrue(t.isActiveMaster); 203 assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get()); 204 205 LOG.info("Deleting master node"); 206 207 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 208 } 209 } 210 211 @Test 212 public void testBackupMasterUpdates() throws Exception { 213 Configuration conf = TEST_UTIL.getConfiguration(); 214 try (ZKWatcher zk = new ZKWatcher(conf, "testBackupMasterUpdates", null, true)) { 215 ServerName sn1 = ServerName.valueOf("localhost", 1, -1); 216 DummyMaster master1 = new DummyMaster(zk, sn1); 217 ActiveMasterManager activeMasterManager = master1.getActiveMasterManager(); 218 activeMasterManager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class)); 219 assertEquals(sn1, activeMasterManager.getActiveMasterServerName().get()); 220 assertEquals(0, activeMasterManager.getBackupMasters().size()); 221 // Add backup masters 222 List<String> backupZNodes = new ArrayList<>(); 223 for (int i = 1; i <= 10; i++) { 224 ServerName backupSn = ServerName.valueOf("localhost", 1000 + i, -1); 225 String backupZn = 226 ZNodePaths.joinZNode(zk.getZNodePaths().backupMasterAddressesZNode, backupSn.toString()); 227 backupZNodes.add(backupZn); 228 MasterAddressTracker.setMasterAddress(zk, backupZn, backupSn, 1234); 229 TEST_UTIL.waitFor(10000, 230 () -> activeMasterManager.getBackupMasters().size() == backupZNodes.size()); 231 } 232 // Remove backup masters 233 int numBackups = backupZNodes.size(); 234 for (String backupZNode : backupZNodes) { 235 ZKUtil.deleteNode(zk, backupZNode); 236 final int currentBackups = --numBackups; 237 TEST_UTIL.waitFor(10000, 238 () -> activeMasterManager.getBackupMasters().size() == currentBackups); 239 } 240 } 241 } 242 243 /** 244 * Assert there is an active master and that it has the specified address. 245 * @param zk single Zookeeper watcher 246 * @param expectedAddress the expected address of the master 247 * @throws KeeperException unexpected Zookeeper exception 248 * @throws IOException if an IO problem is encountered 249 */ 250 private void assertMaster(ZKWatcher zk, ServerName expectedAddress) 251 throws KeeperException, IOException { 252 ServerName readAddress = MasterAddressTracker.getMasterAddress(zk); 253 assertNotNull(readAddress); 254 assertEquals(expectedAddress, readAddress); 255 } 256 257 public static class WaitToBeMasterThread extends Thread { 258 259 ActiveMasterManager manager; 260 DummyMaster dummyMaster; 261 boolean isActiveMaster; 262 263 public WaitToBeMasterThread(ZKWatcher zk, ServerName address) throws InterruptedIOException { 264 this.dummyMaster = new DummyMaster(zk, address); 265 this.manager = this.dummyMaster.getActiveMasterManager(); 266 isActiveMaster = false; 267 } 268 269 @Override 270 public void run() { 271 manager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class)); 272 LOG.info("Second master has become the active master!"); 273 isActiveMaster = true; 274 } 275 } 276 277 public static class NodeDeletionListener extends ZKListener { 278 private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class); 279 280 private Semaphore lock; 281 private String node; 282 283 public NodeDeletionListener(ZKWatcher watcher, String node) { 284 super(watcher); 285 lock = new Semaphore(0); 286 this.node = node; 287 } 288 289 @Override 290 public void nodeDeleted(String path) { 291 if (path.equals(node)) { 292 LOG.debug("nodeDeleted(" + path + ")"); 293 lock.release(); 294 } 295 } 296 297 public void waitForDeletion() throws InterruptedException { 298 lock.acquire(); 299 } 300 } 301 302 /** 303 * Dummy Master Implementation. 304 */ 305 public static class DummyMaster implements Server { 306 private volatile boolean stopped; 307 private ClusterStatusTracker clusterStatusTracker; 308 private ActiveMasterManager activeMasterManager; 309 310 public DummyMaster(ZKWatcher zk, ServerName master) throws InterruptedIOException { 311 this.clusterStatusTracker = new ClusterStatusTracker(zk, this); 312 clusterStatusTracker.start(); 313 314 this.activeMasterManager = new ActiveMasterManager(zk, master, this); 315 zk.registerListener(activeMasterManager); 316 } 317 318 @Override 319 public void abort(final String msg, final Throwable t) { 320 } 321 322 @Override 323 public boolean isAborted() { 324 return false; 325 } 326 327 @Override 328 public Configuration getConfiguration() { 329 return null; 330 } 331 332 @Override 333 public ZKWatcher getZooKeeper() { 334 return null; 335 } 336 337 @Override 338 public CoordinatedStateManager getCoordinatedStateManager() { 339 return null; 340 } 341 342 @Override 343 public ServerName getServerName() { 344 return null; 345 } 346 347 @Override 348 public boolean isStopped() { 349 return this.stopped; 350 } 351 352 @Override 353 public void stop(String why) { 354 this.stopped = true; 355 } 356 357 @Override 358 public ClusterConnection getConnection() { 359 return null; 360 } 361 362 public ClusterStatusTracker getClusterStatusTracker() { 363 return clusterStatusTracker; 364 } 365 366 public ActiveMasterManager getActiveMasterManager() { 367 return activeMasterManager; 368 } 369 370 @Override 371 public ChoreService getChoreService() { 372 return null; 373 } 374 375 @Override 376 public ClusterConnection getClusterConnection() { 377 // TODO Auto-generated method stub 378 return null; 379 } 380 381 @Override 382 public FileSystem getFileSystem() { 383 return null; 384 } 385 386 @Override 387 public boolean isStopping() { 388 return false; 389 } 390 391 @Override 392 public Connection createConnection(Configuration conf) throws IOException { 393 return null; 394 } 395 } 396}