001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertNotNull; 023import static org.junit.Assert.assertTrue; 024import static org.mockito.ArgumentMatchers.any; 025import static org.mockito.Mockito.when; 026 027import java.io.IOException; 028import java.io.InterruptedIOException; 029import java.util.ArrayList; 030import java.util.List; 031import java.util.concurrent.Semaphore; 032import org.apache.hadoop.conf.Configuration; 033import org.apache.hadoop.hbase.HBaseClassTestRule; 034import org.apache.hadoop.hbase.HBaseTestingUtil; 035import org.apache.hadoop.hbase.ServerName; 036import org.apache.hadoop.hbase.monitoring.MonitoredTask; 037import org.apache.hadoop.hbase.monitoring.TaskGroup; 038import org.apache.hadoop.hbase.testclassification.MasterTests; 039import org.apache.hadoop.hbase.testclassification.MediumTests; 040import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 041import org.apache.hadoop.hbase.util.MockServer; 042import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker; 043import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; 044import org.apache.hadoop.hbase.zookeeper.ZKListener; 045import org.apache.hadoop.hbase.zookeeper.ZKUtil; 046import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 047import org.apache.hadoop.hbase.zookeeper.ZNodePaths; 048import org.apache.zookeeper.KeeperException; 049import org.junit.AfterClass; 050import org.junit.BeforeClass; 051import org.junit.ClassRule; 052import org.junit.Test; 053import org.junit.experimental.categories.Category; 054import org.mockito.Mockito; 055import org.slf4j.Logger; 056import org.slf4j.LoggerFactory; 057 058/** 059 * Test the {@link ActiveMasterManager}. 060 */ 061@Category({ MasterTests.class, MediumTests.class }) 062public class TestActiveMasterManager { 063 064 @ClassRule 065 public static final HBaseClassTestRule CLASS_RULE = 066 HBaseClassTestRule.forClass(TestActiveMasterManager.class); 067 068 private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class); 069 private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 070 071 @BeforeClass 072 public static void setUpBeforeClass() throws Exception { 073 TEST_UTIL.startMiniZKCluster(); 074 } 075 076 @AfterClass 077 public static void tearDownAfterClass() throws Exception { 078 TEST_UTIL.shutdownMiniZKCluster(); 079 } 080 081 @Test 082 public void testRestartMaster() throws IOException, KeeperException { 083 try (ZKWatcher zk = 084 new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) { 085 try { 086 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 087 ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode); 088 } catch (KeeperException.NoNodeException nne) { 089 } 090 091 // Create the master node with a dummy address 092 ServerName master = ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime()); 093 // Should not have a master yet 094 DummyMaster dummyMaster = new DummyMaster(zk, master); 095 ClusterStatusTracker clusterStatusTracker = dummyMaster.getClusterStatusTracker(); 096 ActiveMasterManager activeMasterManager = dummyMaster.getActiveMasterManager(); 097 assertFalse(activeMasterManager.clusterHasActiveMaster.get()); 098 assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); 099 100 // First test becoming the active master uninterrupted 101 TaskGroup status = mockTaskGroup(); 102 clusterStatusTracker.setClusterUp(); 103 104 activeMasterManager.blockUntilBecomingActiveMaster(100, status); 105 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 106 assertMaster(zk, master); 107 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 108 109 // Now pretend master restart 110 DummyMaster secondDummyMaster = new DummyMaster(zk, master); 111 ActiveMasterManager secondActiveMasterManager = secondDummyMaster.getActiveMasterManager(); 112 assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get()); 113 activeMasterManager.blockUntilBecomingActiveMaster(100, status); 114 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 115 assertMaster(zk, master); 116 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 117 assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get()); 118 } 119 } 120 121 /** 122 * Unit tests that uses ZooKeeper but does not use the master-side methods but rather acts 123 * directly on ZK. 124 */ 125 @Test 126 public void testActiveMasterManagerFromZK() throws Exception { 127 try (ZKWatcher zk = 128 new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) { 129 try { 130 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 131 ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode); 132 } catch (KeeperException.NoNodeException nne) { 133 } 134 135 // Create the master node with a dummy address 136 ServerName firstMasterAddress = 137 ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime()); 138 ServerName secondMasterAddress = 139 ServerName.valueOf("localhost", 2, EnvironmentEdgeManager.currentTime()); 140 141 // Should not have a master yet 142 DummyMaster ms1 = new DummyMaster(zk, firstMasterAddress); 143 ActiveMasterManager activeMasterManager = ms1.getActiveMasterManager(); 144 assertFalse(activeMasterManager.clusterHasActiveMaster.get()); 145 assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); 146 147 // First test becoming the active master uninterrupted 148 ClusterStatusTracker clusterStatusTracker = ms1.getClusterStatusTracker(); 149 clusterStatusTracker.setClusterUp(); 150 151 activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup()); 152 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 153 assertMaster(zk, firstMasterAddress); 154 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 155 156 // New manager will now try to become the active master in another thread 157 WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress); 158 t.start(); 159 // Wait for this guy to figure out there is another active master 160 // Wait for 1 second at most 161 int sleeps = 0; 162 while (!t.manager.clusterHasActiveMaster.get() && sleeps < 100) { 163 Thread.sleep(10); 164 sleeps++; 165 } 166 167 // Both should see that there is an active master 168 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 169 assertTrue(t.manager.clusterHasActiveMaster.get()); 170 // But secondary one should not be the active master 171 assertFalse(t.isActiveMaster); 172 // Verify the active master ServerName is populated in standby master. 173 assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get()); 174 175 // Close the first server and delete it's master node 176 ms1.stop("stopping first server"); 177 178 // Use a listener to capture when the node is actually deleted 179 NodeDeletionListener listener = 180 new NodeDeletionListener(zk, zk.getZNodePaths().masterAddressZNode); 181 zk.registerListener(listener); 182 183 LOG.info("Deleting master node"); 184 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 185 186 // Wait for the node to be deleted 187 LOG.info("Waiting for active master manager to be notified"); 188 listener.waitForDeletion(); 189 LOG.info("Master node deleted"); 190 191 // Now we expect the secondary manager to have and be the active master 192 // Wait for 1 second at most 193 sleeps = 0; 194 while (!t.isActiveMaster && sleeps < 100) { 195 Thread.sleep(10); 196 sleeps++; 197 } 198 LOG.debug("Slept " + sleeps + " times"); 199 200 assertTrue(t.manager.clusterHasActiveMaster.get()); 201 assertTrue(t.isActiveMaster); 202 assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get()); 203 204 LOG.info("Deleting master node"); 205 206 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 207 } 208 } 209 210 @Test 211 public void testBackupMasterUpdates() throws Exception { 212 Configuration conf = TEST_UTIL.getConfiguration(); 213 try (ZKWatcher zk = new ZKWatcher(conf, "testBackupMasterUpdates", null, true)) { 214 ServerName sn1 = ServerName.valueOf("localhost", 1, -1); 215 DummyMaster master1 = new DummyMaster(zk, sn1); 216 ActiveMasterManager activeMasterManager = master1.getActiveMasterManager(); 217 activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup()); 218 assertEquals(sn1, activeMasterManager.getActiveMasterServerName().get()); 219 assertEquals(0, activeMasterManager.getBackupMasters().size()); 220 // Add backup masters 221 List<String> backupZNodes = new ArrayList<>(); 222 for (int i = 1; i <= 10; i++) { 223 ServerName backupSn = ServerName.valueOf("localhost", 1000 + i, -1); 224 String backupZn = 225 ZNodePaths.joinZNode(zk.getZNodePaths().backupMasterAddressesZNode, backupSn.toString()); 226 backupZNodes.add(backupZn); 227 MasterAddressTracker.setMasterAddress(zk, backupZn, backupSn, 1234); 228 TEST_UTIL.waitFor(10000, 229 () -> activeMasterManager.getBackupMasters().size() == backupZNodes.size()); 230 } 231 // Remove backup masters 232 int numBackups = backupZNodes.size(); 233 for (String backupZNode : backupZNodes) { 234 ZKUtil.deleteNode(zk, backupZNode); 235 final int currentBackups = --numBackups; 236 TEST_UTIL.waitFor(10000, 237 () -> activeMasterManager.getBackupMasters().size() == currentBackups); 238 } 239 } 240 } 241 242 /** 243 * Assert there is an active master and that it has the specified address. 244 * @param zk single Zookeeper watcher 245 * @param expectedAddress the expected address of the master 246 * @throws KeeperException unexpected Zookeeper exception 247 * @throws IOException if an IO problem is encountered 248 */ 249 private void assertMaster(ZKWatcher zk, ServerName expectedAddress) 250 throws KeeperException, IOException { 251 ServerName readAddress = MasterAddressTracker.getMasterAddress(zk); 252 assertNotNull(readAddress); 253 assertEquals(expectedAddress, readAddress); 254 } 255 256 public static class WaitToBeMasterThread extends Thread { 257 258 ActiveMasterManager manager; 259 DummyMaster dummyMaster; 260 boolean isActiveMaster; 261 262 public WaitToBeMasterThread(ZKWatcher zk, ServerName address) throws InterruptedIOException { 263 this.dummyMaster = new DummyMaster(zk, address); 264 this.manager = this.dummyMaster.getActiveMasterManager(); 265 isActiveMaster = false; 266 } 267 268 @Override 269 public void run() { 270 manager.blockUntilBecomingActiveMaster(100, mockTaskGroup()); 271 LOG.info("Second master has become the active master!"); 272 isActiveMaster = true; 273 } 274 } 275 276 private static TaskGroup mockTaskGroup() { 277 TaskGroup taskGroup = Mockito.mock(TaskGroup.class); 278 MonitoredTask task = Mockito.mock(MonitoredTask.class); 279 when(taskGroup.addTask(any())).thenReturn(task); 280 return taskGroup; 281 } 282 283 public static class NodeDeletionListener extends ZKListener { 284 private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class); 285 286 private Semaphore lock; 287 private String node; 288 289 public NodeDeletionListener(ZKWatcher watcher, String node) { 290 super(watcher); 291 lock = new Semaphore(0); 292 this.node = node; 293 } 294 295 @Override 296 public void nodeDeleted(String path) { 297 if (path.equals(node)) { 298 LOG.debug("nodeDeleted(" + path + ")"); 299 lock.release(); 300 } 301 } 302 303 public void waitForDeletion() throws InterruptedException { 304 lock.acquire(); 305 } 306 } 307 308 /** 309 * Dummy Master Implementation. 310 */ 311 public static class DummyMaster extends MockServer { 312 private ClusterStatusTracker clusterStatusTracker; 313 private ActiveMasterManager activeMasterManager; 314 315 public DummyMaster(ZKWatcher zk, ServerName master) throws InterruptedIOException { 316 this.clusterStatusTracker = new ClusterStatusTracker(zk, this); 317 clusterStatusTracker.start(); 318 319 this.activeMasterManager = new ActiveMasterManager(zk, master, this); 320 zk.registerListener(activeMasterManager); 321 } 322 323 public ClusterStatusTracker getClusterStatusTracker() { 324 return clusterStatusTracker; 325 } 326 327 public ActiveMasterManager getActiveMasterManager() { 328 return activeMasterManager; 329 } 330 } 331}