001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import static org.junit.jupiter.api.Assertions.assertEquals; 021import static org.junit.jupiter.api.Assertions.assertFalse; 022import static org.junit.jupiter.api.Assertions.assertNotNull; 023import static org.junit.jupiter.api.Assertions.assertTrue; 024import static org.mockito.ArgumentMatchers.any; 025import static org.mockito.Mockito.when; 026 027import java.io.IOException; 028import java.io.InterruptedIOException; 029import java.util.ArrayList; 030import java.util.List; 031import java.util.concurrent.Semaphore; 032import org.apache.hadoop.conf.Configuration; 033import org.apache.hadoop.hbase.HBaseTestingUtil; 034import org.apache.hadoop.hbase.ServerName; 035import org.apache.hadoop.hbase.keymeta.KeyManagementService; 036import org.apache.hadoop.hbase.monitoring.MonitoredTask; 037import org.apache.hadoop.hbase.monitoring.TaskGroup; 038import org.apache.hadoop.hbase.testclassification.MasterTests; 039import org.apache.hadoop.hbase.testclassification.MediumTests; 040import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 041import org.apache.hadoop.hbase.util.MockServer; 042import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker; 043import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; 044import org.apache.hadoop.hbase.zookeeper.ZKListener; 045import org.apache.hadoop.hbase.zookeeper.ZKUtil; 046import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 047import org.apache.hadoop.hbase.zookeeper.ZNodePaths; 048import org.apache.zookeeper.KeeperException; 049import org.junit.jupiter.api.AfterAll; 050import org.junit.jupiter.api.BeforeAll; 051import org.junit.jupiter.api.Tag; 052import org.junit.jupiter.api.Test; 053import org.mockito.Mockito; 054import org.slf4j.Logger; 055import org.slf4j.LoggerFactory; 056 057/** 058 * Test the {@link ActiveMasterManager}. 059 */ 060@Tag(MasterTests.TAG) 061@Tag(MediumTests.TAG) 062public class TestActiveMasterManager { 063 064 private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class); 065 private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 066 067 @BeforeAll 068 public static void setUpBeforeClass() throws Exception { 069 TEST_UTIL.startMiniZKCluster(); 070 } 071 072 @AfterAll 073 public static void tearDownAfterClass() throws Exception { 074 TEST_UTIL.shutdownMiniZKCluster(); 075 } 076 077 @Test 078 public void testRestartMaster() throws IOException, KeeperException { 079 try (ZKWatcher zk = 080 new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) { 081 try { 082 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 083 ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode); 084 } catch (KeeperException.NoNodeException nne) { 085 } 086 087 // Create the master node with a dummy address 088 ServerName master = ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime()); 089 // Should not have a master yet 090 DummyMaster dummyMaster = new DummyMaster(zk, master); 091 ClusterStatusTracker clusterStatusTracker = dummyMaster.getClusterStatusTracker(); 092 ActiveMasterManager activeMasterManager = dummyMaster.getActiveMasterManager(); 093 assertFalse(activeMasterManager.clusterHasActiveMaster.get()); 094 assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); 095 096 // First test becoming the active master uninterrupted 097 TaskGroup status = mockTaskGroup(); 098 clusterStatusTracker.setClusterUp(); 099 100 activeMasterManager.blockUntilBecomingActiveMaster(100, status); 101 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 102 assertMaster(zk, master); 103 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 104 105 // Now pretend master restart 106 DummyMaster secondDummyMaster = new DummyMaster(zk, master); 107 ActiveMasterManager secondActiveMasterManager = secondDummyMaster.getActiveMasterManager(); 108 assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get()); 109 activeMasterManager.blockUntilBecomingActiveMaster(100, status); 110 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 111 assertMaster(zk, master); 112 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 113 assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get()); 114 } 115 } 116 117 /** 118 * Unit tests that uses ZooKeeper but does not use the master-side methods but rather acts 119 * directly on ZK. 120 */ 121 @Test 122 public void testActiveMasterManagerFromZK() throws Exception { 123 try (ZKWatcher zk = 124 new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) { 125 try { 126 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 127 ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode); 128 } catch (KeeperException.NoNodeException nne) { 129 } 130 131 // Create the master node with a dummy address 132 ServerName firstMasterAddress = 133 ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime()); 134 ServerName secondMasterAddress = 135 ServerName.valueOf("localhost", 2, EnvironmentEdgeManager.currentTime()); 136 137 // Should not have a master yet 138 DummyMaster ms1 = new DummyMaster(zk, firstMasterAddress); 139 ActiveMasterManager activeMasterManager = ms1.getActiveMasterManager(); 140 assertFalse(activeMasterManager.clusterHasActiveMaster.get()); 141 assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); 142 143 // First test becoming the active master uninterrupted 144 ClusterStatusTracker clusterStatusTracker = ms1.getClusterStatusTracker(); 145 clusterStatusTracker.setClusterUp(); 146 147 activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup()); 148 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 149 assertMaster(zk, firstMasterAddress); 150 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 151 152 // New manager will now try to become the active master in another thread 153 WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress); 154 t.start(); 155 // Wait for this guy to figure out there is another active master 156 // Wait for 1 second at most 157 int sleeps = 0; 158 while (!t.manager.clusterHasActiveMaster.get() && sleeps < 100) { 159 Thread.sleep(10); 160 sleeps++; 161 } 162 163 // Both should see that there is an active master 164 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 165 assertTrue(t.manager.clusterHasActiveMaster.get()); 166 // But secondary one should not be the active master 167 assertFalse(t.isActiveMaster); 168 // Verify the active master ServerName is populated in standby master. 169 assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get()); 170 171 // Close the first server and delete it's master node 172 ms1.stop("stopping first server"); 173 174 // Use a listener to capture when the node is actually deleted 175 NodeDeletionListener listener = 176 new NodeDeletionListener(zk, zk.getZNodePaths().masterAddressZNode); 177 zk.registerListener(listener); 178 179 LOG.info("Deleting master node"); 180 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 181 182 // Wait for the node to be deleted 183 LOG.info("Waiting for active master manager to be notified"); 184 listener.waitForDeletion(); 185 LOG.info("Master node deleted"); 186 187 // Now we expect the secondary manager to have and be the active master 188 // Wait for 1 second at most 189 sleeps = 0; 190 while (!t.isActiveMaster && sleeps < 100) { 191 Thread.sleep(10); 192 sleeps++; 193 } 194 LOG.debug("Slept " + sleeps + " times"); 195 196 assertTrue(t.manager.clusterHasActiveMaster.get()); 197 assertTrue(t.isActiveMaster); 198 assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get()); 199 200 LOG.info("Deleting master node"); 201 202 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 203 } 204 } 205 206 @Test 207 public void testBackupMasterUpdates() throws Exception { 208 Configuration conf = TEST_UTIL.getConfiguration(); 209 try (ZKWatcher zk = new ZKWatcher(conf, "testBackupMasterUpdates", null, true)) { 210 ServerName sn1 = ServerName.valueOf("localhost", 1, -1); 211 DummyMaster master1 = new DummyMaster(zk, sn1); 212 ActiveMasterManager activeMasterManager = master1.getActiveMasterManager(); 213 activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup()); 214 assertEquals(sn1, activeMasterManager.getActiveMasterServerName().get()); 215 assertEquals(0, activeMasterManager.getBackupMasters().size()); 216 // Add backup masters 217 List<String> backupZNodes = new ArrayList<>(); 218 for (int i = 1; i <= 10; i++) { 219 ServerName backupSn = ServerName.valueOf("localhost", 1000 + i, -1); 220 String backupZn = 221 ZNodePaths.joinZNode(zk.getZNodePaths().backupMasterAddressesZNode, backupSn.toString()); 222 backupZNodes.add(backupZn); 223 MasterAddressTracker.setMasterAddress(zk, backupZn, backupSn, 1234); 224 TEST_UTIL.waitFor(10000, 225 () -> activeMasterManager.getBackupMasters().size() == backupZNodes.size()); 226 } 227 // Remove backup masters 228 int numBackups = backupZNodes.size(); 229 for (String backupZNode : backupZNodes) { 230 ZKUtil.deleteNode(zk, backupZNode); 231 final int currentBackups = --numBackups; 232 TEST_UTIL.waitFor(10000, 233 () -> activeMasterManager.getBackupMasters().size() == currentBackups); 234 } 235 } 236 } 237 238 /** 239 * Assert there is an active master and that it has the specified address. 240 * @param zk single Zookeeper watcher 241 * @param expectedAddress the expected address of the master 242 * @throws KeeperException unexpected Zookeeper exception 243 * @throws IOException if an IO problem is encountered 244 */ 245 private void assertMaster(ZKWatcher zk, ServerName expectedAddress) 246 throws KeeperException, IOException { 247 ServerName readAddress = MasterAddressTracker.getMasterAddress(zk); 248 assertNotNull(readAddress); 249 assertEquals(expectedAddress, readAddress); 250 } 251 252 public static class WaitToBeMasterThread extends Thread { 253 254 ActiveMasterManager manager; 255 DummyMaster dummyMaster; 256 boolean isActiveMaster; 257 258 public WaitToBeMasterThread(ZKWatcher zk, ServerName address) throws InterruptedIOException { 259 this.dummyMaster = new DummyMaster(zk, address); 260 this.manager = this.dummyMaster.getActiveMasterManager(); 261 isActiveMaster = false; 262 } 263 264 @Override 265 public void run() { 266 manager.blockUntilBecomingActiveMaster(100, mockTaskGroup()); 267 LOG.info("Second master has become the active master!"); 268 isActiveMaster = true; 269 } 270 } 271 272 private static TaskGroup mockTaskGroup() { 273 TaskGroup taskGroup = Mockito.mock(TaskGroup.class); 274 MonitoredTask task = Mockito.mock(MonitoredTask.class); 275 when(taskGroup.addTask(any())).thenReturn(task); 276 return taskGroup; 277 } 278 279 public static class NodeDeletionListener extends ZKListener { 280 private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class); 281 282 private Semaphore lock; 283 private String node; 284 285 public NodeDeletionListener(ZKWatcher watcher, String node) { 286 super(watcher); 287 lock = new Semaphore(0); 288 this.node = node; 289 } 290 291 @Override 292 public void nodeDeleted(String path) { 293 if (path.equals(node)) { 294 LOG.debug("nodeDeleted(" + path + ")"); 295 lock.release(); 296 } 297 } 298 299 public void waitForDeletion() throws InterruptedException { 300 lock.acquire(); 301 } 302 } 303 304 /** 305 * Dummy Master Implementation. 306 */ 307 public static class DummyMaster extends MockServer { 308 private ClusterStatusTracker clusterStatusTracker; 309 private ActiveMasterManager activeMasterManager; 310 311 public DummyMaster(ZKWatcher zk, ServerName master) throws InterruptedIOException { 312 this.clusterStatusTracker = new ClusterStatusTracker(zk, this); 313 clusterStatusTracker.start(); 314 315 this.activeMasterManager = new ActiveMasterManager(zk, master, this); 316 zk.registerListener(activeMasterManager); 317 } 318 319 public ClusterStatusTracker getClusterStatusTracker() { 320 return clusterStatusTracker; 321 } 322 323 public ActiveMasterManager getActiveMasterManager() { 324 return activeMasterManager; 325 } 326 327 @Override 328 public KeyManagementService getKeyManagementService() { 329 return null; 330 } 331 } 332}