001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertNotNull; 023import static org.junit.Assert.assertTrue; 024 025import java.io.IOException; 026import java.util.concurrent.Semaphore; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.FileSystem; 029import org.apache.hadoop.hbase.ChoreService; 030import org.apache.hadoop.hbase.CoordinatedStateManager; 031import org.apache.hadoop.hbase.HBaseClassTestRule; 032import org.apache.hadoop.hbase.HBaseTestingUtility; 033import org.apache.hadoop.hbase.Server; 034import org.apache.hadoop.hbase.ServerName; 035import org.apache.hadoop.hbase.client.ClusterConnection; 036import org.apache.hadoop.hbase.client.Connection; 037import org.apache.hadoop.hbase.monitoring.MonitoredTask; 038import org.apache.hadoop.hbase.testclassification.MasterTests; 039import org.apache.hadoop.hbase.testclassification.MediumTests; 040import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker; 041import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; 042import org.apache.hadoop.hbase.zookeeper.ZKListener; 043import org.apache.hadoop.hbase.zookeeper.ZKUtil; 044import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 045import org.apache.zookeeper.KeeperException; 046import org.junit.AfterClass; 047import org.junit.BeforeClass; 048import org.junit.ClassRule; 049import org.junit.Test; 050import org.junit.experimental.categories.Category; 051import org.mockito.Mockito; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055/** 056 * Test the {@link ActiveMasterManager}. 057 */ 058@Category({MasterTests.class, MediumTests.class}) 059public class TestActiveMasterManager { 060 061 @ClassRule 062 public static final HBaseClassTestRule CLASS_RULE = 063 HBaseClassTestRule.forClass(TestActiveMasterManager.class); 064 065 private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class); 066 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 067 068 @BeforeClass 069 public static void setUpBeforeClass() throws Exception { 070 TEST_UTIL.startMiniZKCluster(); 071 } 072 073 @AfterClass 074 public static void tearDownAfterClass() throws Exception { 075 TEST_UTIL.shutdownMiniZKCluster(); 076 } 077 078 @Test public void testRestartMaster() throws IOException, KeeperException { 079 ZKWatcher zk = new ZKWatcher(TEST_UTIL.getConfiguration(), 080 "testActiveMasterManagerFromZK", null, true); 081 try { 082 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 083 ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode); 084 } catch(KeeperException.NoNodeException nne) {} 085 086 // Create the master node with a dummy address 087 ServerName master = ServerName.valueOf("localhost", 1, System.currentTimeMillis()); 088 // Should not have a master yet 089 DummyMaster dummyMaster = new DummyMaster(zk,master); 090 ClusterStatusTracker clusterStatusTracker = 091 dummyMaster.getClusterStatusTracker(); 092 ActiveMasterManager activeMasterManager = 093 dummyMaster.getActiveMasterManager(); 094 assertFalse(activeMasterManager.clusterHasActiveMaster.get()); 095 assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); 096 097 // First test becoming the active master uninterrupted 098 MonitoredTask status = Mockito.mock(MonitoredTask.class); 099 clusterStatusTracker.setClusterUp(); 100 101 activeMasterManager.blockUntilBecomingActiveMaster(100, status); 102 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 103 assertMaster(zk, master); 104 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 105 106 // Now pretend master restart 107 DummyMaster secondDummyMaster = new DummyMaster(zk,master); 108 ActiveMasterManager secondActiveMasterManager = 109 secondDummyMaster.getActiveMasterManager(); 110 assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get()); 111 activeMasterManager.blockUntilBecomingActiveMaster(100, status); 112 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 113 assertMaster(zk, master); 114 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 115 assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get()); 116 } 117 118 /** 119 * Unit tests that uses ZooKeeper but does not use the master-side methods 120 * but rather acts directly on ZK. 121 * @throws Exception 122 */ 123 @Test 124 public void testActiveMasterManagerFromZK() throws Exception { 125 ZKWatcher zk = new ZKWatcher(TEST_UTIL.getConfiguration(), 126 "testActiveMasterManagerFromZK", null, true); 127 try { 128 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 129 ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode); 130 } catch(KeeperException.NoNodeException nne) {} 131 132 // Create the master node with a dummy address 133 ServerName firstMasterAddress = 134 ServerName.valueOf("localhost", 1, System.currentTimeMillis()); 135 ServerName secondMasterAddress = 136 ServerName.valueOf("localhost", 2, System.currentTimeMillis()); 137 138 // Should not have a master yet 139 DummyMaster ms1 = new DummyMaster(zk,firstMasterAddress); 140 ActiveMasterManager activeMasterManager = 141 ms1.getActiveMasterManager(); 142 assertFalse(activeMasterManager.clusterHasActiveMaster.get()); 143 assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); 144 145 // First test becoming the active master uninterrupted 146 ClusterStatusTracker clusterStatusTracker = 147 ms1.getClusterStatusTracker(); 148 clusterStatusTracker.setClusterUp(); 149 activeMasterManager.blockUntilBecomingActiveMaster(100, 150 Mockito.mock(MonitoredTask.class)); 151 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 152 assertMaster(zk, firstMasterAddress); 153 assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); 154 155 // New manager will now try to become the active master in another thread 156 WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress); 157 t.start(); 158 // Wait for this guy to figure out there is another active master 159 // Wait for 1 second at most 160 int sleeps = 0; 161 while(!t.manager.clusterHasActiveMaster.get() && sleeps < 100) { 162 Thread.sleep(10); 163 sleeps++; 164 } 165 166 // Both should see that there is an active master 167 assertTrue(activeMasterManager.clusterHasActiveMaster.get()); 168 assertTrue(t.manager.clusterHasActiveMaster.get()); 169 // But secondary one should not be the active master 170 assertFalse(t.isActiveMaster); 171 // Verify the active master ServerName is populated in standby master. 172 assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get()); 173 174 // Close the first server and delete it's master node 175 ms1.stop("stopping first server"); 176 177 // Use a listener to capture when the node is actually deleted 178 NodeDeletionListener listener = new NodeDeletionListener(zk, 179 zk.getZNodePaths().masterAddressZNode); 180 zk.registerListener(listener); 181 182 LOG.info("Deleting master node"); 183 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 184 185 // Wait for the node to be deleted 186 LOG.info("Waiting for active master manager to be notified"); 187 listener.waitForDeletion(); 188 LOG.info("Master node deleted"); 189 190 // Now we expect the secondary manager to have and be the active master 191 // Wait for 1 second at most 192 sleeps = 0; 193 while(!t.isActiveMaster && sleeps < 100) { 194 Thread.sleep(10); 195 sleeps++; 196 } 197 LOG.debug("Slept " + sleeps + " times"); 198 199 assertTrue(t.manager.clusterHasActiveMaster.get()); 200 assertTrue(t.isActiveMaster); 201 assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get()); 202 203 LOG.info("Deleting master node"); 204 205 ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode); 206 } 207 208 /** 209 * Assert there is an active master and that it has the specified address. 210 * @param zk single Zookeeper watcher 211 * @param expectedAddress the expected address of the master 212 * @throws KeeperException unexpected Zookeeper exception 213 * @throws IOException if an IO problem is encountered 214 */ 215 private void assertMaster(ZKWatcher zk, 216 ServerName expectedAddress) 217 throws KeeperException, IOException { 218 ServerName readAddress = MasterAddressTracker.getMasterAddress(zk); 219 assertNotNull(readAddress); 220 assertTrue(expectedAddress.equals(readAddress)); 221 } 222 223 public static class WaitToBeMasterThread extends Thread { 224 225 ActiveMasterManager manager; 226 DummyMaster dummyMaster; 227 boolean isActiveMaster; 228 229 public WaitToBeMasterThread(ZKWatcher zk, ServerName address) { 230 this.dummyMaster = new DummyMaster(zk,address); 231 this.manager = this.dummyMaster.getActiveMasterManager(); 232 isActiveMaster = false; 233 } 234 235 @Override 236 public void run() { 237 manager.blockUntilBecomingActiveMaster(100, 238 Mockito.mock(MonitoredTask.class)); 239 LOG.info("Second master has become the active master!"); 240 isActiveMaster = true; 241 } 242 } 243 244 public static class NodeDeletionListener extends ZKListener { 245 private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class); 246 247 private Semaphore lock; 248 private String node; 249 250 public NodeDeletionListener(ZKWatcher watcher, String node) { 251 super(watcher); 252 lock = new Semaphore(0); 253 this.node = node; 254 } 255 256 @Override 257 public void nodeDeleted(String path) { 258 if(path.equals(node)) { 259 LOG.debug("nodeDeleted(" + path + ")"); 260 lock.release(); 261 } 262 } 263 264 public void waitForDeletion() throws InterruptedException { 265 lock.acquire(); 266 } 267 } 268 269 /** 270 * Dummy Master Implementation. 271 */ 272 public static class DummyMaster implements Server { 273 private volatile boolean stopped; 274 private ClusterStatusTracker clusterStatusTracker; 275 private ActiveMasterManager activeMasterManager; 276 277 public DummyMaster(ZKWatcher zk, ServerName master) { 278 this.clusterStatusTracker = 279 new ClusterStatusTracker(zk, this); 280 clusterStatusTracker.start(); 281 282 this.activeMasterManager = 283 new ActiveMasterManager(zk, master, this); 284 zk.registerListener(activeMasterManager); 285 } 286 287 @Override 288 public void abort(final String msg, final Throwable t) {} 289 290 @Override 291 public boolean isAborted() { 292 return false; 293 } 294 295 @Override 296 public Configuration getConfiguration() { 297 return null; 298 } 299 300 @Override 301 public ZKWatcher getZooKeeper() { 302 return null; 303 } 304 305 @Override 306 public CoordinatedStateManager getCoordinatedStateManager() { 307 return null; 308 } 309 310 @Override 311 public ServerName getServerName() { 312 return null; 313 } 314 315 @Override 316 public boolean isStopped() { 317 return this.stopped; 318 } 319 320 @Override 321 public void stop(String why) { 322 this.stopped = true; 323 } 324 325 @Override 326 public ClusterConnection getConnection() { 327 return null; 328 } 329 330 public ClusterStatusTracker getClusterStatusTracker() { 331 return clusterStatusTracker; 332 } 333 334 public ActiveMasterManager getActiveMasterManager() { 335 return activeMasterManager; 336 } 337 338 @Override 339 public ChoreService getChoreService() { 340 return null; 341 } 342 343 @Override 344 public ClusterConnection getClusterConnection() { 345 // TODO Auto-generated method stub 346 return null; 347 } 348 349 @Override 350 public FileSystem getFileSystem() { 351 return null; 352 } 353 354 @Override 355 public boolean isStopping() { 356 return false; 357 } 358 359 @Override 360 public Connection createConnection(Configuration conf) throws IOException { 361 return null; 362 } 363 } 364}