001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertNotNull;
023import static org.junit.Assert.assertTrue;
024
025import java.io.IOException;
026import java.io.InterruptedIOException;
027import java.util.ArrayList;
028import java.util.List;
029import java.util.concurrent.Semaphore;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.fs.FileSystem;
032import org.apache.hadoop.hbase.ChoreService;
033import org.apache.hadoop.hbase.CoordinatedStateManager;
034import org.apache.hadoop.hbase.HBaseClassTestRule;
035import org.apache.hadoop.hbase.HBaseTestingUtility;
036import org.apache.hadoop.hbase.Server;
037import org.apache.hadoop.hbase.ServerName;
038import org.apache.hadoop.hbase.client.ClusterConnection;
039import org.apache.hadoop.hbase.client.Connection;
040import org.apache.hadoop.hbase.monitoring.MonitoredTask;
041import org.apache.hadoop.hbase.testclassification.MasterTests;
042import org.apache.hadoop.hbase.testclassification.MediumTests;
043import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
044import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
045import org.apache.hadoop.hbase.zookeeper.ZKListener;
046import org.apache.hadoop.hbase.zookeeper.ZKUtil;
047import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
048import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
049import org.apache.zookeeper.KeeperException;
050import org.junit.AfterClass;
051import org.junit.BeforeClass;
052import org.junit.ClassRule;
053import org.junit.Test;
054import org.junit.experimental.categories.Category;
055import org.mockito.Mockito;
056import org.slf4j.Logger;
057import org.slf4j.LoggerFactory;
058
059/**
060 * Test the {@link ActiveMasterManager}.
061 */
062@Category({MasterTests.class, MediumTests.class})
063public class TestActiveMasterManager {
064
065  @ClassRule
066  public static final HBaseClassTestRule CLASS_RULE =
067      HBaseClassTestRule.forClass(TestActiveMasterManager.class);
068
069  private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class);
070  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
071
072  @BeforeClass
073  public static void setUpBeforeClass() throws Exception {
074    TEST_UTIL.startMiniZKCluster();
075  }
076
077  @AfterClass
078  public static void tearDownAfterClass() throws Exception {
079    TEST_UTIL.shutdownMiniZKCluster();
080  }
081
082  @Test public void testRestartMaster() throws IOException, KeeperException {
083    try (ZKWatcher zk = new ZKWatcher(TEST_UTIL.getConfiguration(),
084      "testActiveMasterManagerFromZK", null, true)) {
085      try {
086        ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
087        ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
088      } catch (KeeperException.NoNodeException nne) {
089      }
090
091      // Create the master node with a dummy address
092      ServerName master = ServerName.valueOf("localhost", 1, System.currentTimeMillis());
093      // Should not have a master yet
094      DummyMaster dummyMaster = new DummyMaster(zk, master);
095      ClusterStatusTracker clusterStatusTracker =
096          dummyMaster.getClusterStatusTracker();
097      ActiveMasterManager activeMasterManager =
098          dummyMaster.getActiveMasterManager();
099      assertFalse(activeMasterManager.clusterHasActiveMaster.get());
100      assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
101
102      // First test becoming the active master uninterrupted
103      MonitoredTask status = Mockito.mock(MonitoredTask.class);
104      clusterStatusTracker.setClusterUp();
105
106      activeMasterManager.blockUntilBecomingActiveMaster(100, status);
107      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
108      assertMaster(zk, master);
109      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
110
111      // Now pretend master restart
112      DummyMaster secondDummyMaster = new DummyMaster(zk, master);
113      ActiveMasterManager secondActiveMasterManager =
114          secondDummyMaster.getActiveMasterManager();
115      assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get());
116      activeMasterManager.blockUntilBecomingActiveMaster(100, status);
117      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
118      assertMaster(zk, master);
119      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
120      assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get());
121    }
122  }
123
124  /**
125   * Unit tests that uses ZooKeeper but does not use the master-side methods
126   * but rather acts directly on ZK.
127   * @throws Exception
128   */
129  @Test
130  public void testActiveMasterManagerFromZK() throws Exception {
131    try (ZKWatcher zk = new ZKWatcher(TEST_UTIL.getConfiguration(),
132      "testActiveMasterManagerFromZK", null, true)) {
133      try {
134        ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
135        ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
136      } catch (KeeperException.NoNodeException nne) {
137      }
138
139      // Create the master node with a dummy address
140      ServerName firstMasterAddress =
141          ServerName.valueOf("localhost", 1, System.currentTimeMillis());
142      ServerName secondMasterAddress =
143          ServerName.valueOf("localhost", 2, System.currentTimeMillis());
144
145      // Should not have a master yet
146      DummyMaster ms1 = new DummyMaster(zk, firstMasterAddress);
147      ActiveMasterManager activeMasterManager =
148          ms1.getActiveMasterManager();
149      assertFalse(activeMasterManager.clusterHasActiveMaster.get());
150      assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
151
152      // First test becoming the active master uninterrupted
153      ClusterStatusTracker clusterStatusTracker =
154          ms1.getClusterStatusTracker();
155      clusterStatusTracker.setClusterUp();
156      activeMasterManager.blockUntilBecomingActiveMaster(100,
157          Mockito.mock(MonitoredTask.class));
158      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
159      assertMaster(zk, firstMasterAddress);
160      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
161
162      // New manager will now try to become the active master in another thread
163      WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress);
164      t.start();
165      // Wait for this guy to figure out there is another active master
166      // Wait for 1 second at most
167      int sleeps = 0;
168      while (!t.manager.clusterHasActiveMaster.get() && sleeps < 100) {
169        Thread.sleep(10);
170        sleeps++;
171      }
172
173      // Both should see that there is an active master
174      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
175      assertTrue(t.manager.clusterHasActiveMaster.get());
176      // But secondary one should not be the active master
177      assertFalse(t.isActiveMaster);
178      // Verify the active master ServerName is populated in standby master.
179      assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get());
180
181      // Close the first server and delete it's master node
182      ms1.stop("stopping first server");
183
184      // Use a listener to capture when the node is actually deleted
185      NodeDeletionListener listener = new NodeDeletionListener(zk,
186          zk.getZNodePaths().masterAddressZNode);
187      zk.registerListener(listener);
188
189      LOG.info("Deleting master node");
190      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
191
192      // Wait for the node to be deleted
193      LOG.info("Waiting for active master manager to be notified");
194      listener.waitForDeletion();
195      LOG.info("Master node deleted");
196
197      // Now we expect the secondary manager to have and be the active master
198      // Wait for 1 second at most
199      sleeps = 0;
200      while (!t.isActiveMaster && sleeps < 100) {
201        Thread.sleep(10);
202        sleeps++;
203      }
204      LOG.debug("Slept " + sleeps + " times");
205
206      assertTrue(t.manager.clusterHasActiveMaster.get());
207      assertTrue(t.isActiveMaster);
208      assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get());
209
210      LOG.info("Deleting master node");
211
212      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
213    }
214  }
215
216  @Test
217  public void testBackupMasterUpdates() throws Exception {
218    Configuration conf = TEST_UTIL.getConfiguration();
219    try (ZKWatcher zk = new ZKWatcher(conf, "testBackupMasterUpdates", null, true)) {
220      ServerName sn1 = ServerName.valueOf("localhost", 1, -1);
221      DummyMaster master1 = new DummyMaster(zk, sn1);
222      ActiveMasterManager activeMasterManager = master1.getActiveMasterManager();
223      activeMasterManager.blockUntilBecomingActiveMaster(100,
224          Mockito.mock(MonitoredTask.class));
225      assertEquals(sn1, activeMasterManager.getActiveMasterServerName().get());
226      assertEquals(0, activeMasterManager.getBackupMasters().size());
227      // Add backup masters
228      List<String> backupZNodes = new ArrayList<>();
229      for (int i = 1; i <= 10; i++) {
230        ServerName backupSn = ServerName.valueOf("localhost", 1000 + i, -1);
231        String backupZn = ZNodePaths.joinZNode(
232            zk.getZNodePaths().backupMasterAddressesZNode, backupSn.toString());
233        backupZNodes.add(backupZn);
234        MasterAddressTracker.setMasterAddress(zk, backupZn, backupSn, 1234);
235        TEST_UTIL.waitFor(10000,
236          () -> activeMasterManager.getBackupMasters().size() == backupZNodes.size());
237      }
238      // Remove backup masters
239      int numBackups = backupZNodes.size();
240      for (String backupZNode: backupZNodes) {
241        ZKUtil.deleteNode(zk, backupZNode);
242        final int currentBackups = --numBackups;
243        TEST_UTIL.waitFor(10000,
244          () -> activeMasterManager.getBackupMasters().size() == currentBackups);
245      }
246    }
247  }
248
249  /**
250   * Assert there is an active master and that it has the specified address.
251   * @param zk single Zookeeper watcher
252   * @param expectedAddress the expected address of the master
253   * @throws KeeperException unexpected Zookeeper exception
254   * @throws IOException if an IO problem is encountered
255   */
256  private void assertMaster(ZKWatcher zk, ServerName expectedAddress) throws
257      KeeperException, IOException {
258    ServerName readAddress = MasterAddressTracker.getMasterAddress(zk);
259    assertNotNull(readAddress);
260    assertEquals(expectedAddress, readAddress);
261  }
262
263  public static class WaitToBeMasterThread extends Thread {
264
265    ActiveMasterManager manager;
266    DummyMaster dummyMaster;
267    boolean isActiveMaster;
268
269    public WaitToBeMasterThread(ZKWatcher zk, ServerName address) throws InterruptedIOException {
270      this.dummyMaster = new DummyMaster(zk,address);
271      this.manager = this.dummyMaster.getActiveMasterManager();
272      isActiveMaster = false;
273    }
274
275    @Override
276    public void run() {
277      manager.blockUntilBecomingActiveMaster(100,
278          Mockito.mock(MonitoredTask.class));
279      LOG.info("Second master has become the active master!");
280      isActiveMaster = true;
281    }
282  }
283
284  public static class NodeDeletionListener extends ZKListener {
285    private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class);
286
287    private Semaphore lock;
288    private String node;
289
290    public NodeDeletionListener(ZKWatcher watcher, String node) {
291      super(watcher);
292      lock = new Semaphore(0);
293      this.node = node;
294    }
295
296    @Override
297    public void nodeDeleted(String path) {
298      if(path.equals(node)) {
299        LOG.debug("nodeDeleted(" + path + ")");
300        lock.release();
301      }
302    }
303
304    public void waitForDeletion() throws InterruptedException {
305      lock.acquire();
306    }
307  }
308
309  /**
310   * Dummy Master Implementation.
311   */
312  public static class DummyMaster implements Server {
313    private volatile boolean stopped;
314    private ClusterStatusTracker clusterStatusTracker;
315    private ActiveMasterManager activeMasterManager;
316
317    public DummyMaster(ZKWatcher zk, ServerName master) throws InterruptedIOException {
318      this.clusterStatusTracker =
319        new ClusterStatusTracker(zk, this);
320      clusterStatusTracker.start();
321
322      this.activeMasterManager =
323        new ActiveMasterManager(zk, master, this);
324      zk.registerListener(activeMasterManager);
325    }
326
327    @Override
328    public void abort(final String msg, final Throwable t) {}
329
330    @Override
331    public boolean isAborted() {
332      return false;
333    }
334
335    @Override
336    public Configuration getConfiguration() {
337      return null;
338    }
339
340    @Override
341    public ZKWatcher getZooKeeper() {
342      return null;
343    }
344
345    @Override
346    public CoordinatedStateManager getCoordinatedStateManager() {
347      return null;
348    }
349
350    @Override
351    public ServerName getServerName() {
352      return null;
353    }
354
355    @Override
356    public boolean isStopped() {
357      return this.stopped;
358    }
359
360    @Override
361    public void stop(String why) {
362      this.stopped = true;
363    }
364
365    @Override
366    public ClusterConnection getConnection() {
367      return null;
368    }
369
370    public ClusterStatusTracker getClusterStatusTracker() {
371      return clusterStatusTracker;
372    }
373
374    public ActiveMasterManager getActiveMasterManager() {
375      return activeMasterManager;
376    }
377
378    @Override
379    public ChoreService getChoreService() {
380      return null;
381    }
382
383    @Override
384    public ClusterConnection getClusterConnection() {
385      // TODO Auto-generated method stub
386      return null;
387    }
388
389    @Override
390    public FileSystem getFileSystem() {
391      return null;
392    }
393
394    @Override
395    public boolean isStopping() {
396      return false;
397    }
398
399    @Override
400    public Connection createConnection(Configuration conf) throws IOException {
401      return null;
402    }
403  }
404}