/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertNotNull;
023import static org.junit.Assert.assertTrue;
024
025import java.io.IOException;
026import java.io.InterruptedIOException;
027import java.util.ArrayList;
028import java.util.List;
029import java.util.concurrent.Semaphore;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.fs.FileSystem;
032import org.apache.hadoop.hbase.ChoreService;
033import org.apache.hadoop.hbase.CoordinatedStateManager;
034import org.apache.hadoop.hbase.HBaseClassTestRule;
035import org.apache.hadoop.hbase.HBaseTestingUtility;
036import org.apache.hadoop.hbase.Server;
037import org.apache.hadoop.hbase.ServerName;
038import org.apache.hadoop.hbase.client.ClusterConnection;
039import org.apache.hadoop.hbase.client.Connection;
040import org.apache.hadoop.hbase.monitoring.MonitoredTask;
041import org.apache.hadoop.hbase.testclassification.MasterTests;
042import org.apache.hadoop.hbase.testclassification.MediumTests;
043import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
044import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
045import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
046import org.apache.hadoop.hbase.zookeeper.ZKListener;
047import org.apache.hadoop.hbase.zookeeper.ZKUtil;
048import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
049import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
050import org.apache.zookeeper.KeeperException;
051import org.junit.AfterClass;
052import org.junit.BeforeClass;
053import org.junit.ClassRule;
054import org.junit.Test;
055import org.junit.experimental.categories.Category;
056import org.mockito.Mockito;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060/**
061 * Test the {@link ActiveMasterManager}.
062 */
063@Category({ MasterTests.class, MediumTests.class })
064public class TestActiveMasterManager {
065
066  @ClassRule
067  public static final HBaseClassTestRule CLASS_RULE =
068    HBaseClassTestRule.forClass(TestActiveMasterManager.class);
069
070  private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class);
071  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
072
073  @BeforeClass
074  public static void setUpBeforeClass() throws Exception {
075    TEST_UTIL.startMiniZKCluster();
076  }
077
078  @AfterClass
079  public static void tearDownAfterClass() throws Exception {
080    TEST_UTIL.shutdownMiniZKCluster();
081  }
082
083  @Test
084  public void testRestartMaster() throws IOException, KeeperException {
085    try (ZKWatcher zk =
086      new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) {
087      try {
088        ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
089        ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
090      } catch (KeeperException.NoNodeException nne) {
091      }
092
093      // Create the master node with a dummy address
094      ServerName master = ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime());
095      // Should not have a master yet
096      DummyMaster dummyMaster = new DummyMaster(zk, master);
097      ClusterStatusTracker clusterStatusTracker = dummyMaster.getClusterStatusTracker();
098      ActiveMasterManager activeMasterManager = dummyMaster.getActiveMasterManager();
099      assertFalse(activeMasterManager.clusterHasActiveMaster.get());
100      assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
101
102      // First test becoming the active master uninterrupted
103      MonitoredTask status = Mockito.mock(MonitoredTask.class);
104      clusterStatusTracker.setClusterUp();
105
106      activeMasterManager.blockUntilBecomingActiveMaster(100, status);
107      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
108      assertMaster(zk, master);
109      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
110
111      // Now pretend master restart
112      DummyMaster secondDummyMaster = new DummyMaster(zk, master);
113      ActiveMasterManager secondActiveMasterManager = secondDummyMaster.getActiveMasterManager();
114      assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get());
115      activeMasterManager.blockUntilBecomingActiveMaster(100, status);
116      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
117      assertMaster(zk, master);
118      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
119      assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get());
120    }
121  }
122
123  /**
124   * Unit tests that uses ZooKeeper but does not use the master-side methods but rather acts
125   * directly on ZK. n
126   */
127  @Test
128  public void testActiveMasterManagerFromZK() throws Exception {
129    try (ZKWatcher zk =
130      new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) {
131      try {
132        ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
133        ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
134      } catch (KeeperException.NoNodeException nne) {
135      }
136
137      // Create the master node with a dummy address
138      ServerName firstMasterAddress =
139        ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime());
140      ServerName secondMasterAddress =
141        ServerName.valueOf("localhost", 2, EnvironmentEdgeManager.currentTime());
142
143      // Should not have a master yet
144      DummyMaster ms1 = new DummyMaster(zk, firstMasterAddress);
145      ActiveMasterManager activeMasterManager = ms1.getActiveMasterManager();
146      assertFalse(activeMasterManager.clusterHasActiveMaster.get());
147      assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
148
149      // First test becoming the active master uninterrupted
150      ClusterStatusTracker clusterStatusTracker = ms1.getClusterStatusTracker();
151      clusterStatusTracker.setClusterUp();
152      activeMasterManager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class));
153      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
154      assertMaster(zk, firstMasterAddress);
155      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
156
157      // New manager will now try to become the active master in another thread
158      WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress);
159      t.start();
160      // Wait for this guy to figure out there is another active master
161      // Wait for 1 second at most
162      int sleeps = 0;
163      while (!t.manager.clusterHasActiveMaster.get() && sleeps < 100) {
164        Thread.sleep(10);
165        sleeps++;
166      }
167
168      // Both should see that there is an active master
169      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
170      assertTrue(t.manager.clusterHasActiveMaster.get());
171      // But secondary one should not be the active master
172      assertFalse(t.isActiveMaster);
173      // Verify the active master ServerName is populated in standby master.
174      assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get());
175
176      // Close the first server and delete it's master node
177      ms1.stop("stopping first server");
178
179      // Use a listener to capture when the node is actually deleted
180      NodeDeletionListener listener =
181        new NodeDeletionListener(zk, zk.getZNodePaths().masterAddressZNode);
182      zk.registerListener(listener);
183
184      LOG.info("Deleting master node");
185      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
186
187      // Wait for the node to be deleted
188      LOG.info("Waiting for active master manager to be notified");
189      listener.waitForDeletion();
190      LOG.info("Master node deleted");
191
192      // Now we expect the secondary manager to have and be the active master
193      // Wait for 1 second at most
194      sleeps = 0;
195      while (!t.isActiveMaster && sleeps < 100) {
196        Thread.sleep(10);
197        sleeps++;
198      }
199      LOG.debug("Slept " + sleeps + " times");
200
201      assertTrue(t.manager.clusterHasActiveMaster.get());
202      assertTrue(t.isActiveMaster);
203      assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get());
204
205      LOG.info("Deleting master node");
206
207      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
208    }
209  }
210
211  @Test
212  public void testBackupMasterUpdates() throws Exception {
213    Configuration conf = TEST_UTIL.getConfiguration();
214    try (ZKWatcher zk = new ZKWatcher(conf, "testBackupMasterUpdates", null, true)) {
215      ServerName sn1 = ServerName.valueOf("localhost", 1, -1);
216      DummyMaster master1 = new DummyMaster(zk, sn1);
217      ActiveMasterManager activeMasterManager = master1.getActiveMasterManager();
218      activeMasterManager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class));
219      assertEquals(sn1, activeMasterManager.getActiveMasterServerName().get());
220      assertEquals(0, activeMasterManager.getBackupMasters().size());
221      // Add backup masters
222      List<String> backupZNodes = new ArrayList<>();
223      for (int i = 1; i <= 10; i++) {
224        ServerName backupSn = ServerName.valueOf("localhost", 1000 + i, -1);
225        String backupZn =
226          ZNodePaths.joinZNode(zk.getZNodePaths().backupMasterAddressesZNode, backupSn.toString());
227        backupZNodes.add(backupZn);
228        MasterAddressTracker.setMasterAddress(zk, backupZn, backupSn, 1234);
229        TEST_UTIL.waitFor(10000,
230          () -> activeMasterManager.getBackupMasters().size() == backupZNodes.size());
231      }
232      // Remove backup masters
233      int numBackups = backupZNodes.size();
234      for (String backupZNode : backupZNodes) {
235        ZKUtil.deleteNode(zk, backupZNode);
236        final int currentBackups = --numBackups;
237        TEST_UTIL.waitFor(10000,
238          () -> activeMasterManager.getBackupMasters().size() == currentBackups);
239      }
240    }
241  }
242
243  /**
244   * Assert there is an active master and that it has the specified address.
245   * @param zk              single Zookeeper watcher
246   * @param expectedAddress the expected address of the master
247   * @throws KeeperException unexpected Zookeeper exception
248   * @throws IOException     if an IO problem is encountered
249   */
250  private void assertMaster(ZKWatcher zk, ServerName expectedAddress)
251    throws KeeperException, IOException {
252    ServerName readAddress = MasterAddressTracker.getMasterAddress(zk);
253    assertNotNull(readAddress);
254    assertEquals(expectedAddress, readAddress);
255  }
256
257  public static class WaitToBeMasterThread extends Thread {
258
259    ActiveMasterManager manager;
260    DummyMaster dummyMaster;
261    boolean isActiveMaster;
262
263    public WaitToBeMasterThread(ZKWatcher zk, ServerName address) throws InterruptedIOException {
264      this.dummyMaster = new DummyMaster(zk, address);
265      this.manager = this.dummyMaster.getActiveMasterManager();
266      isActiveMaster = false;
267    }
268
269    @Override
270    public void run() {
271      manager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class));
272      LOG.info("Second master has become the active master!");
273      isActiveMaster = true;
274    }
275  }
276
277  public static class NodeDeletionListener extends ZKListener {
278    private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class);
279
280    private Semaphore lock;
281    private String node;
282
283    public NodeDeletionListener(ZKWatcher watcher, String node) {
284      super(watcher);
285      lock = new Semaphore(0);
286      this.node = node;
287    }
288
289    @Override
290    public void nodeDeleted(String path) {
291      if (path.equals(node)) {
292        LOG.debug("nodeDeleted(" + path + ")");
293        lock.release();
294      }
295    }
296
297    public void waitForDeletion() throws InterruptedException {
298      lock.acquire();
299    }
300  }
301
302  /**
303   * Dummy Master Implementation.
304   */
305  public static class DummyMaster implements Server {
306    private volatile boolean stopped;
307    private ClusterStatusTracker clusterStatusTracker;
308    private ActiveMasterManager activeMasterManager;
309
310    public DummyMaster(ZKWatcher zk, ServerName master) throws InterruptedIOException {
311      this.clusterStatusTracker = new ClusterStatusTracker(zk, this);
312      clusterStatusTracker.start();
313
314      this.activeMasterManager = new ActiveMasterManager(zk, master, this);
315      zk.registerListener(activeMasterManager);
316    }
317
318    @Override
319    public void abort(final String msg, final Throwable t) {
320    }
321
322    @Override
323    public boolean isAborted() {
324      return false;
325    }
326
327    @Override
328    public Configuration getConfiguration() {
329      return null;
330    }
331
332    @Override
333    public ZKWatcher getZooKeeper() {
334      return null;
335    }
336
337    @Override
338    public CoordinatedStateManager getCoordinatedStateManager() {
339      return null;
340    }
341
342    @Override
343    public ServerName getServerName() {
344      return null;
345    }
346
347    @Override
348    public boolean isStopped() {
349      return this.stopped;
350    }
351
352    @Override
353    public void stop(String why) {
354      this.stopped = true;
355    }
356
357    @Override
358    public ClusterConnection getConnection() {
359      return null;
360    }
361
362    public ClusterStatusTracker getClusterStatusTracker() {
363      return clusterStatusTracker;
364    }
365
366    public ActiveMasterManager getActiveMasterManager() {
367      return activeMasterManager;
368    }
369
370    @Override
371    public ChoreService getChoreService() {
372      return null;
373    }
374
375    @Override
376    public ClusterConnection getClusterConnection() {
377      // TODO Auto-generated method stub
378      return null;
379    }
380
381    @Override
382    public FileSystem getFileSystem() {
383      return null;
384    }
385
386    @Override
387    public boolean isStopping() {
388      return false;
389    }
390
391    @Override
392    public Connection createConnection(Configuration conf) throws IOException {
393      return null;
394    }
395  }
396}