001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertNotNull;
023import static org.junit.Assert.assertTrue;
024import static org.mockito.ArgumentMatchers.any;
025import static org.mockito.Mockito.when;
026
027import java.io.IOException;
028import java.io.InterruptedIOException;
029import java.util.ArrayList;
030import java.util.List;
031import java.util.concurrent.Semaphore;
032import org.apache.hadoop.conf.Configuration;
033import org.apache.hadoop.hbase.HBaseClassTestRule;
034import org.apache.hadoop.hbase.HBaseTestingUtil;
035import org.apache.hadoop.hbase.ServerName;
036import org.apache.hadoop.hbase.monitoring.MonitoredTask;
037import org.apache.hadoop.hbase.monitoring.TaskGroup;
038import org.apache.hadoop.hbase.testclassification.MasterTests;
039import org.apache.hadoop.hbase.testclassification.MediumTests;
040import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
041import org.apache.hadoop.hbase.util.MockServer;
042import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
043import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
044import org.apache.hadoop.hbase.zookeeper.ZKListener;
045import org.apache.hadoop.hbase.zookeeper.ZKUtil;
046import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
047import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
048import org.apache.zookeeper.KeeperException;
049import org.junit.AfterClass;
050import org.junit.BeforeClass;
051import org.junit.ClassRule;
052import org.junit.Test;
053import org.junit.experimental.categories.Category;
054import org.mockito.Mockito;
055import org.slf4j.Logger;
056import org.slf4j.LoggerFactory;
057
058/**
059 * Test the {@link ActiveMasterManager}.
060 */
061@Category({ MasterTests.class, MediumTests.class })
062public class TestActiveMasterManager {
063
064  @ClassRule
065  public static final HBaseClassTestRule CLASS_RULE =
066    HBaseClassTestRule.forClass(TestActiveMasterManager.class);
067
068  private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class);
069  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
070
071  @BeforeClass
072  public static void setUpBeforeClass() throws Exception {
073    TEST_UTIL.startMiniZKCluster();
074  }
075
076  @AfterClass
077  public static void tearDownAfterClass() throws Exception {
078    TEST_UTIL.shutdownMiniZKCluster();
079  }
080
081  @Test
082  public void testRestartMaster() throws IOException, KeeperException {
083    try (ZKWatcher zk =
084      new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) {
085      try {
086        ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
087        ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
088      } catch (KeeperException.NoNodeException nne) {
089      }
090
091      // Create the master node with a dummy address
092      ServerName master = ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime());
093      // Should not have a master yet
094      DummyMaster dummyMaster = new DummyMaster(zk, master);
095      ClusterStatusTracker clusterStatusTracker = dummyMaster.getClusterStatusTracker();
096      ActiveMasterManager activeMasterManager = dummyMaster.getActiveMasterManager();
097      assertFalse(activeMasterManager.clusterHasActiveMaster.get());
098      assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
099
100      // First test becoming the active master uninterrupted
101      TaskGroup status = mockTaskGroup();
102      clusterStatusTracker.setClusterUp();
103
104      activeMasterManager.blockUntilBecomingActiveMaster(100, status);
105      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
106      assertMaster(zk, master);
107      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
108
109      // Now pretend master restart
110      DummyMaster secondDummyMaster = new DummyMaster(zk, master);
111      ActiveMasterManager secondActiveMasterManager = secondDummyMaster.getActiveMasterManager();
112      assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get());
113      activeMasterManager.blockUntilBecomingActiveMaster(100, status);
114      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
115      assertMaster(zk, master);
116      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
117      assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get());
118    }
119  }
120
121  /**
122   * Unit tests that uses ZooKeeper but does not use the master-side methods but rather acts
123   * directly on ZK.
124   */
125  @Test
126  public void testActiveMasterManagerFromZK() throws Exception {
127    try (ZKWatcher zk =
128      new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) {
129      try {
130        ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
131        ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
132      } catch (KeeperException.NoNodeException nne) {
133      }
134
135      // Create the master node with a dummy address
136      ServerName firstMasterAddress =
137        ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime());
138      ServerName secondMasterAddress =
139        ServerName.valueOf("localhost", 2, EnvironmentEdgeManager.currentTime());
140
141      // Should not have a master yet
142      DummyMaster ms1 = new DummyMaster(zk, firstMasterAddress);
143      ActiveMasterManager activeMasterManager = ms1.getActiveMasterManager();
144      assertFalse(activeMasterManager.clusterHasActiveMaster.get());
145      assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
146
147      // First test becoming the active master uninterrupted
148      ClusterStatusTracker clusterStatusTracker = ms1.getClusterStatusTracker();
149      clusterStatusTracker.setClusterUp();
150
151      activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup());
152      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
153      assertMaster(zk, firstMasterAddress);
154      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
155
156      // New manager will now try to become the active master in another thread
157      WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress);
158      t.start();
159      // Wait for this guy to figure out there is another active master
160      // Wait for 1 second at most
161      int sleeps = 0;
162      while (!t.manager.clusterHasActiveMaster.get() && sleeps < 100) {
163        Thread.sleep(10);
164        sleeps++;
165      }
166
167      // Both should see that there is an active master
168      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
169      assertTrue(t.manager.clusterHasActiveMaster.get());
170      // But secondary one should not be the active master
171      assertFalse(t.isActiveMaster);
172      // Verify the active master ServerName is populated in standby master.
173      assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get());
174
175      // Close the first server and delete it's master node
176      ms1.stop("stopping first server");
177
178      // Use a listener to capture when the node is actually deleted
179      NodeDeletionListener listener =
180        new NodeDeletionListener(zk, zk.getZNodePaths().masterAddressZNode);
181      zk.registerListener(listener);
182
183      LOG.info("Deleting master node");
184      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
185
186      // Wait for the node to be deleted
187      LOG.info("Waiting for active master manager to be notified");
188      listener.waitForDeletion();
189      LOG.info("Master node deleted");
190
191      // Now we expect the secondary manager to have and be the active master
192      // Wait for 1 second at most
193      sleeps = 0;
194      while (!t.isActiveMaster && sleeps < 100) {
195        Thread.sleep(10);
196        sleeps++;
197      }
198      LOG.debug("Slept " + sleeps + " times");
199
200      assertTrue(t.manager.clusterHasActiveMaster.get());
201      assertTrue(t.isActiveMaster);
202      assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get());
203
204      LOG.info("Deleting master node");
205
206      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
207    }
208  }
209
210  @Test
211  public void testBackupMasterUpdates() throws Exception {
212    Configuration conf = TEST_UTIL.getConfiguration();
213    try (ZKWatcher zk = new ZKWatcher(conf, "testBackupMasterUpdates", null, true)) {
214      ServerName sn1 = ServerName.valueOf("localhost", 1, -1);
215      DummyMaster master1 = new DummyMaster(zk, sn1);
216      ActiveMasterManager activeMasterManager = master1.getActiveMasterManager();
217      activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup());
218      assertEquals(sn1, activeMasterManager.getActiveMasterServerName().get());
219      assertEquals(0, activeMasterManager.getBackupMasters().size());
220      // Add backup masters
221      List<String> backupZNodes = new ArrayList<>();
222      for (int i = 1; i <= 10; i++) {
223        ServerName backupSn = ServerName.valueOf("localhost", 1000 + i, -1);
224        String backupZn =
225          ZNodePaths.joinZNode(zk.getZNodePaths().backupMasterAddressesZNode, backupSn.toString());
226        backupZNodes.add(backupZn);
227        MasterAddressTracker.setMasterAddress(zk, backupZn, backupSn, 1234);
228        TEST_UTIL.waitFor(10000,
229          () -> activeMasterManager.getBackupMasters().size() == backupZNodes.size());
230      }
231      // Remove backup masters
232      int numBackups = backupZNodes.size();
233      for (String backupZNode : backupZNodes) {
234        ZKUtil.deleteNode(zk, backupZNode);
235        final int currentBackups = --numBackups;
236        TEST_UTIL.waitFor(10000,
237          () -> activeMasterManager.getBackupMasters().size() == currentBackups);
238      }
239    }
240  }
241
242  /**
243   * Assert there is an active master and that it has the specified address.
244   * @param zk              single Zookeeper watcher
245   * @param expectedAddress the expected address of the master
246   * @throws KeeperException unexpected Zookeeper exception
247   * @throws IOException     if an IO problem is encountered
248   */
249  private void assertMaster(ZKWatcher zk, ServerName expectedAddress)
250    throws KeeperException, IOException {
251    ServerName readAddress = MasterAddressTracker.getMasterAddress(zk);
252    assertNotNull(readAddress);
253    assertEquals(expectedAddress, readAddress);
254  }
255
256  public static class WaitToBeMasterThread extends Thread {
257
258    ActiveMasterManager manager;
259    DummyMaster dummyMaster;
260    boolean isActiveMaster;
261
262    public WaitToBeMasterThread(ZKWatcher zk, ServerName address) throws InterruptedIOException {
263      this.dummyMaster = new DummyMaster(zk, address);
264      this.manager = this.dummyMaster.getActiveMasterManager();
265      isActiveMaster = false;
266    }
267
268    @Override
269    public void run() {
270      manager.blockUntilBecomingActiveMaster(100, mockTaskGroup());
271      LOG.info("Second master has become the active master!");
272      isActiveMaster = true;
273    }
274  }
275
276  private static TaskGroup mockTaskGroup() {
277    TaskGroup taskGroup = Mockito.mock(TaskGroup.class);
278    MonitoredTask task = Mockito.mock(MonitoredTask.class);
279    when(taskGroup.addTask(any())).thenReturn(task);
280    return taskGroup;
281  }
282
283  public static class NodeDeletionListener extends ZKListener {
284    private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class);
285
286    private Semaphore lock;
287    private String node;
288
289    public NodeDeletionListener(ZKWatcher watcher, String node) {
290      super(watcher);
291      lock = new Semaphore(0);
292      this.node = node;
293    }
294
295    @Override
296    public void nodeDeleted(String path) {
297      if (path.equals(node)) {
298        LOG.debug("nodeDeleted(" + path + ")");
299        lock.release();
300      }
301    }
302
303    public void waitForDeletion() throws InterruptedException {
304      lock.acquire();
305    }
306  }
307
308  /**
309   * Dummy Master Implementation.
310   */
311  public static class DummyMaster extends MockServer {
312    private ClusterStatusTracker clusterStatusTracker;
313    private ActiveMasterManager activeMasterManager;
314
315    public DummyMaster(ZKWatcher zk, ServerName master) throws InterruptedIOException {
316      this.clusterStatusTracker = new ClusterStatusTracker(zk, this);
317      clusterStatusTracker.start();
318
319      this.activeMasterManager = new ActiveMasterManager(zk, master, this);
320      zk.registerListener(activeMasterManager);
321    }
322
323    public ClusterStatusTracker getClusterStatusTracker() {
324      return clusterStatusTracker;
325    }
326
327    public ActiveMasterManager getActiveMasterManager() {
328      return activeMasterManager;
329    }
330  }
331}