001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertFalse;
022import static org.junit.jupiter.api.Assertions.assertNotNull;
023import static org.junit.jupiter.api.Assertions.assertTrue;
024import static org.mockito.ArgumentMatchers.any;
025import static org.mockito.Mockito.when;
026
027import java.io.IOException;
028import java.io.InterruptedIOException;
029import java.util.ArrayList;
030import java.util.List;
031import java.util.concurrent.Semaphore;
032import org.apache.hadoop.conf.Configuration;
033import org.apache.hadoop.hbase.HBaseTestingUtil;
034import org.apache.hadoop.hbase.ServerName;
035import org.apache.hadoop.hbase.keymeta.KeyManagementService;
036import org.apache.hadoop.hbase.monitoring.MonitoredTask;
037import org.apache.hadoop.hbase.monitoring.TaskGroup;
038import org.apache.hadoop.hbase.testclassification.MasterTests;
039import org.apache.hadoop.hbase.testclassification.MediumTests;
040import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
041import org.apache.hadoop.hbase.util.MockServer;
042import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
043import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
044import org.apache.hadoop.hbase.zookeeper.ZKListener;
045import org.apache.hadoop.hbase.zookeeper.ZKUtil;
046import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
047import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
048import org.apache.zookeeper.KeeperException;
049import org.junit.jupiter.api.AfterAll;
050import org.junit.jupiter.api.BeforeAll;
051import org.junit.jupiter.api.Tag;
052import org.junit.jupiter.api.Test;
053import org.mockito.Mockito;
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056
057/**
058 * Test the {@link ActiveMasterManager}.
059 */
060@Tag(MasterTests.TAG)
061@Tag(MediumTests.TAG)
062public class TestActiveMasterManager {
063
064  private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class);
065  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
066
067  @BeforeAll
068  public static void setUpBeforeClass() throws Exception {
069    TEST_UTIL.startMiniZKCluster();
070  }
071
072  @AfterAll
073  public static void tearDownAfterClass() throws Exception {
074    TEST_UTIL.shutdownMiniZKCluster();
075  }
076
077  @Test
078  public void testRestartMaster() throws IOException, KeeperException {
079    try (ZKWatcher zk =
080      new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) {
081      try {
082        ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
083        ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
084      } catch (KeeperException.NoNodeException nne) {
085      }
086
087      // Create the master node with a dummy address
088      ServerName master = ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime());
089      // Should not have a master yet
090      DummyMaster dummyMaster = new DummyMaster(zk, master);
091      ClusterStatusTracker clusterStatusTracker = dummyMaster.getClusterStatusTracker();
092      ActiveMasterManager activeMasterManager = dummyMaster.getActiveMasterManager();
093      assertFalse(activeMasterManager.clusterHasActiveMaster.get());
094      assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
095
096      // First test becoming the active master uninterrupted
097      TaskGroup status = mockTaskGroup();
098      clusterStatusTracker.setClusterUp();
099
100      activeMasterManager.blockUntilBecomingActiveMaster(100, status);
101      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
102      assertMaster(zk, master);
103      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
104
105      // Now pretend master restart
106      DummyMaster secondDummyMaster = new DummyMaster(zk, master);
107      ActiveMasterManager secondActiveMasterManager = secondDummyMaster.getActiveMasterManager();
108      assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get());
109      activeMasterManager.blockUntilBecomingActiveMaster(100, status);
110      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
111      assertMaster(zk, master);
112      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
113      assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get());
114    }
115  }
116
117  /**
118   * Unit tests that uses ZooKeeper but does not use the master-side methods but rather acts
119   * directly on ZK.
120   */
121  @Test
122  public void testActiveMasterManagerFromZK() throws Exception {
123    try (ZKWatcher zk =
124      new ZKWatcher(TEST_UTIL.getConfiguration(), "testActiveMasterManagerFromZK", null, true)) {
125      try {
126        ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
127        ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
128      } catch (KeeperException.NoNodeException nne) {
129      }
130
131      // Create the master node with a dummy address
132      ServerName firstMasterAddress =
133        ServerName.valueOf("localhost", 1, EnvironmentEdgeManager.currentTime());
134      ServerName secondMasterAddress =
135        ServerName.valueOf("localhost", 2, EnvironmentEdgeManager.currentTime());
136
137      // Should not have a master yet
138      DummyMaster ms1 = new DummyMaster(zk, firstMasterAddress);
139      ActiveMasterManager activeMasterManager = ms1.getActiveMasterManager();
140      assertFalse(activeMasterManager.clusterHasActiveMaster.get());
141      assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
142
143      // First test becoming the active master uninterrupted
144      ClusterStatusTracker clusterStatusTracker = ms1.getClusterStatusTracker();
145      clusterStatusTracker.setClusterUp();
146
147      activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup());
148      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
149      assertMaster(zk, firstMasterAddress);
150      assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
151
152      // New manager will now try to become the active master in another thread
153      WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress);
154      t.start();
155      // Wait for this guy to figure out there is another active master
156      // Wait for 1 second at most
157      int sleeps = 0;
158      while (!t.manager.clusterHasActiveMaster.get() && sleeps < 100) {
159        Thread.sleep(10);
160        sleeps++;
161      }
162
163      // Both should see that there is an active master
164      assertTrue(activeMasterManager.clusterHasActiveMaster.get());
165      assertTrue(t.manager.clusterHasActiveMaster.get());
166      // But secondary one should not be the active master
167      assertFalse(t.isActiveMaster);
168      // Verify the active master ServerName is populated in standby master.
169      assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get());
170
171      // Close the first server and delete it's master node
172      ms1.stop("stopping first server");
173
174      // Use a listener to capture when the node is actually deleted
175      NodeDeletionListener listener =
176        new NodeDeletionListener(zk, zk.getZNodePaths().masterAddressZNode);
177      zk.registerListener(listener);
178
179      LOG.info("Deleting master node");
180      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
181
182      // Wait for the node to be deleted
183      LOG.info("Waiting for active master manager to be notified");
184      listener.waitForDeletion();
185      LOG.info("Master node deleted");
186
187      // Now we expect the secondary manager to have and be the active master
188      // Wait for 1 second at most
189      sleeps = 0;
190      while (!t.isActiveMaster && sleeps < 100) {
191        Thread.sleep(10);
192        sleeps++;
193      }
194      LOG.debug("Slept " + sleeps + " times");
195
196      assertTrue(t.manager.clusterHasActiveMaster.get());
197      assertTrue(t.isActiveMaster);
198      assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get());
199
200      LOG.info("Deleting master node");
201
202      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
203    }
204  }
205
206  @Test
207  public void testBackupMasterUpdates() throws Exception {
208    Configuration conf = TEST_UTIL.getConfiguration();
209    try (ZKWatcher zk = new ZKWatcher(conf, "testBackupMasterUpdates", null, true)) {
210      ServerName sn1 = ServerName.valueOf("localhost", 1, -1);
211      DummyMaster master1 = new DummyMaster(zk, sn1);
212      ActiveMasterManager activeMasterManager = master1.getActiveMasterManager();
213      activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup());
214      assertEquals(sn1, activeMasterManager.getActiveMasterServerName().get());
215      assertEquals(0, activeMasterManager.getBackupMasters().size());
216      // Add backup masters
217      List<String> backupZNodes = new ArrayList<>();
218      for (int i = 1; i <= 10; i++) {
219        ServerName backupSn = ServerName.valueOf("localhost", 1000 + i, -1);
220        String backupZn =
221          ZNodePaths.joinZNode(zk.getZNodePaths().backupMasterAddressesZNode, backupSn.toString());
222        backupZNodes.add(backupZn);
223        MasterAddressTracker.setMasterAddress(zk, backupZn, backupSn, 1234);
224        TEST_UTIL.waitFor(10000,
225          () -> activeMasterManager.getBackupMasters().size() == backupZNodes.size());
226      }
227      // Remove backup masters
228      int numBackups = backupZNodes.size();
229      for (String backupZNode : backupZNodes) {
230        ZKUtil.deleteNode(zk, backupZNode);
231        final int currentBackups = --numBackups;
232        TEST_UTIL.waitFor(10000,
233          () -> activeMasterManager.getBackupMasters().size() == currentBackups);
234      }
235    }
236  }
237
238  /**
239   * Assert there is an active master and that it has the specified address.
240   * @param zk              single Zookeeper watcher
241   * @param expectedAddress the expected address of the master
242   * @throws KeeperException unexpected Zookeeper exception
243   * @throws IOException     if an IO problem is encountered
244   */
245  private void assertMaster(ZKWatcher zk, ServerName expectedAddress)
246    throws KeeperException, IOException {
247    ServerName readAddress = MasterAddressTracker.getMasterAddress(zk);
248    assertNotNull(readAddress);
249    assertEquals(expectedAddress, readAddress);
250  }
251
252  public static class WaitToBeMasterThread extends Thread {
253
254    ActiveMasterManager manager;
255    DummyMaster dummyMaster;
256    boolean isActiveMaster;
257
258    public WaitToBeMasterThread(ZKWatcher zk, ServerName address) throws InterruptedIOException {
259      this.dummyMaster = new DummyMaster(zk, address);
260      this.manager = this.dummyMaster.getActiveMasterManager();
261      isActiveMaster = false;
262    }
263
264    @Override
265    public void run() {
266      manager.blockUntilBecomingActiveMaster(100, mockTaskGroup());
267      LOG.info("Second master has become the active master!");
268      isActiveMaster = true;
269    }
270  }
271
272  private static TaskGroup mockTaskGroup() {
273    TaskGroup taskGroup = Mockito.mock(TaskGroup.class);
274    MonitoredTask task = Mockito.mock(MonitoredTask.class);
275    when(taskGroup.addTask(any())).thenReturn(task);
276    return taskGroup;
277  }
278
279  public static class NodeDeletionListener extends ZKListener {
280    private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class);
281
282    private Semaphore lock;
283    private String node;
284
285    public NodeDeletionListener(ZKWatcher watcher, String node) {
286      super(watcher);
287      lock = new Semaphore(0);
288      this.node = node;
289    }
290
291    @Override
292    public void nodeDeleted(String path) {
293      if (path.equals(node)) {
294        LOG.debug("nodeDeleted(" + path + ")");
295        lock.release();
296      }
297    }
298
299    public void waitForDeletion() throws InterruptedException {
300      lock.acquire();
301    }
302  }
303
304  /**
305   * Dummy Master Implementation.
306   */
307  public static class DummyMaster extends MockServer {
308    private ClusterStatusTracker clusterStatusTracker;
309    private ActiveMasterManager activeMasterManager;
310
311    public DummyMaster(ZKWatcher zk, ServerName master) throws InterruptedIOException {
312      this.clusterStatusTracker = new ClusterStatusTracker(zk, this);
313      clusterStatusTracker.start();
314
315      this.activeMasterManager = new ActiveMasterManager(zk, master, this);
316      zk.registerListener(activeMasterManager);
317    }
318
319    public ClusterStatusTracker getClusterStatusTracker() {
320      return clusterStatusTracker;
321    }
322
323    public ActiveMasterManager getActiveMasterManager() {
324      return activeMasterManager;
325    }
326
327    @Override
328    public KeyManagementService getKeyManagementService() {
329      return null;
330    }
331  }
332}