001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertNotNull;
023import static org.junit.Assert.assertTrue;
024
025import java.io.IOException;
026import java.util.concurrent.Semaphore;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.FileSystem;
029import org.apache.hadoop.hbase.ChoreService;
030import org.apache.hadoop.hbase.CoordinatedStateManager;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseTestingUtility;
033import org.apache.hadoop.hbase.Server;
034import org.apache.hadoop.hbase.ServerName;
035import org.apache.hadoop.hbase.client.ClusterConnection;
036import org.apache.hadoop.hbase.client.Connection;
037import org.apache.hadoop.hbase.monitoring.MonitoredTask;
038import org.apache.hadoop.hbase.testclassification.MasterTests;
039import org.apache.hadoop.hbase.testclassification.MediumTests;
040import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
041import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
042import org.apache.hadoop.hbase.zookeeper.ZKListener;
043import org.apache.hadoop.hbase.zookeeper.ZKUtil;
044import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
045import org.apache.zookeeper.KeeperException;
046import org.junit.AfterClass;
047import org.junit.BeforeClass;
048import org.junit.ClassRule;
049import org.junit.Test;
050import org.junit.experimental.categories.Category;
051import org.mockito.Mockito;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054
055/**
056 * Test the {@link ActiveMasterManager}.
057 */
058@Category({MasterTests.class, MediumTests.class})
059public class TestActiveMasterManager {
060
061  @ClassRule
062  public static final HBaseClassTestRule CLASS_RULE =
063      HBaseClassTestRule.forClass(TestActiveMasterManager.class);
064
065  private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class);
066  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
067
068  @BeforeClass
069  public static void setUpBeforeClass() throws Exception {
070    TEST_UTIL.startMiniZKCluster();
071  }
072
073  @AfterClass
074  public static void tearDownAfterClass() throws Exception {
075    TEST_UTIL.shutdownMiniZKCluster();
076  }
077
078  @Test public void testRestartMaster() throws IOException, KeeperException {
079    ZKWatcher zk = new ZKWatcher(TEST_UTIL.getConfiguration(),
080      "testActiveMasterManagerFromZK", null, true);
081    try {
082      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
083      ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
084    } catch(KeeperException.NoNodeException nne) {}
085
086    // Create the master node with a dummy address
087    ServerName master = ServerName.valueOf("localhost", 1, System.currentTimeMillis());
088    // Should not have a master yet
089    DummyMaster dummyMaster = new DummyMaster(zk,master);
090    ClusterStatusTracker clusterStatusTracker =
091      dummyMaster.getClusterStatusTracker();
092    ActiveMasterManager activeMasterManager =
093      dummyMaster.getActiveMasterManager();
094    assertFalse(activeMasterManager.clusterHasActiveMaster.get());
095    assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
096
097    // First test becoming the active master uninterrupted
098    MonitoredTask status = Mockito.mock(MonitoredTask.class);
099    clusterStatusTracker.setClusterUp();
100
101    activeMasterManager.blockUntilBecomingActiveMaster(100, status);
102    assertTrue(activeMasterManager.clusterHasActiveMaster.get());
103    assertMaster(zk, master);
104    assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
105
106    // Now pretend master restart
107    DummyMaster secondDummyMaster = new DummyMaster(zk,master);
108    ActiveMasterManager secondActiveMasterManager =
109      secondDummyMaster.getActiveMasterManager();
110    assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get());
111    activeMasterManager.blockUntilBecomingActiveMaster(100, status);
112    assertTrue(activeMasterManager.clusterHasActiveMaster.get());
113    assertMaster(zk, master);
114    assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
115    assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get());
116  }
117
118  /**
119   * Unit tests that uses ZooKeeper but does not use the master-side methods
120   * but rather acts directly on ZK.
121   * @throws Exception
122   */
123  @Test
124  public void testActiveMasterManagerFromZK() throws Exception {
125    ZKWatcher zk = new ZKWatcher(TEST_UTIL.getConfiguration(),
126      "testActiveMasterManagerFromZK", null, true);
127    try {
128      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
129      ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
130    } catch(KeeperException.NoNodeException nne) {}
131
132    // Create the master node with a dummy address
133    ServerName firstMasterAddress =
134        ServerName.valueOf("localhost", 1, System.currentTimeMillis());
135    ServerName secondMasterAddress =
136        ServerName.valueOf("localhost", 2, System.currentTimeMillis());
137
138    // Should not have a master yet
139    DummyMaster ms1 = new DummyMaster(zk,firstMasterAddress);
140    ActiveMasterManager activeMasterManager =
141      ms1.getActiveMasterManager();
142    assertFalse(activeMasterManager.clusterHasActiveMaster.get());
143    assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
144
145    // First test becoming the active master uninterrupted
146    ClusterStatusTracker clusterStatusTracker =
147      ms1.getClusterStatusTracker();
148    clusterStatusTracker.setClusterUp();
149    activeMasterManager.blockUntilBecomingActiveMaster(100,
150        Mockito.mock(MonitoredTask.class));
151    assertTrue(activeMasterManager.clusterHasActiveMaster.get());
152    assertMaster(zk, firstMasterAddress);
153    assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
154
155    // New manager will now try to become the active master in another thread
156    WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress);
157    t.start();
158    // Wait for this guy to figure out there is another active master
159    // Wait for 1 second at most
160    int sleeps = 0;
161    while(!t.manager.clusterHasActiveMaster.get() && sleeps < 100) {
162      Thread.sleep(10);
163      sleeps++;
164    }
165
166    // Both should see that there is an active master
167    assertTrue(activeMasterManager.clusterHasActiveMaster.get());
168    assertTrue(t.manager.clusterHasActiveMaster.get());
169    // But secondary one should not be the active master
170    assertFalse(t.isActiveMaster);
171    // Verify the active master ServerName is populated in standby master.
172    assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get());
173
174    // Close the first server and delete it's master node
175    ms1.stop("stopping first server");
176
177    // Use a listener to capture when the node is actually deleted
178    NodeDeletionListener listener = new NodeDeletionListener(zk,
179            zk.getZNodePaths().masterAddressZNode);
180    zk.registerListener(listener);
181
182    LOG.info("Deleting master node");
183    ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
184
185    // Wait for the node to be deleted
186    LOG.info("Waiting for active master manager to be notified");
187    listener.waitForDeletion();
188    LOG.info("Master node deleted");
189
190    // Now we expect the secondary manager to have and be the active master
191    // Wait for 1 second at most
192    sleeps = 0;
193    while(!t.isActiveMaster && sleeps < 100) {
194      Thread.sleep(10);
195      sleeps++;
196    }
197    LOG.debug("Slept " + sleeps + " times");
198
199    assertTrue(t.manager.clusterHasActiveMaster.get());
200    assertTrue(t.isActiveMaster);
201    assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get());
202
203    LOG.info("Deleting master node");
204
205    ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
206  }
207
208  /**
209   * Assert there is an active master and that it has the specified address.
210   * @param zk single Zookeeper watcher
211   * @param expectedAddress the expected address of the master
212   * @throws KeeperException unexpected Zookeeper exception
213   * @throws IOException if an IO problem is encountered
214   */
215  private void assertMaster(ZKWatcher zk,
216      ServerName expectedAddress)
217  throws KeeperException, IOException {
218    ServerName readAddress = MasterAddressTracker.getMasterAddress(zk);
219    assertNotNull(readAddress);
220    assertTrue(expectedAddress.equals(readAddress));
221  }
222
223  public static class WaitToBeMasterThread extends Thread {
224
225    ActiveMasterManager manager;
226    DummyMaster dummyMaster;
227    boolean isActiveMaster;
228
229    public WaitToBeMasterThread(ZKWatcher zk, ServerName address) {
230      this.dummyMaster = new DummyMaster(zk,address);
231      this.manager = this.dummyMaster.getActiveMasterManager();
232      isActiveMaster = false;
233    }
234
235    @Override
236    public void run() {
237      manager.blockUntilBecomingActiveMaster(100,
238          Mockito.mock(MonitoredTask.class));
239      LOG.info("Second master has become the active master!");
240      isActiveMaster = true;
241    }
242  }
243
244  public static class NodeDeletionListener extends ZKListener {
245    private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class);
246
247    private Semaphore lock;
248    private String node;
249
250    public NodeDeletionListener(ZKWatcher watcher, String node) {
251      super(watcher);
252      lock = new Semaphore(0);
253      this.node = node;
254    }
255
256    @Override
257    public void nodeDeleted(String path) {
258      if(path.equals(node)) {
259        LOG.debug("nodeDeleted(" + path + ")");
260        lock.release();
261      }
262    }
263
264    public void waitForDeletion() throws InterruptedException {
265      lock.acquire();
266    }
267  }
268
269  /**
270   * Dummy Master Implementation.
271   */
272  public static class DummyMaster implements Server {
273    private volatile boolean stopped;
274    private ClusterStatusTracker clusterStatusTracker;
275    private ActiveMasterManager activeMasterManager;
276
277    public DummyMaster(ZKWatcher zk, ServerName master) {
278      this.clusterStatusTracker =
279        new ClusterStatusTracker(zk, this);
280      clusterStatusTracker.start();
281
282      this.activeMasterManager =
283        new ActiveMasterManager(zk, master, this);
284      zk.registerListener(activeMasterManager);
285    }
286
287    @Override
288    public void abort(final String msg, final Throwable t) {}
289
290    @Override
291    public boolean isAborted() {
292      return false;
293    }
294
295    @Override
296    public Configuration getConfiguration() {
297      return null;
298    }
299
300    @Override
301    public ZKWatcher getZooKeeper() {
302      return null;
303    }
304
305    @Override
306    public CoordinatedStateManager getCoordinatedStateManager() {
307      return null;
308    }
309
310    @Override
311    public ServerName getServerName() {
312      return null;
313    }
314
315    @Override
316    public boolean isStopped() {
317      return this.stopped;
318    }
319
320    @Override
321    public void stop(String why) {
322      this.stopped = true;
323    }
324
325    @Override
326    public ClusterConnection getConnection() {
327      return null;
328    }
329
330    public ClusterStatusTracker getClusterStatusTracker() {
331      return clusterStatusTracker;
332    }
333
334    public ActiveMasterManager getActiveMasterManager() {
335      return activeMasterManager;
336    }
337
338    @Override
339    public ChoreService getChoreService() {
340      return null;
341    }
342
343    @Override
344    public ClusterConnection getClusterConnection() {
345      // TODO Auto-generated method stub
346      return null;
347    }
348
349    @Override
350    public FileSystem getFileSystem() {
351      return null;
352    }
353
354    @Override
355    public boolean isStopping() {
356      return false;
357    }
358
359    @Override
360    public Connection createConnection(Configuration conf) throws IOException {
361      return null;
362    }
363  }
364}