001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.Assert.assertFalse;
021import static org.junit.Assert.assertNotNull;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.concurrent.Semaphore;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.fs.FileSystem;
028import org.apache.hadoop.hbase.ChoreService;
029import org.apache.hadoop.hbase.CoordinatedStateManager;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HBaseTestingUtility;
032import org.apache.hadoop.hbase.Server;
033import org.apache.hadoop.hbase.ServerName;
034import org.apache.hadoop.hbase.client.ClusterConnection;
035import org.apache.hadoop.hbase.client.Connection;
036import org.apache.hadoop.hbase.monitoring.MonitoredTask;
037import org.apache.hadoop.hbase.testclassification.MasterTests;
038import org.apache.hadoop.hbase.testclassification.MediumTests;
039import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
040import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
041import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
042import org.apache.hadoop.hbase.zookeeper.ZKListener;
043import org.apache.hadoop.hbase.zookeeper.ZKUtil;
044import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
045import org.apache.zookeeper.KeeperException;
046import org.junit.AfterClass;
047import org.junit.BeforeClass;
048import org.junit.ClassRule;
049import org.junit.Test;
050import org.junit.experimental.categories.Category;
051import org.mockito.Mockito;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054
055/**
056 * Test the {@link ActiveMasterManager}.
057 */
058@Category({MasterTests.class, MediumTests.class})
059public class TestActiveMasterManager {
060
061  @ClassRule
062  public static final HBaseClassTestRule CLASS_RULE =
063      HBaseClassTestRule.forClass(TestActiveMasterManager.class);
064
065  private final static Logger LOG = LoggerFactory.getLogger(TestActiveMasterManager.class);
066  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
067
068  @BeforeClass
069  public static void setUpBeforeClass() throws Exception {
070    TEST_UTIL.startMiniZKCluster();
071  }
072
073  @AfterClass
074  public static void tearDownAfterClass() throws Exception {
075    TEST_UTIL.shutdownMiniZKCluster();
076  }
077
078  @Test public void testRestartMaster() throws IOException, KeeperException {
079    ZKWatcher zk = new ZKWatcher(TEST_UTIL.getConfiguration(),
080      "testActiveMasterManagerFromZK", null, true);
081    try {
082      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
083      ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
084    } catch(KeeperException.NoNodeException nne) {}
085
086    // Create the master node with a dummy address
087    ServerName master = ServerName.valueOf("localhost", 1, System.currentTimeMillis());
088    // Should not have a master yet
089    DummyMaster dummyMaster = new DummyMaster(zk,master);
090    ClusterStatusTracker clusterStatusTracker =
091      dummyMaster.getClusterStatusTracker();
092    ActiveMasterManager activeMasterManager =
093      dummyMaster.getActiveMasterManager();
094    assertFalse(activeMasterManager.clusterHasActiveMaster.get());
095
096    // First test becoming the active master uninterrupted
097    MonitoredTask status = Mockito.mock(MonitoredTask.class);
098    clusterStatusTracker.setClusterUp();
099
100    activeMasterManager.blockUntilBecomingActiveMaster(100, status);
101    assertTrue(activeMasterManager.clusterHasActiveMaster.get());
102    assertMaster(zk, master);
103
104    // Now pretend master restart
105    DummyMaster secondDummyMaster = new DummyMaster(zk,master);
106    ActiveMasterManager secondActiveMasterManager =
107      secondDummyMaster.getActiveMasterManager();
108    assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get());
109    activeMasterManager.blockUntilBecomingActiveMaster(100, status);
110    assertTrue(activeMasterManager.clusterHasActiveMaster.get());
111    assertMaster(zk, master);
112  }
113
114  /**
115   * Unit tests that uses ZooKeeper but does not use the master-side methods
116   * but rather acts directly on ZK.
117   * @throws Exception
118   */
119  @Test
120  public void testActiveMasterManagerFromZK() throws Exception {
121    ZKWatcher zk = new ZKWatcher(TEST_UTIL.getConfiguration(),
122      "testActiveMasterManagerFromZK", null, true);
123    try {
124      ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
125      ZKUtil.deleteNode(zk, zk.getZNodePaths().clusterStateZNode);
126    } catch(KeeperException.NoNodeException nne) {}
127
128    // Create the master node with a dummy address
129    ServerName firstMasterAddress =
130        ServerName.valueOf("localhost", 1, System.currentTimeMillis());
131    ServerName secondMasterAddress =
132        ServerName.valueOf("localhost", 2, System.currentTimeMillis());
133
134    // Should not have a master yet
135    DummyMaster ms1 = new DummyMaster(zk,firstMasterAddress);
136    ActiveMasterManager activeMasterManager =
137      ms1.getActiveMasterManager();
138    assertFalse(activeMasterManager.clusterHasActiveMaster.get());
139
140    // First test becoming the active master uninterrupted
141    ClusterStatusTracker clusterStatusTracker =
142      ms1.getClusterStatusTracker();
143    clusterStatusTracker.setClusterUp();
144    activeMasterManager.blockUntilBecomingActiveMaster(100,
145        Mockito.mock(MonitoredTask.class));
146    assertTrue(activeMasterManager.clusterHasActiveMaster.get());
147    assertMaster(zk, firstMasterAddress);
148
149    // New manager will now try to become the active master in another thread
150    WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress);
151    t.start();
152    // Wait for this guy to figure out there is another active master
153    // Wait for 1 second at most
154    int sleeps = 0;
155    while(!t.manager.clusterHasActiveMaster.get() && sleeps < 100) {
156      Thread.sleep(10);
157      sleeps++;
158    }
159
160    // Both should see that there is an active master
161    assertTrue(activeMasterManager.clusterHasActiveMaster.get());
162    assertTrue(t.manager.clusterHasActiveMaster.get());
163    // But secondary one should not be the active master
164    assertFalse(t.isActiveMaster);
165
166    // Close the first server and delete it's master node
167    ms1.stop("stopping first server");
168
169    // Use a listener to capture when the node is actually deleted
170    NodeDeletionListener listener = new NodeDeletionListener(zk,
171            zk.getZNodePaths().masterAddressZNode);
172    zk.registerListener(listener);
173
174    LOG.info("Deleting master node");
175    ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
176
177    // Wait for the node to be deleted
178    LOG.info("Waiting for active master manager to be notified");
179    listener.waitForDeletion();
180    LOG.info("Master node deleted");
181
182    // Now we expect the secondary manager to have and be the active master
183    // Wait for 1 second at most
184    sleeps = 0;
185    while(!t.isActiveMaster && sleeps < 100) {
186      Thread.sleep(10);
187      sleeps++;
188    }
189    LOG.debug("Slept " + sleeps + " times");
190
191    assertTrue(t.manager.clusterHasActiveMaster.get());
192    assertTrue(t.isActiveMaster);
193
194    LOG.info("Deleting master node");
195
196    ZKUtil.deleteNode(zk, zk.getZNodePaths().masterAddressZNode);
197  }
198
199  /**
200   * Assert there is an active master and that it has the specified address.
201   * @param zk single Zookeeper watcher
202   * @param expectedAddress the expected address of the master
203   * @throws KeeperException unexpected Zookeeper exception
204   * @throws IOException if an IO problem is encountered
205   */
206  private void assertMaster(ZKWatcher zk,
207      ServerName expectedAddress)
208  throws KeeperException, IOException {
209    ServerName readAddress = MasterAddressTracker.getMasterAddress(zk);
210    assertNotNull(readAddress);
211    assertTrue(expectedAddress.equals(readAddress));
212  }
213
214  public static class WaitToBeMasterThread extends Thread {
215
216    ActiveMasterManager manager;
217    DummyMaster dummyMaster;
218    boolean isActiveMaster;
219
220    public WaitToBeMasterThread(ZKWatcher zk, ServerName address) {
221      this.dummyMaster = new DummyMaster(zk,address);
222      this.manager = this.dummyMaster.getActiveMasterManager();
223      isActiveMaster = false;
224    }
225
226    @Override
227    public void run() {
228      manager.blockUntilBecomingActiveMaster(100,
229          Mockito.mock(MonitoredTask.class));
230      LOG.info("Second master has become the active master!");
231      isActiveMaster = true;
232    }
233  }
234
235  public static class NodeDeletionListener extends ZKListener {
236    private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class);
237
238    private Semaphore lock;
239    private String node;
240
241    public NodeDeletionListener(ZKWatcher watcher, String node) {
242      super(watcher);
243      lock = new Semaphore(0);
244      this.node = node;
245    }
246
247    @Override
248    public void nodeDeleted(String path) {
249      if(path.equals(node)) {
250        LOG.debug("nodeDeleted(" + path + ")");
251        lock.release();
252      }
253    }
254
255    public void waitForDeletion() throws InterruptedException {
256      lock.acquire();
257    }
258  }
259
260  /**
261   * Dummy Master Implementation.
262   */
263  public static class DummyMaster implements Server {
264    private volatile boolean stopped;
265    private ClusterStatusTracker clusterStatusTracker;
266    private ActiveMasterManager activeMasterManager;
267
268    public DummyMaster(ZKWatcher zk, ServerName master) {
269      this.clusterStatusTracker =
270        new ClusterStatusTracker(zk, this);
271      clusterStatusTracker.start();
272
273      this.activeMasterManager =
274        new ActiveMasterManager(zk, master, this);
275      zk.registerListener(activeMasterManager);
276    }
277
278    @Override
279    public void abort(final String msg, final Throwable t) {}
280
281    @Override
282    public boolean isAborted() {
283      return false;
284    }
285
286    @Override
287    public Configuration getConfiguration() {
288      return null;
289    }
290
291    @Override
292    public ZKWatcher getZooKeeper() {
293      return null;
294    }
295
296    @Override
297    public CoordinatedStateManager getCoordinatedStateManager() {
298      return null;
299    }
300
301    @Override
302    public ServerName getServerName() {
303      return null;
304    }
305
306    @Override
307    public boolean isStopped() {
308      return this.stopped;
309    }
310
311    @Override
312    public void stop(String why) {
313      this.stopped = true;
314    }
315
316    @Override
317    public ClusterConnection getConnection() {
318      return null;
319    }
320
321    @Override
322    public MetaTableLocator getMetaTableLocator() {
323      return null;
324    }
325
326    public ClusterStatusTracker getClusterStatusTracker() {
327      return clusterStatusTracker;
328    }
329
330    public ActiveMasterManager getActiveMasterManager() {
331      return activeMasterManager;
332    }
333
334    @Override
335    public ChoreService getChoreService() {
336      return null;
337    }
338
339    @Override
340    public ClusterConnection getClusterConnection() {
341      // TODO Auto-generated method stub
342      return null;
343    }
344
345    @Override
346    public FileSystem getFileSystem() {
347      return null;
348    }
349
350    @Override
351    public boolean isStopping() {
352      return false;
353    }
354
355    @Override
356    public Connection createConnection(Configuration conf) throws IOException {
357      return null;
358    }
359  }
360}