001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023
024import edu.umd.cs.findbugs.annotations.NonNull;
025import java.util.List;
026import java.util.Map;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.client.Admin;
029import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
030import org.apache.hadoop.hbase.client.Put;
031import org.apache.hadoop.hbase.client.RegionInfo;
032import org.apache.hadoop.hbase.client.ResultScanner;
033import org.apache.hadoop.hbase.client.Scan;
034import org.apache.hadoop.hbase.client.Table;
035import org.apache.hadoop.hbase.client.TableDescriptor;
036import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
037import org.apache.hadoop.hbase.coordination.ZkSplitLogWorkerCoordination;
038import org.apache.hadoop.hbase.master.HMaster;
039import org.apache.hadoop.hbase.master.LoadBalancer;
040import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
041import org.apache.hadoop.hbase.testclassification.MediumTests;
042import org.apache.hadoop.hbase.testclassification.MiscTests;
043import org.apache.hadoop.hbase.util.Bytes;
044import org.apache.hadoop.hbase.util.CommonFSUtils;
045import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
046import org.apache.hadoop.hbase.zookeeper.ZKUtil;
047import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
048import org.apache.zookeeper.KeeperException;
049import org.junit.After;
050import org.junit.AfterClass;
051import org.junit.Before;
052import org.junit.BeforeClass;
053import org.junit.ClassRule;
054import org.junit.Rule;
055import org.junit.Test;
056import org.junit.experimental.categories.Category;
057import org.junit.rules.TestName;
058import org.slf4j.Logger;
059import org.slf4j.LoggerFactory;
060
061@Category({ MiscTests.class, MediumTests.class })
062public class TestZooKeeper {
063
064  @ClassRule
065  public static final HBaseClassTestRule CLASS_RULE =
066    HBaseClassTestRule.forClass(TestZooKeeper.class);
067
068  private static final Logger LOG = LoggerFactory.getLogger(TestZooKeeper.class);
069
070  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
071
072  @Rule
073  public TestName name = new TestName();
074
075  @BeforeClass
076  public static void setUpBeforeClass() throws Exception {
077    // Test we can first start the ZK cluster by itself
078    Configuration conf = TEST_UTIL.getConfiguration();
079    TEST_UTIL.startMiniDFSCluster(2);
080    TEST_UTIL.startMiniZKCluster();
081    conf.set(HConstants.CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY,
082      HConstants.ZK_CONNECTION_REGISTRY_CLASS);
083    conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000);
084    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockLoadBalancer.class,
085      LoadBalancer.class);
086    TEST_UTIL.startMiniDFSCluster(2);
087  }
088
  /**
   * Shuts down the mini cluster owned by {@code TEST_UTIL} once all tests in this class have run.
   * @throws Exception if the shutdown fails
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }
093
094  @Before
095  public void setUp() throws Exception {
096    StartTestingClusterOption option =
097      StartTestingClusterOption.builder().numMasters(2).numRegionServers(2).build();
098    TEST_UTIL.startMiniHBaseCluster(option);
099  }
100
101  @After
102  public void after() throws Exception {
103    try {
104      TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(10000);
105      // Some regionserver could fail to delete its znode.
106      // So shutdown could hang. Let's kill them all instead.
107      TEST_UTIL.getHBaseCluster().killAll();
108
109      // Still need to clean things up
110      TEST_UTIL.shutdownMiniHBaseCluster();
111    } finally {
112      TEST_UTIL.getTestFileSystem().delete(CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration()),
113        true);
114      ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
115    }
116  }
117
118  @Test
119  public void testRegionServerSessionExpired() throws Exception {
120    LOG.info("Starting " + name.getMethodName());
121    TEST_UTIL.expireRegionServerSession(0);
122    testSanity(name.getMethodName());
123  }
124
125  @Test
126  public void testMasterSessionExpired() throws Exception {
127    LOG.info("Starting " + name.getMethodName());
128    TEST_UTIL.expireMasterSession();
129    testSanity(name.getMethodName());
130  }
131
132  /**
133   * Master recovery when the znode already exists. Internally, this test differs from
134   * {@link #testMasterSessionExpired} because here the master znode will exist in ZK.
135   */
136  @Test
137  public void testMasterZKSessionRecoveryFailure() throws Exception {
138    LOG.info("Starting " + name.getMethodName());
139    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
140    HMaster m = cluster.getMaster();
141    m.abort("Test recovery from zk session expired", new KeeperException.SessionExpiredException());
142    assertTrue(m.isStopped()); // Master doesn't recover any more
143    testSanity(name.getMethodName());
144  }
145
146  /**
147   * Make sure we can use the cluster
148   */
149  private void testSanity(final String testName) throws Exception {
150    String tableName = testName + "_" + EnvironmentEdgeManager.currentTime();
151    TableDescriptor desc = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName))
152      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build();
153    LOG.info("Creating table " + tableName);
154    Admin admin = TEST_UTIL.getAdmin();
155    try {
156      admin.createTable(desc);
157    } finally {
158      admin.close();
159    }
160
161    Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
162    Put put = new Put(Bytes.toBytes("testrow"));
163    put.addColumn(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
164    LOG.info("Putting table " + tableName);
165    table.put(put);
166    table.close();
167  }
168
169  /**
170   * Tests that the master does not call retainAssignment after recovery from expired zookeeper
171   * session. Without the HBASE-6046 fix master always tries to assign all the user regions by
172   * calling retainAssignment.
173   */
174  @Test
175  public void testRegionAssignmentAfterMasterRecoveryDueToZKExpiry() throws Exception {
176    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
177    cluster.startRegionServer();
178    cluster.waitForActiveAndReadyMaster(10000);
179    HMaster m = cluster.getMaster();
180    final ZKWatcher zkw = m.getZooKeeper();
181    // now the cluster is up. So assign some regions.
182    try (Admin admin = TEST_UTIL.getAdmin()) {
183      byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"),
184        Bytes.toBytes("c"), Bytes.toBytes("d"), Bytes.toBytes("e"), Bytes.toBytes("f"),
185        Bytes.toBytes("g"), Bytes.toBytes("h"), Bytes.toBytes("i"), Bytes.toBytes("j") };
186      TableDescriptor htd =
187        TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()))
188          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
189      admin.createTable(htd, SPLIT_KEYS);
190      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
191      m.getZooKeeper().close();
192      MockLoadBalancer.retainAssignCalled = false;
193      final int expectedNumOfListeners = countPermanentListeners(zkw);
194      // the master could already been aborted by some background tasks but here we call abort
195      // directly to make sure this will happen
196      m.abort("Test recovery from zk session expired",
197        new KeeperException.SessionExpiredException());
198      // it is possible that our abort call above returned earlier because of someone else has
199      // already called abort, but it is possible that it has not finished the abort call yet so the
200      // isStopped flag is still false, let's wait for sometime.
201      TEST_UTIL.waitFor(5000, () -> m.isStopped()); // Master doesn't recover any more
202
203      // The recovered master should not call retainAssignment, as it is not a
204      // clean startup.
205      assertFalse("Retain assignment should not be called", MockLoadBalancer.retainAssignCalled);
206      // number of listeners should be same as the value before master aborted
207      // wait for new master is initialized
208      cluster.waitForActiveAndReadyMaster(120000);
209      final HMaster newMaster = cluster.getMasterThread().getMaster();
210      assertEquals(expectedNumOfListeners, countPermanentListeners(newMaster.getZooKeeper()));
211    }
212  }
213
214  /**
215   * Count listeners in zkw excluding listeners, that belongs to workers or other temporary
216   * processes.
217   */
218  private int countPermanentListeners(ZKWatcher watcher) {
219    return countListeners(watcher, ZkSplitLogWorkerCoordination.class);
220  }
221
222  /**
223   * Count listeners in zkw excluding provided classes
224   */
225  private int countListeners(ZKWatcher watcher, Class<?>... exclude) {
226    int cnt = 0;
227    for (Object o : watcher.getListeners()) {
228      boolean skip = false;
229      for (Class<?> aClass : exclude) {
230        if (aClass.isAssignableFrom(o.getClass())) {
231          skip = true;
232          break;
233        }
234      }
235      if (!skip) {
236        cnt += 1;
237      }
238    }
239    return cnt;
240  }
241
242  /**
243   * Tests whether the logs are split when master recovers from a expired zookeeper session and an
244   * RS goes down.
245   */
246  @Test
247  public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws Exception {
248    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
249    cluster.startRegionServer();
250    TableName tableName = TableName.valueOf(name.getMethodName());
251    byte[] family = Bytes.toBytes("col");
252    try (Admin admin = TEST_UTIL.getAdmin()) {
253      byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("1"), Bytes.toBytes("2"),
254        Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") };
255      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
256        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
257      admin.createTable(htd, SPLIT_KEYS);
258    }
259    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
260    HMaster m = cluster.getMaster();
261    try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
262      int numberOfPuts;
263      for (numberOfPuts = 0; numberOfPuts < 6; numberOfPuts++) {
264        Put p = new Put(Bytes.toBytes(numberOfPuts));
265        p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"),
266          Bytes.toBytes("value" + numberOfPuts));
267        table.put(p);
268      }
269      m.abort("Test recovery from zk session expired",
270        new KeeperException.SessionExpiredException());
271      assertTrue(m.isStopped()); // Master doesn't recover any more
272      cluster.killRegionServer(TEST_UTIL.getRSForFirstRegionInTable(tableName).getServerName());
273      // Without patch for HBASE-6046 this test case will always timeout
274      // with patch the test case should pass.
275      int numberOfRows = 0;
276      try (ResultScanner scanner = table.getScanner(new Scan())) {
277        while (scanner.next() != null) {
278          numberOfRows++;
279        }
280      }
281      assertEquals("Number of rows should be equal to number of puts.", numberOfPuts, numberOfRows);
282    }
283  }
284
285  static class MockLoadBalancer extends SimpleLoadBalancer {
286    static boolean retainAssignCalled = false;
287
288    @Override
289    @NonNull
290    public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions,
291      List<ServerName> servers) throws HBaseIOException {
292      retainAssignCalled = true;
293      return super.retainAssignment(regions, servers);
294    }
295  }
296
297}