001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023
024import java.util.List;
025import java.util.Map;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.hbase.client.Admin;
028import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
029import org.apache.hadoop.hbase.client.Put;
030import org.apache.hadoop.hbase.client.RegionInfo;
031import org.apache.hadoop.hbase.client.ResultScanner;
032import org.apache.hadoop.hbase.client.Scan;
033import org.apache.hadoop.hbase.client.Table;
034import org.apache.hadoop.hbase.client.TableDescriptor;
035import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
036import org.apache.hadoop.hbase.coordination.ZkSplitLogWorkerCoordination;
037import org.apache.hadoop.hbase.master.HMaster;
038import org.apache.hadoop.hbase.master.LoadBalancer;
039import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
040import org.apache.hadoop.hbase.testclassification.LargeTests;
041import org.apache.hadoop.hbase.testclassification.MiscTests;
042import org.apache.hadoop.hbase.util.Bytes;
043import org.apache.hadoop.hbase.util.FSUtils;
044import org.apache.hadoop.hbase.zookeeper.ZKUtil;
045import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
046import org.apache.zookeeper.KeeperException;
047import org.junit.After;
048import org.junit.AfterClass;
049import org.junit.Before;
050import org.junit.BeforeClass;
051import org.junit.ClassRule;
052import org.junit.Rule;
053import org.junit.Test;
054import org.junit.experimental.categories.Category;
055import org.junit.rules.TestName;
056import org.slf4j.Logger;
057import org.slf4j.LoggerFactory;
058
059@Category({MiscTests.class, LargeTests.class})
060public class TestZooKeeper {
061
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE =
064      HBaseClassTestRule.forClass(TestZooKeeper.class);
065
066  private static final Logger LOG = LoggerFactory.getLogger(TestZooKeeper.class);
067
068  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
069
070  @Rule
071  public TestName name = new TestName();
072
073  @BeforeClass
074  public static void setUpBeforeClass() throws Exception {
075    // Test we can first start the ZK cluster by itself
076    Configuration conf = TEST_UTIL.getConfiguration();
077    TEST_UTIL.startMiniDFSCluster(2);
078    TEST_UTIL.startMiniZKCluster();
079    conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000);
080    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockLoadBalancer.class,
081        LoadBalancer.class);
082  }
083
084  @AfterClass
085  public static void tearDownAfterClass() throws Exception {
086    TEST_UTIL.shutdownMiniCluster();
087  }
088
089  @Before
090  public void setUp() throws Exception {
091    StartMiniClusterOption option = StartMiniClusterOption.builder()
092        .numMasters(2).numRegionServers(2).build();
093    TEST_UTIL.startMiniHBaseCluster(option);
094  }
095
096  @After
097  public void after() throws Exception {
098    try {
099      TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(10000);
100      // Some regionserver could fail to delete its znode.
101      // So shutdown could hang. Let's kill them all instead.
102      TEST_UTIL.getHBaseCluster().killAll();
103
104      // Still need to clean things up
105      TEST_UTIL.shutdownMiniHBaseCluster();
106    } finally {
107      TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
108      ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
109    }
110  }
111
112  @Test
113  public void testRegionServerSessionExpired() throws Exception {
114    LOG.info("Starting " + name.getMethodName());
115    TEST_UTIL.expireRegionServerSession(0);
116    testSanity(name.getMethodName());
117  }
118
119  @Test
120  public void testMasterSessionExpired() throws Exception {
121    LOG.info("Starting " + name.getMethodName());
122    TEST_UTIL.expireMasterSession();
123    testSanity(name.getMethodName());
124  }
125
126  /**
127   * Master recovery when the znode already exists. Internally, this
128   *  test differs from {@link #testMasterSessionExpired} because here
129   *  the master znode will exist in ZK.
130   */
131  @Test
132  public void testMasterZKSessionRecoveryFailure() throws Exception {
133    LOG.info("Starting " + name.getMethodName());
134    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
135    HMaster m = cluster.getMaster();
136    m.abort("Test recovery from zk session expired",
137        new KeeperException.SessionExpiredException());
138    assertTrue(m.isStopped()); // Master doesn't recover any more
139    testSanity(name.getMethodName());
140  }
141
142  /**
143   * Make sure we can use the cluster
144   */
145  private void testSanity(final String testName) throws Exception {
146    String tableName = testName + "_" + System.currentTimeMillis();
147    TableDescriptor desc = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName))
148        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build();
149    LOG.info("Creating table " + tableName);
150    Admin admin = TEST_UTIL.getAdmin();
151    try {
152      admin.createTable(desc);
153    } finally {
154      admin.close();
155    }
156
157    Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
158    Put put = new Put(Bytes.toBytes("testrow"));
159    put.addColumn(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
160    LOG.info("Putting table " + tableName);
161    table.put(put);
162    table.close();
163  }
164
165  /**
166   * Tests that the master does not call retainAssignment after recovery from expired zookeeper
167   * session. Without the HBASE-6046 fix master always tries to assign all the user regions by
168   * calling retainAssignment.
169   */
170  @Test
171  public void testRegionAssignmentAfterMasterRecoveryDueToZKExpiry() throws Exception {
172    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
173    cluster.startRegionServer();
174    cluster.waitForActiveAndReadyMaster(10000);
175    HMaster m = cluster.getMaster();
176    final ZKWatcher zkw = m.getZooKeeper();
177    // now the cluster is up. So assign some regions.
178    try (Admin admin = TEST_UTIL.getAdmin()) {
179      byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"),
180          Bytes.toBytes("c"), Bytes.toBytes("d"), Bytes.toBytes("e"), Bytes.toBytes("f"),
181          Bytes.toBytes("g"), Bytes.toBytes("h"), Bytes.toBytes("i"), Bytes.toBytes("j") };
182      TableDescriptor htd =
183          TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()))
184              .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
185      admin.createTable(htd, SPLIT_KEYS);
186      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
187      m.getZooKeeper().close();
188      MockLoadBalancer.retainAssignCalled = false;
189      final int expectedNumOfListeners = countPermanentListeners(zkw);
190      m.abort("Test recovery from zk session expired",
191          new KeeperException.SessionExpiredException());
192      assertTrue(m.isStopped()); // Master doesn't recover any more
193      // The recovered master should not call retainAssignment, as it is not a
194      // clean startup.
195      assertFalse("Retain assignment should not be called", MockLoadBalancer.retainAssignCalled);
196      // number of listeners should be same as the value before master aborted
197      // wait for new master is initialized
198      cluster.waitForActiveAndReadyMaster(120000);
199      final HMaster newMaster = cluster.getMasterThread().getMaster();
200      assertEquals(expectedNumOfListeners, countPermanentListeners(newMaster.getZooKeeper()));
201    }
202  }
203
204  /**
205   * Count listeners in zkw excluding listeners, that belongs to workers or other
206   * temporary processes.
207   */
208  private int countPermanentListeners(ZKWatcher watcher) {
209    return countListeners(watcher, ZkSplitLogWorkerCoordination.class);
210  }
211
212  /**
213   * Count listeners in zkw excluding provided classes
214   */
215  private int countListeners(ZKWatcher watcher, Class<?>... exclude) {
216    int cnt = 0;
217    for (Object o : watcher.getListeners()) {
218      boolean skip = false;
219      for (Class<?> aClass : exclude) {
220        if (aClass.isAssignableFrom(o.getClass())) {
221          skip = true;
222          break;
223        }
224      }
225      if (!skip) {
226        cnt += 1;
227      }
228    }
229    return cnt;
230  }
231
232  /**
233   * Tests whether the logs are split when master recovers from a expired zookeeper session and an
234   * RS goes down.
235   */
236  @Test
237  public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws Exception {
238    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
239    cluster.startRegionServer();
240    TableName tableName = TableName.valueOf(name.getMethodName());
241    byte[] family = Bytes.toBytes("col");
242    try (Admin admin = TEST_UTIL.getAdmin()) {
243      byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("1"), Bytes.toBytes("2"),
244        Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") };
245      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
246          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
247      admin.createTable(htd, SPLIT_KEYS);
248    }
249    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
250    HMaster m = cluster.getMaster();
251    try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
252      int numberOfPuts;
253      for (numberOfPuts = 0; numberOfPuts < 6; numberOfPuts++) {
254        Put p = new Put(Bytes.toBytes(numberOfPuts));
255        p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"),
256          Bytes.toBytes("value" + numberOfPuts));
257        table.put(p);
258      }
259      m.abort("Test recovery from zk session expired",
260        new KeeperException.SessionExpiredException());
261      assertTrue(m.isStopped()); // Master doesn't recover any more
262      cluster.killRegionServer(TEST_UTIL.getRSForFirstRegionInTable(tableName).getServerName());
263      // Without patch for HBASE-6046 this test case will always timeout
264      // with patch the test case should pass.
265      int numberOfRows = 0;
266      try (ResultScanner scanner = table.getScanner(new Scan())) {
267        while (scanner.next() != null) {
268          numberOfRows++;
269        }
270      }
271      assertEquals("Number of rows should be equal to number of puts.", numberOfPuts, numberOfRows);
272    }
273  }
274
275  static class MockLoadBalancer extends SimpleLoadBalancer {
276    static boolean retainAssignCalled = false;
277
278    @Override
279    public Map<ServerName, List<RegionInfo>> retainAssignment(
280        Map<RegionInfo, ServerName> regions, List<ServerName> servers) throws HBaseIOException {
281      retainAssignCalled = true;
282      return super.retainAssignment(regions, servers);
283    }
284  }
285
286}
287