001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023
024import java.util.List;
025import java.util.Map;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.hbase.client.Admin;
028import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
029import org.apache.hadoop.hbase.client.Put;
030import org.apache.hadoop.hbase.client.RegionInfo;
031import org.apache.hadoop.hbase.client.ResultScanner;
032import org.apache.hadoop.hbase.client.Scan;
033import org.apache.hadoop.hbase.client.Table;
034import org.apache.hadoop.hbase.client.TableDescriptor;
035import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
036import org.apache.hadoop.hbase.coordination.ZkSplitLogWorkerCoordination;
037import org.apache.hadoop.hbase.master.HMaster;
038import org.apache.hadoop.hbase.master.LoadBalancer;
039import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
040import org.apache.hadoop.hbase.testclassification.MediumTests;
041import org.apache.hadoop.hbase.testclassification.MiscTests;
042import org.apache.hadoop.hbase.util.Bytes;
043import org.apache.hadoop.hbase.util.CommonFSUtils;
044import org.apache.hadoop.hbase.zookeeper.ZKUtil;
045import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
046import org.apache.zookeeper.KeeperException;
047import org.junit.After;
048import org.junit.AfterClass;
049import org.junit.Before;
050import org.junit.BeforeClass;
051import org.junit.ClassRule;
052import org.junit.Rule;
053import org.junit.Test;
054import org.junit.experimental.categories.Category;
055import org.junit.rules.TestName;
056import org.slf4j.Logger;
057import org.slf4j.LoggerFactory;
058
059@Category({MiscTests.class, MediumTests.class})
060public class TestZooKeeper {
061
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE =
064      HBaseClassTestRule.forClass(TestZooKeeper.class);
065
066  private static final Logger LOG = LoggerFactory.getLogger(TestZooKeeper.class);
067
068  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
069
070  @Rule
071  public TestName name = new TestName();
072
073  @BeforeClass
074  public static void setUpBeforeClass() throws Exception {
075    // Test we can first start the ZK cluster by itself
076    Configuration conf = TEST_UTIL.getConfiguration();
077    // A couple of tests rely on master expiring ZK session, hence killing the only master. So it
078    // makes sense only for ZK registry. Enforcing it.
079    conf.set(HConstants.CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY,
080        HConstants.ZK_CONNECTION_REGISTRY_CLASS);
081    TEST_UTIL.startMiniDFSCluster(2);
082    TEST_UTIL.startMiniZKCluster();
083    conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000);
084    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockLoadBalancer.class,
085        LoadBalancer.class);
086    TEST_UTIL.startMiniDFSCluster(2);
087  }
088
  /**
   * Shuts down the mini cluster held by {@code TEST_UTIL} after all tests in this class have run.
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }
093
094  @Before
095  public void setUp() throws Exception {
096    StartMiniClusterOption option = StartMiniClusterOption.builder()
097        .numMasters(2).numRegionServers(2).build();
098    TEST_UTIL.startMiniHBaseCluster(option);
099  }
100
101  @After
102  public void after() throws Exception {
103    try {
104      TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(10000);
105      // Some regionserver could fail to delete its znode.
106      // So shutdown could hang. Let's kill them all instead.
107      TEST_UTIL.getHBaseCluster().killAll();
108
109      // Still need to clean things up
110      TEST_UTIL.shutdownMiniHBaseCluster();
111    } finally {
112      TEST_UTIL.getTestFileSystem().delete(CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration()),
113        true);
114      ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
115    }
116  }
117
118  @Test
119  public void testRegionServerSessionExpired() throws Exception {
120    LOG.info("Starting " + name.getMethodName());
121    TEST_UTIL.expireRegionServerSession(0);
122    testSanity(name.getMethodName());
123  }
124
125  @Test
126  public void testMasterSessionExpired() throws Exception {
127    LOG.info("Starting " + name.getMethodName());
128    TEST_UTIL.expireMasterSession();
129    testSanity(name.getMethodName());
130  }
131
132  /**
133   * Master recovery when the znode already exists. Internally, this
134   *  test differs from {@link #testMasterSessionExpired} because here
135   *  the master znode will exist in ZK.
136   */
137  @Test
138  public void testMasterZKSessionRecoveryFailure() throws Exception {
139    LOG.info("Starting " + name.getMethodName());
140    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
141    HMaster m = cluster.getMaster();
142    m.abort("Test recovery from zk session expired",
143        new KeeperException.SessionExpiredException());
144    assertTrue(m.isStopped()); // Master doesn't recover any more
145    testSanity(name.getMethodName());
146  }
147
148  /**
149   * Make sure we can use the cluster
150   */
151  private void testSanity(final String testName) throws Exception {
152    String tableName = testName + "_" + System.currentTimeMillis();
153    TableDescriptor desc = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName))
154        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build();
155    LOG.info("Creating table " + tableName);
156    Admin admin = TEST_UTIL.getAdmin();
157    try {
158      admin.createTable(desc);
159    } finally {
160      admin.close();
161    }
162
163    Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
164    Put put = new Put(Bytes.toBytes("testrow"));
165    put.addColumn(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
166    LOG.info("Putting table " + tableName);
167    table.put(put);
168    table.close();
169  }
170
  /**
   * Tests that the master does not call retainAssignment after recovery from expired zookeeper
   * session. Without the HBASE-6046 fix master always tries to assign all the user regions by
   * calling retainAssignment.
   * <p>
   * Also checks that the master that becomes active afterwards has the same number of permanent
   * ZooKeeper listeners as the aborted master had right before the abort.
   * <p>
   * NOTE(review): the retainAssignment assertion is evaluated right after {@code abort()} and
   * before {@code waitForActiveAndReadyMaster}, so it presumably only covers calls made up to
   * that point -- confirm this is the intended window.
   */
  @Test
  public void testRegionAssignmentAfterMasterRecoveryDueToZKExpiry() throws Exception {
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    // Adds one more region server on top of the two started in setUp().
    cluster.startRegionServer();
    cluster.waitForActiveAndReadyMaster(10000);
    HMaster m = cluster.getMaster();
    // Keep a reference to the master's watcher: it is closed below, but its listeners are still
    // counted afterwards.
    final ZKWatcher zkw = m.getZooKeeper();
    // now the cluster is up. So assign some regions.
    try (Admin admin = TEST_UTIL.getAdmin()) {
      // Ten split keys, i.e. a pre-split table.
      byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"),
          Bytes.toBytes("c"), Bytes.toBytes("d"), Bytes.toBytes("e"), Bytes.toBytes("f"),
          Bytes.toBytes("g"), Bytes.toBytes("h"), Bytes.toBytes("i"), Bytes.toBytes("j") };
      TableDescriptor htd =
          TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()))
              .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
      admin.createTable(htd, SPLIT_KEYS);
      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
      m.getZooKeeper().close();
      // Reset the flag so that only retainAssignment calls made from here on are observed.
      MockLoadBalancer.retainAssignCalled = false;
      // Baseline listener count, taken from the old master's watcher before the abort.
      final int expectedNumOfListeners = countPermanentListeners(zkw);
      m.abort("Test recovery from zk session expired",
          new KeeperException.SessionExpiredException());
      assertTrue(m.isStopped()); // Master doesn't recover any more
      // The recovered master should not call retainAssignment, as it is not a
      // clean startup.
      assertFalse("Retain assignment should not be called", MockLoadBalancer.retainAssignCalled);
      // number of listeners should be same as the value before master aborted
      // wait for new master is initialized
      cluster.waitForActiveAndReadyMaster(120000);
      final HMaster newMaster = cluster.getMasterThread().getMaster();
      assertEquals(expectedNumOfListeners, countPermanentListeners(newMaster.getZooKeeper()));
    }
  }
209
210  /**
211   * Count listeners in zkw excluding listeners, that belongs to workers or other
212   * temporary processes.
213   */
214  private int countPermanentListeners(ZKWatcher watcher) {
215    return countListeners(watcher, ZkSplitLogWorkerCoordination.class);
216  }
217
218  /**
219   * Count listeners in zkw excluding provided classes
220   */
221  private int countListeners(ZKWatcher watcher, Class<?>... exclude) {
222    int cnt = 0;
223    for (Object o : watcher.getListeners()) {
224      boolean skip = false;
225      for (Class<?> aClass : exclude) {
226        if (aClass.isAssignableFrom(o.getClass())) {
227          skip = true;
228          break;
229        }
230      }
231      if (!skip) {
232        cnt += 1;
233      }
234    }
235    return cnt;
236  }
237
238  /**
239   * Tests whether the logs are split when master recovers from a expired zookeeper session and an
240   * RS goes down.
241   */
242  @Test
243  public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws Exception {
244    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
245    cluster.startRegionServer();
246    TableName tableName = TableName.valueOf(name.getMethodName());
247    byte[] family = Bytes.toBytes("col");
248    try (Admin admin = TEST_UTIL.getAdmin()) {
249      byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("1"), Bytes.toBytes("2"),
250        Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") };
251      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
252          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
253      admin.createTable(htd, SPLIT_KEYS);
254    }
255    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
256    HMaster m = cluster.getMaster();
257    try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
258      int numberOfPuts;
259      for (numberOfPuts = 0; numberOfPuts < 6; numberOfPuts++) {
260        Put p = new Put(Bytes.toBytes(numberOfPuts));
261        p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"),
262          Bytes.toBytes("value" + numberOfPuts));
263        table.put(p);
264      }
265      m.abort("Test recovery from zk session expired",
266        new KeeperException.SessionExpiredException());
267      assertTrue(m.isStopped()); // Master doesn't recover any more
268      cluster.killRegionServer(TEST_UTIL.getRSForFirstRegionInTable(tableName).getServerName());
269      // Without patch for HBASE-6046 this test case will always timeout
270      // with patch the test case should pass.
271      int numberOfRows = 0;
272      try (ResultScanner scanner = table.getScanner(new Scan())) {
273        while (scanner.next() != null) {
274          numberOfRows++;
275        }
276      }
277      assertEquals("Number of rows should be equal to number of puts.", numberOfPuts, numberOfRows);
278    }
279  }
280
281  static class MockLoadBalancer extends SimpleLoadBalancer {
282    static boolean retainAssignCalled = false;
283
284    @Override
285    public Map<ServerName, List<RegionInfo>> retainAssignment(
286        Map<RegionInfo, ServerName> regions, List<ServerName> servers) throws HBaseIOException {
287      retainAssignCalled = true;
288      return super.retainAssignment(regions, servers);
289    }
290  }
291
292}
293