001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023
024import edu.umd.cs.findbugs.annotations.NonNull;
025import java.util.List;
026import java.util.Map;
027
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.hbase.client.Admin;
030import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
031import org.apache.hadoop.hbase.client.Put;
032import org.apache.hadoop.hbase.client.RegionInfo;
033import org.apache.hadoop.hbase.client.ResultScanner;
034import org.apache.hadoop.hbase.client.Scan;
035import org.apache.hadoop.hbase.client.Table;
036import org.apache.hadoop.hbase.client.TableDescriptor;
037import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
038import org.apache.hadoop.hbase.coordination.ZkSplitLogWorkerCoordination;
039import org.apache.hadoop.hbase.master.HMaster;
040import org.apache.hadoop.hbase.master.LoadBalancer;
041import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
042import org.apache.hadoop.hbase.testclassification.MediumTests;
043import org.apache.hadoop.hbase.testclassification.MiscTests;
044import org.apache.hadoop.hbase.util.Bytes;
045import org.apache.hadoop.hbase.util.CommonFSUtils;
046import org.apache.hadoop.hbase.zookeeper.ZKUtil;
047import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
048import org.apache.zookeeper.KeeperException;
049import org.junit.After;
050import org.junit.AfterClass;
051import org.junit.Before;
052import org.junit.BeforeClass;
053import org.junit.ClassRule;
054import org.junit.Rule;
055import org.junit.Test;
056import org.junit.experimental.categories.Category;
057import org.junit.rules.TestName;
058import org.slf4j.Logger;
059import org.slf4j.LoggerFactory;
060
061@Category({MiscTests.class, MediumTests.class})
062public class TestZooKeeper {
063
064  @ClassRule
065  public static final HBaseClassTestRule CLASS_RULE =
066      HBaseClassTestRule.forClass(TestZooKeeper.class);
067
068  private static final Logger LOG = LoggerFactory.getLogger(TestZooKeeper.class);
069
070  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
071
072  @Rule
073  public TestName name = new TestName();
074
075  @BeforeClass
076  public static void setUpBeforeClass() throws Exception {
077    // Test we can first start the ZK cluster by itself
078    Configuration conf = TEST_UTIL.getConfiguration();
079    // A couple of tests rely on master expiring ZK session, hence killing the only master. So it
080    // makes sense only for ZK registry. Enforcing it.
081    conf.set(HConstants.CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY,
082        HConstants.ZK_CONNECTION_REGISTRY_CLASS);
083    TEST_UTIL.startMiniDFSCluster(2);
084    TEST_UTIL.startMiniZKCluster();
085    conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000);
086    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockLoadBalancer.class,
087        LoadBalancer.class);
088    TEST_UTIL.startMiniDFSCluster(2);
089  }
090
091  @AfterClass
092  public static void tearDownAfterClass() throws Exception {
093    TEST_UTIL.shutdownMiniCluster();
094  }
095
096  @Before
097  public void setUp() throws Exception {
098    StartMiniClusterOption option = StartMiniClusterOption.builder()
099        .numMasters(2).numRegionServers(2).build();
100    TEST_UTIL.startMiniHBaseCluster(option);
101  }
102
103  @After
104  public void after() throws Exception {
105    try {
106      TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(10000);
107      // Some regionserver could fail to delete its znode.
108      // So shutdown could hang. Let's kill them all instead.
109      TEST_UTIL.getHBaseCluster().killAll();
110
111      // Still need to clean things up
112      TEST_UTIL.shutdownMiniHBaseCluster();
113    } finally {
114      TEST_UTIL.getTestFileSystem().delete(CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration()),
115        true);
116      ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
117    }
118  }
119
120  @Test
121  public void testRegionServerSessionExpired() throws Exception {
122    LOG.info("Starting " + name.getMethodName());
123    TEST_UTIL.expireRegionServerSession(0);
124    testSanity(name.getMethodName());
125  }
126
127  @Test
128  public void testMasterSessionExpired() throws Exception {
129    LOG.info("Starting " + name.getMethodName());
130    TEST_UTIL.expireMasterSession();
131    testSanity(name.getMethodName());
132  }
133
134  /**
135   * Master recovery when the znode already exists. Internally, this
136   *  test differs from {@link #testMasterSessionExpired} because here
137   *  the master znode will exist in ZK.
138   */
139  @Test
140  public void testMasterZKSessionRecoveryFailure() throws Exception {
141    LOG.info("Starting " + name.getMethodName());
142    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
143    HMaster m = cluster.getMaster();
144    m.abort("Test recovery from zk session expired",
145        new KeeperException.SessionExpiredException());
146    assertTrue(m.isStopped()); // Master doesn't recover any more
147    testSanity(name.getMethodName());
148  }
149
150  /**
151   * Make sure we can use the cluster
152   */
153  private void testSanity(final String testName) throws Exception {
154    String tableName = testName + "_" + System.currentTimeMillis();
155    TableDescriptor desc = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName))
156        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build();
157    LOG.info("Creating table " + tableName);
158    Admin admin = TEST_UTIL.getAdmin();
159    try {
160      admin.createTable(desc);
161    } finally {
162      admin.close();
163    }
164
165    Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
166    Put put = new Put(Bytes.toBytes("testrow"));
167    put.addColumn(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
168    LOG.info("Putting table " + tableName);
169    table.put(put);
170    table.close();
171  }
172
173  /**
174   * Tests that the master does not call retainAssignment after recovery from expired zookeeper
175   * session. Without the HBASE-6046 fix master always tries to assign all the user regions by
176   * calling retainAssignment.
177   */
178  @Test
179  public void testRegionAssignmentAfterMasterRecoveryDueToZKExpiry() throws Exception {
180    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
181    cluster.startRegionServer();
182    cluster.waitForActiveAndReadyMaster(10000);
183    HMaster m = cluster.getMaster();
184    final ZKWatcher zkw = m.getZooKeeper();
185    // now the cluster is up. So assign some regions.
186    try (Admin admin = TEST_UTIL.getAdmin()) {
187      byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"),
188          Bytes.toBytes("c"), Bytes.toBytes("d"), Bytes.toBytes("e"), Bytes.toBytes("f"),
189          Bytes.toBytes("g"), Bytes.toBytes("h"), Bytes.toBytes("i"), Bytes.toBytes("j") };
190      TableDescriptor htd =
191          TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()))
192              .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
193      admin.createTable(htd, SPLIT_KEYS);
194      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
195      m.getZooKeeper().close();
196      MockLoadBalancer.retainAssignCalled = false;
197      final int expectedNumOfListeners = countPermanentListeners(zkw);
198      m.abort("Test recovery from zk session expired",
199          new KeeperException.SessionExpiredException());
200      assertTrue(m.isStopped()); // Master doesn't recover any more
201      // The recovered master should not call retainAssignment, as it is not a
202      // clean startup.
203      assertFalse("Retain assignment should not be called", MockLoadBalancer.retainAssignCalled);
204      // number of listeners should be same as the value before master aborted
205      // wait for new master is initialized
206      cluster.waitForActiveAndReadyMaster(120000);
207      final HMaster newMaster = cluster.getMasterThread().getMaster();
208      assertEquals(expectedNumOfListeners, countPermanentListeners(newMaster.getZooKeeper()));
209    }
210  }
211
212  /**
213   * Count listeners in zkw excluding listeners, that belongs to workers or other
214   * temporary processes.
215   */
216  private int countPermanentListeners(ZKWatcher watcher) {
217    return countListeners(watcher, ZkSplitLogWorkerCoordination.class);
218  }
219
220  /**
221   * Count listeners in zkw excluding provided classes
222   */
223  private int countListeners(ZKWatcher watcher, Class<?>... exclude) {
224    int cnt = 0;
225    for (Object o : watcher.getListeners()) {
226      boolean skip = false;
227      for (Class<?> aClass : exclude) {
228        if (aClass.isAssignableFrom(o.getClass())) {
229          skip = true;
230          break;
231        }
232      }
233      if (!skip) {
234        cnt += 1;
235      }
236    }
237    return cnt;
238  }
239
240  /**
241   * Tests whether the logs are split when master recovers from a expired zookeeper session and an
242   * RS goes down.
243   */
244  @Test
245  public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws Exception {
246    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
247    cluster.startRegionServer();
248    TableName tableName = TableName.valueOf(name.getMethodName());
249    byte[] family = Bytes.toBytes("col");
250    try (Admin admin = TEST_UTIL.getAdmin()) {
251      byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("1"), Bytes.toBytes("2"),
252        Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") };
253      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
254          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
255      admin.createTable(htd, SPLIT_KEYS);
256    }
257    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
258    HMaster m = cluster.getMaster();
259    try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
260      int numberOfPuts;
261      for (numberOfPuts = 0; numberOfPuts < 6; numberOfPuts++) {
262        Put p = new Put(Bytes.toBytes(numberOfPuts));
263        p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"),
264          Bytes.toBytes("value" + numberOfPuts));
265        table.put(p);
266      }
267      m.abort("Test recovery from zk session expired",
268        new KeeperException.SessionExpiredException());
269      assertTrue(m.isStopped()); // Master doesn't recover any more
270      cluster.killRegionServer(TEST_UTIL.getRSForFirstRegionInTable(tableName).getServerName());
271      // Without patch for HBASE-6046 this test case will always timeout
272      // with patch the test case should pass.
273      int numberOfRows = 0;
274      try (ResultScanner scanner = table.getScanner(new Scan())) {
275        while (scanner.next() != null) {
276          numberOfRows++;
277        }
278      }
279      assertEquals("Number of rows should be equal to number of puts.", numberOfPuts, numberOfRows);
280    }
281  }
282
283  static class MockLoadBalancer extends SimpleLoadBalancer {
284    static boolean retainAssignCalled = false;
285
286    @Override
287    @NonNull
288    public Map<ServerName, List<RegionInfo>> retainAssignment(
289        Map<RegionInfo, ServerName> regions, List<ServerName> servers) throws HBaseIOException {
290      retainAssignCalled = true;
291      return super.retainAssignment(regions, servers);
292    }
293  }
294
295}
296