001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.wal;
019
020import java.io.IOException;
021import java.util.Arrays;
022import java.util.List;
023import org.apache.hadoop.fs.Path;
024import org.apache.hadoop.hbase.HBaseClassTestRule;
025import org.apache.hadoop.hbase.HBaseTestingUtil;
026import org.apache.hadoop.hbase.HConstants;
027import org.apache.hadoop.hbase.regionserver.HRegionServer;
028import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
029import org.apache.hadoop.hbase.testclassification.LargeTests;
030import org.apache.hadoop.hbase.testclassification.RegionServerTests;
031import org.apache.hadoop.hbase.util.CommonFSUtils;
032import org.junit.After;
033import org.junit.AfterClass;
034import org.junit.Before;
035import org.junit.BeforeClass;
036import org.junit.ClassRule;
037import org.junit.Test;
038import org.junit.experimental.categories.Category;
039import org.junit.runner.RunWith;
040import org.junit.runners.Parameterized;
041import org.junit.runners.Parameterized.Parameter;
042import org.junit.runners.Parameterized.Parameters;
043
044@RunWith(Parameterized.class)
045@Category({ RegionServerTests.class, LargeTests.class })
046public class TestWALOpenAfterDNRollingStart {
047
048  @ClassRule
049  public static final HBaseClassTestRule CLASS_RULE =
050    HBaseClassTestRule.forClass(TestWALOpenAfterDNRollingStart.class);
051
052  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
053  // Sleep time before restart next dn, we need to wait the current dn to finish start up
054  private static long DN_RESTART_INTERVAL = 15000;
055
056  // interval of checking low replication. The sleep time must smaller than
057  // DataNodeRestartInterval
058  // so a low replication case will be detected and the wal will be rolled
059  private static long CHECK_LOW_REPLICATION_INTERVAL = 10000;
060
061  @Parameter
062  public String walProvider;
063
064  @Parameters(name = "{index}: wal={0}")
065  public static List<Object[]> data() {
066    return Arrays.asList(new Object[] { "asyncfs" }, new Object[] { "filesystem" });
067  }
068
069  @BeforeClass
070  public static void setUpBeforeClass() throws Exception {
071    // don't let hdfs client to choose a new replica when dn down
072    TEST_UTIL.getConfiguration()
073      .setBoolean("dfs.client.block.write.replace-datanode-on-failure.enable", false);
074    TEST_UTIL.getConfiguration().setLong("hbase.regionserver.hlog.check.lowreplication.interval",
075      CHECK_LOW_REPLICATION_INTERVAL);
076    TEST_UTIL.startMiniDFSCluster(3);
077    TEST_UTIL.startMiniZKCluster();
078  }
079
080  @Before
081  public void setUp() throws IOException, InterruptedException {
082    TEST_UTIL.getConfiguration().set("hbase.wal.provider", walProvider);
083    TEST_UTIL.startMiniHBaseCluster();
084  }
085
086  @After
087  public void tearDown() throws Exception {
088    TEST_UTIL.shutdownMiniHBaseCluster();
089  }
090
091  @AfterClass
092  public static void tearDownAfterClass() throws Exception {
093    TEST_UTIL.shutdownMiniCluster();
094  }
095
096  /**
097   * see HBASE-18132 This is a test case of failing open a wal(for replication for example) after
098   * all datanode restarted (rolling upgrade, for example). Before this patch, low replication
099   * detection is only used when syncing wal. But if the wal haven't had any entry whiten, it will
100   * never know all the replica of the wal is broken(because of dn restarting). And this wal can
101   * never be open
102   */
103  @Test
104  public void test() throws Exception {
105    HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0);
106    AbstractFSWAL<?> wal = (AbstractFSWAL<?>) server.getWAL(null);
107    Path currentFile = wal.getCurrentFileName();
108    // restart every dn to simulate a dn rolling upgrade
109    for (int i = 0, n = TEST_UTIL.getDFSCluster().getDataNodes().size(); i < n; i++) {
110      // This is NOT a bug, when restart dn in miniDFSCluster, it will remove the stopped dn from
111      // the dn list and then add to the tail of this list, we need to always restart the first one
112      // to simulate rolling upgrade of every dn.
113      TEST_UTIL.getDFSCluster().restartDataNode(0);
114      // sleep enough time so log roller can detect the pipeline break and roll log
115      Thread.sleep(DN_RESTART_INTERVAL);
116    }
117
118    if (!server.getFileSystem().exists(currentFile)) {
119      Path walRootDir = CommonFSUtils.getWALRootDir(TEST_UTIL.getConfiguration());
120      final Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
121      currentFile = new Path(oldLogDir, currentFile.getName());
122    }
123    // if the log is not rolled, then we can never open this wal forever.
124    try (WALStreamReader reader = NoEOFWALStreamReader.create(TEST_UTIL.getTestFileSystem(),
125      currentFile, TEST_UTIL.getConfiguration())) {
126      reader.next();
127    }
128  }
129}