001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.wal; 019 020import java.io.IOException; 021import java.util.Arrays; 022import java.util.List; 023import org.apache.hadoop.fs.Path; 024import org.apache.hadoop.hbase.HBaseClassTestRule; 025import org.apache.hadoop.hbase.HBaseTestingUtility; 026import org.apache.hadoop.hbase.HConstants; 027import org.apache.hadoop.hbase.regionserver.HRegionServer; 028import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL; 029import org.apache.hadoop.hbase.testclassification.LargeTests; 030import org.apache.hadoop.hbase.testclassification.RegionServerTests; 031import org.apache.hadoop.hbase.util.CommonFSUtils; 032import org.junit.After; 033import org.junit.AfterClass; 034import org.junit.Before; 035import org.junit.BeforeClass; 036import org.junit.ClassRule; 037import org.junit.Test; 038import org.junit.experimental.categories.Category; 039import org.junit.runner.RunWith; 040import org.junit.runners.Parameterized; 041import org.junit.runners.Parameterized.Parameter; 042import org.junit.runners.Parameterized.Parameters; 043 044@RunWith(Parameterized.class) 045@Category({ RegionServerTests.class, LargeTests.class }) 046public class TestWALOpenAfterDNRollingStart { 047 048 @ClassRule 049 public static final HBaseClassTestRule CLASS_RULE = 050 HBaseClassTestRule.forClass(TestWALOpenAfterDNRollingStart.class); 051 052 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 053 // Sleep time before restart next dn, we need to wait the current dn to finish start up 054 private static long DN_RESTART_INTERVAL = 15000; 055 056 // interval of checking low replication. The sleep time must smaller than 057 // DataNodeRestartInterval 058 // so a low replication case will be detected and the wal will be rolled 059 private static long CHECK_LOW_REPLICATION_INTERVAL = 10000; 060 061 @Parameter 062 public String walProvider; 063 064 @Parameters(name = "{index}: wal={0}") 065 public static List<Object[]> data() { 066 return Arrays.asList(new Object[] { "asyncfs" }, new Object[] { "filesystem" }); 067 } 068 069 @BeforeClass 070 public static void setUpBeforeClass() throws Exception { 071 // don't let hdfs client to choose a new replica when dn down 072 TEST_UTIL.getConfiguration() 073 .setBoolean("dfs.client.block.write.replace-datanode-on-failure.enable", false); 074 TEST_UTIL.getConfiguration().setLong("hbase.regionserver.hlog.check.lowreplication.interval", 075 CHECK_LOW_REPLICATION_INTERVAL); 076 TEST_UTIL.startMiniDFSCluster(3); 077 TEST_UTIL.startMiniZKCluster(); 078 } 079 080 @Before 081 public void setUp() throws IOException, InterruptedException { 082 TEST_UTIL.getConfiguration().set("hbase.wal.provider", walProvider); 083 TEST_UTIL.startMiniHBaseCluster(); 084 } 085 086 @After 087 public void tearDown() throws Exception { 088 TEST_UTIL.shutdownMiniHBaseCluster(); 089 } 090 091 @AfterClass 092 public static void tearDownAfterClass() throws Exception { 093 TEST_UTIL.shutdownMiniCluster(); 094 } 095 096 /** 097 * see HBASE-18132 This is a test case of failing open a wal(for replication for example) after 098 * all datanode restarted (rolling upgrade, for example). Before this patch, low replication 099 * detection is only used when syncing wal. But if the wal haven't had any entry whiten, it will 100 * never know all the replica of the wal is broken(because of dn restarting). And this wal can 101 * never be open 102 * @throws Exception 103 */ 104 @Test 105 public void test() throws Exception { 106 HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0); 107 AbstractFSWAL<?> wal = (AbstractFSWAL<?>) server.getWAL(null); 108 Path currentFile = wal.getCurrentFileName(); 109 // restart every dn to simulate a dn rolling upgrade 110 for (int i = 0, n = TEST_UTIL.getDFSCluster().getDataNodes().size(); i < n; i++) { 111 // This is NOT a bug, when restart dn in miniDFSCluster, it will remove the stopped dn from 112 // the dn list and then add to the tail of this list, we need to always restart the first one 113 // to simulate rolling upgrade of every dn. 114 TEST_UTIL.getDFSCluster().restartDataNode(0); 115 // sleep enough time so log roller can detect the pipeline break and roll log 116 Thread.sleep(DN_RESTART_INTERVAL); 117 } 118 119 if (!server.getFileSystem().exists(currentFile)) { 120 Path walRootDir = CommonFSUtils.getWALRootDir(TEST_UTIL.getConfiguration()); 121 final Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME); 122 currentFile = new Path(oldLogDir, currentFile.getName()); 123 } 124 // if the log is not rolled, then we can never open this wal forever. 125 try (WAL.Reader reader = WALFactory.createReader(TEST_UTIL.getTestFileSystem(), currentFile, 126 TEST_UTIL.getConfiguration())) { 127 reader.next(); 128 } 129 } 130}