001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver.wal; 019 020import java.io.FileNotFoundException; 021import java.io.IOException; 022import java.util.NavigableMap; 023import java.util.TreeMap; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.fs.FileSystem; 026import org.apache.hadoop.fs.Path; 027import org.apache.hadoop.hbase.HBaseClassTestRule; 028import org.apache.hadoop.hbase.HBaseTestingUtility; 029import org.apache.hadoop.hbase.HConstants; 030import org.apache.hadoop.hbase.KeyValue; 031import org.apache.hadoop.hbase.MiniHBaseCluster; 032import org.apache.hadoop.hbase.ServerName; 033import org.apache.hadoop.hbase.TableName; 034import org.apache.hadoop.hbase.client.Admin; 035import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 036import org.apache.hadoop.hbase.client.Put; 037import org.apache.hadoop.hbase.client.RegionInfo; 038import org.apache.hadoop.hbase.client.RegionInfoBuilder; 039import org.apache.hadoop.hbase.client.Table; 040import org.apache.hadoop.hbase.client.TableDescriptor; 041import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 042import org.apache.hadoop.hbase.log.HBaseMarkers; 043import org.apache.hadoop.hbase.regionserver.HRegionServer; 044import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl; 045import org.apache.hadoop.hbase.testclassification.MediumTests; 046import org.apache.hadoop.hbase.testclassification.RegionServerTests; 047import org.apache.hadoop.hbase.util.Bytes; 048import org.apache.hadoop.hbase.util.CommonFSUtils; 049import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 050import org.apache.hadoop.hbase.wal.AbstractFSWALProvider; 051import org.apache.hadoop.hbase.wal.WAL; 052import org.apache.hadoop.hbase.wal.WALEdit; 053import org.apache.hadoop.hbase.wal.WALFactory; 054import org.apache.hadoop.hbase.wal.WALKeyImpl; 055import org.apache.hadoop.hbase.wal.WALSplitter; 056import org.apache.hadoop.hdfs.MiniDFSCluster; 057import org.junit.After; 058import org.junit.Assert; 059import org.junit.Before; 060import org.junit.BeforeClass; 061import org.junit.ClassRule; 062import org.junit.Test; 063import org.junit.experimental.categories.Category; 064import org.slf4j.Logger; 065import org.slf4j.LoggerFactory; 066 067/** 068 * Tests for conditions that should trigger RegionServer aborts when rolling the current WAL fails. 069 */ 070@Category({ RegionServerTests.class, MediumTests.class }) 071public class TestLogRollAbort { 072 073 @ClassRule 074 public static final HBaseClassTestRule CLASS_RULE = 075 HBaseClassTestRule.forClass(TestLogRollAbort.class); 076 077 private static final Logger LOG = LoggerFactory.getLogger(AbstractTestLogRolling.class); 078 private static MiniDFSCluster dfsCluster; 079 private static Admin admin; 080 private static MiniHBaseCluster cluster; 081 protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 082 083 /* For the split-then-roll test */ 084 private static final Path HBASEDIR = new Path("/hbase"); 085 private static final Path HBASELOGDIR = new Path("/hbaselog"); 086 private static final Path OLDLOGDIR = new Path(HBASELOGDIR, HConstants.HREGION_OLDLOGDIR_NAME); 087 088 // Need to override this setup so we can edit the config before it gets sent 089 // to the HDFS & HBase cluster startup. 090 @BeforeClass 091 public static void setUpBeforeClass() throws Exception { 092 // Tweak default timeout values down for faster recovery 093 TEST_UTIL.getConfiguration().setInt("hbase.regionserver.logroll.errors.tolerated", 2); 094 TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000); 095 096 // Increase the amount of time between client retries 097 TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 5 * 1000); 098 099 // lower the namenode & datanode heartbeat so the namenode 100 // quickly detects datanode failures 101 TEST_UTIL.getConfiguration().setInt("dfs.namenode.heartbeat.recheck-interval", 5000); 102 TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1); 103 // the namenode might still try to choose the recently-dead datanode 104 // for a pipeline, so try to a new pipeline multiple times 105 TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 10); 106 TEST_UTIL.getConfiguration().set(WALFactory.WAL_PROVIDER, "filesystem"); 107 } 108 109 private Configuration conf; 110 private FileSystem fs; 111 112 @Before 113 public void setUp() throws Exception { 114 TEST_UTIL.startMiniCluster(2); 115 116 cluster = TEST_UTIL.getHBaseCluster(); 117 dfsCluster = TEST_UTIL.getDFSCluster(); 118 admin = TEST_UTIL.getAdmin(); 119 conf = TEST_UTIL.getConfiguration(); 120 fs = TEST_UTIL.getDFSCluster().getFileSystem(); 121 122 // disable region rebalancing (interferes with log watching) 123 cluster.getMaster().balanceSwitch(false); 124 CommonFSUtils.setRootDir(conf, HBASEDIR); 125 CommonFSUtils.setWALRootDir(conf, HBASELOGDIR); 126 } 127 128 @After 129 public void tearDown() throws Exception { 130 TEST_UTIL.shutdownMiniCluster(); 131 } 132 133 /** 134 * Tests that RegionServer aborts if we hit an error closing the WAL when there are unsynced WAL 135 * edits. See HBASE-4282. 136 */ 137 @Test 138 public void testRSAbortWithUnflushedEdits() throws Exception { 139 LOG.info("Starting testRSAbortWithUnflushedEdits()"); 140 141 // When the hbase:meta table can be opened, the region servers are running 142 TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME).close(); 143 144 // Create the test table and open it 145 TableName tableName = TableName.valueOf(this.getClass().getSimpleName()); 146 TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName) 147 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build(); 148 149 admin.createTable(desc); 150 Table table = TEST_UTIL.getConnection().getTable(tableName); 151 try { 152 HRegionServer server = TEST_UTIL.getRSForFirstRegionInTable(tableName); 153 WAL log = server.getWAL(null); 154 155 Put p = new Put(Bytes.toBytes("row2001")); 156 p.addColumn(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2001)); 157 table.put(p); 158 159 log.sync(); 160 161 p = new Put(Bytes.toBytes("row2002")); 162 p.addColumn(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2002)); 163 table.put(p); 164 165 dfsCluster.restartDataNodes(); 166 LOG.info("Restarted datanodes"); 167 168 try { 169 log.rollWriter(true); 170 } catch (FailedLogCloseException flce) { 171 // Expected exception. We used to expect that there would be unsynced appends but this 172 // not reliable now that sync plays a roll in wall rolling. The above puts also now call 173 // sync. 174 } catch (Throwable t) { 175 LOG.error(HBaseMarkers.FATAL, "FAILED TEST: Got wrong exception", t); 176 } 177 } finally { 178 table.close(); 179 } 180 } 181 182 /** 183 * Tests the case where a RegionServer enters a GC pause, comes back online after the master 184 * declared it dead and started to split. Want log rolling after a master split to fail. See 185 * HBASE-2312. 186 */ 187 @Test 188 public void testLogRollAfterSplitStart() throws IOException { 189 LOG.info("Verify wal roll after split starts will fail."); 190 String logName = 191 ServerName.valueOf("testLogRollAfterSplitStart", 16010, EnvironmentEdgeManager.currentTime()) 192 .toString(); 193 Path thisTestsDir = new Path(HBASELOGDIR, AbstractFSWALProvider.getWALDirectoryName(logName)); 194 final WALFactory wals = new WALFactory(conf, logName); 195 196 try { 197 // put some entries in an WAL 198 TableName tableName = TableName.valueOf(this.getClass().getName()); 199 RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableName).build(); 200 WAL log = wals.getWAL(regionInfo); 201 MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl(1); 202 203 int total = 20; 204 for (int i = 0; i < total; i++) { 205 WALEdit kvs = new WALEdit(); 206 kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName())); 207 NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR); 208 scopes.put(Bytes.toBytes("column"), 0); 209 log.appendData(regionInfo, new WALKeyImpl(regionInfo.getEncodedNameAsBytes(), tableName, 210 EnvironmentEdgeManager.currentTime(), mvcc, scopes), kvs); 211 } 212 // Send the data to HDFS datanodes and close the HDFS writer 213 log.sync(); 214 ((AbstractFSWAL<?>) log).replaceWriter(((FSHLog) log).getOldPath(), null, null); 215 216 // code taken from MasterFileSystem.getLogDirs(), which is called from 217 // MasterFileSystem.splitLog() handles RS shutdowns (as observed by the splitting process) 218 // rename the directory so a rogue RS doesn't create more WALs 219 Path rsSplitDir = thisTestsDir.suffix(AbstractFSWALProvider.SPLITTING_EXT); 220 if (!fs.rename(thisTestsDir, rsSplitDir)) { 221 throw new IOException("Failed fs.rename for log split: " + thisTestsDir); 222 } 223 LOG.debug("Renamed region directory: " + rsSplitDir); 224 225 LOG.debug("Processing the old log files."); 226 WALSplitter.split(HBASELOGDIR, rsSplitDir, OLDLOGDIR, fs, conf, wals); 227 228 LOG.debug("Trying to roll the WAL."); 229 try { 230 log.rollWriter(); 231 Assert.fail("rollWriter() did not throw any exception."); 232 } catch (IOException ioe) { 233 if (ioe.getCause() instanceof FileNotFoundException) { 234 LOG.info("Got the expected exception: ", ioe.getCause()); 235 } else { 236 Assert.fail("Unexpected exception: " + ioe); 237 } 238 } 239 } finally { 240 wals.close(); 241 if (fs.exists(thisTestsDir)) { 242 fs.delete(thisTestsDir, true); 243 } 244 } 245 } 246}