/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.NavigableMap;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.log.HBaseMarkers;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALKeyImpl;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests for conditions that should trigger RegionServer aborts when
 * rolling the current WAL fails.
 */
@Category({RegionServerTests.class, MediumTests.class})
public class TestLogRollAbort {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestLogRollAbort.class);

  // Bound to this class so that log output is attributed to the test that produced it.
  private static final Logger LOG = LoggerFactory.getLogger(TestLogRollAbort.class);
  private static MiniDFSCluster dfsCluster;
  private static Admin admin;
  private static MiniHBaseCluster cluster;
  protected static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  /* For the split-then-roll test */
  private static final Path HBASEDIR = new Path("/hbase");
  private static final Path HBASELOGDIR = new Path("/hbaselog");
  private static final Path OLDLOGDIR = new Path(HBASELOGDIR, HConstants.HREGION_OLDLOGDIR_NAME);

  /** Configuration of the mini cluster started for the current test method. */
  private Configuration conf;
  /** File system of the mini DFS cluster started for the current test method. */
  private FileSystem fs;

  /**
   * Edits the shared configuration before it gets sent to the HDFS and HBase cluster startup
   * (the clusters themselves are started per test in {@link #setUp()}).
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    Configuration c = TEST_UTIL.getConfiguration();

    // Tweak default timeout values down for faster recovery
    c.setInt("hbase.regionserver.logroll.errors.tolerated", 2);
    c.setInt("hbase.rpc.timeout", 10 * 1000);

    // Increase the amount of time between client retries
    c.setLong("hbase.client.pause", 5 * 1000);

    // Lower the namenode & datanode heartbeat so the namenode
    // quickly detects datanode failures
    c.setInt("dfs.namenode.heartbeat.recheck-interval", 5000);
    c.setInt("dfs.heartbeat.interval", 1);
    // The namenode might still try to choose the recently-dead datanode
    // for a pipeline, so try to build a new pipeline multiple times
    c.setInt("dfs.client.block.write.retries", 10);
    c.set(WALFactory.WAL_PROVIDER, "filesystem");
  }

  /**
   * Starts a fresh two-node mini cluster and caches the handles the tests use.
   */
  @Before
  public void setUp() throws Exception {
    TEST_UTIL.startMiniCluster(2);

    cluster = TEST_UTIL.getHBaseCluster();
    dfsCluster = TEST_UTIL.getDFSCluster();
    admin = TEST_UTIL.getAdmin();
    conf = TEST_UTIL.getConfiguration();
    fs = dfsCluster.getFileSystem();

    // disable region rebalancing (interferes with log watching)
    cluster.getMaster().balanceSwitch(false);
    // Point the configuration at the fixed directories used by the split-then-roll test.
    CommonFSUtils.setRootDir(conf, HBASEDIR);
    CommonFSUtils.setWALRootDir(conf, HBASELOGDIR);
  }

  /**
   * Shuts down the mini cluster started by {@link #setUp()}.
   */
  @After
  public void tearDown() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Tests that RegionServer aborts if we hit an error closing the WAL when
   * there are unsynced WAL edits.  See HBASE-4282.
   */
  @Test
  public void testRSAbortWithUnflushedEdits() throws Exception {
    LOG.info("Starting testRSAbortWithUnflushedEdits()");

    // When the hbase:meta table can be opened, the region servers are running
    TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME).close();

    // Create the test table and open it
    TableName tableName = TableName.valueOf(this.getClass().getSimpleName());
    TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
    admin.createTable(desc);

    // try-with-resources: a failure while closing the table must not mask a test failure.
    try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
      HRegionServer server = TEST_UTIL.getRSForFirstRegionInTable(tableName);
      WAL log = server.getWAL(null);

      Put p = new Put(Bytes.toBytes("row2001"));
      p.addColumn(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2001));
      table.put(p);

      log.sync();

      p = new Put(Bytes.toBytes("row2002"));
      p.addColumn(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2002));
      table.put(p);

      dfsCluster.restartDataNodes();
      LOG.info("Restarted datanodes");

      try {
        log.rollWriter(true);
      } catch (FailedLogCloseException expected) {
        // Expected exception.  We used to expect that there would be unsynced appends but this
        // is not reliable now that sync plays a role in WAL rolling.  The above puts also now
        // call sync.
      } catch (Throwable t) {
        // NOTE(review): any other failure is only logged and does not fail the test;
        // confirm this leniency is still intended.
        LOG.error(HBaseMarkers.FATAL, "FAILED TEST: Got wrong exception", t);
      }
    }
  }

  /**
   * Tests the case where a RegionServer enters a GC pause,
   * comes back online after the master declared it dead and started to split.
   * Want log rolling after a master split to fail. See HBASE-2312.
   */
  @Test
  public void testLogRollAfterSplitStart() throws IOException {
    LOG.info("Verify wal roll after split starts will fail.");
    String logName = ServerName.valueOf("testLogRollAfterSplitStart",
        16010, System.currentTimeMillis()).toString();
    Path thisTestsDir = new Path(HBASELOGDIR, AbstractFSWALProvider.getWALDirectoryName(logName));
    final WALFactory wals = new WALFactory(conf, logName);

    try {
      // put some entries in a WAL
      TableName tableName = TableName.valueOf(this.getClass().getName());
      RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableName).build();
      WAL log = wals.getWAL(regionInfo);
      MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl(1);

      int total = 20;
      for (int i = 0; i < total; i++) {
        WALEdit kvs = new WALEdit();
        kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName()));
        NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
        scopes.put(Bytes.toBytes("column"), 0);
        log.appendData(regionInfo, new WALKeyImpl(regionInfo.getEncodedNameAsBytes(), tableName,
            System.currentTimeMillis(), mvcc, scopes), kvs);
      }
      // Send the data to HDFS datanodes and close the HDFS writer
      log.sync();
      ((AbstractFSWAL<?>) log).replaceWriter(((FSHLog) log).getOldPath(), null, null);

      // code taken from MasterFileSystem.getLogDirs(), which is called from
      // MasterFileSystem.splitLog() handles RS shutdowns (as observed by the splitting process)
      // rename the directory so a rogue RS doesn't create more WALs
      Path rsSplitDir = thisTestsDir.suffix(AbstractFSWALProvider.SPLITTING_EXT);
      if (!fs.rename(thisTestsDir, rsSplitDir)) {
        throw new IOException("Failed fs.rename for log split: " + thisTestsDir);
      }
      LOG.debug("Renamed region directory: {}", rsSplitDir);

      LOG.debug("Processing the old log files.");
      WALSplitter.split(HBASELOGDIR, rsSplitDir, OLDLOGDIR, fs, conf, wals);

      LOG.debug("Trying to roll the WAL.");
      try {
        log.rollWriter();
        Assert.fail("rollWriter() did not throw any exception.");
      } catch (IOException ioe) {
        if (!(ioe.getCause() instanceof FileNotFoundException)) {
          // Keep the original exception as the cause so its stack trace shows up in the report.
          throw new AssertionError("Unexpected exception: " + ioe, ioe);
        }
        LOG.info("Got the expected exception: ", ioe.getCause());
      }
    } finally {
      try {
        wals.close();
      } finally {
        // Clean up the WAL directory even when closing the factory fails.
        if (fs.exists(thisTestsDir)) {
          fs.delete(thisTestsDir, true);
        }
      }
    }
  }
}