/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DroppedSnapshotException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.mockito.exceptions.verification.WantedButNotInvoked;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Testing sync/append failures.
 * Copied from TestHRegion.
 */
@Category({MediumTests.class})
public class TestFailedAppendAndSync {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
  @Rule public TestName name = new TestName();

  private static final String COLUMN_FAMILY = "MyCF";
  private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);

  // Not assigned anywhere in this file; the test below works on its own local region.
  HRegion region = null;
  // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack)
  // Note that TEST_UTIL and CONF are static yet are re-created by setup() before every test.
  private static HBaseTestingUtility TEST_UTIL;
  public static Configuration CONF ;
  private String dir;

  // Test names
  protected TableName tableName;

  /**
   * Creates a fresh local testing utility and configuration, disables the block cache, and
   * derives the data directory and table name used by the current test method.
   */
  @Before
  public void setup() throws IOException {
    TEST_UTIL = HBaseTestingUtility.createLocalHTU();
    CONF = TEST_UTIL.getConfiguration();
    // Disable block cache.
    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    // The directory name is inherited from TestHRegion, where this test was copied from.
    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
    tableName = TableName.valueOf(name.getMethodName());
  }

  /**
   * Resets the environment edge and removes the test data directory.
   */
  @After
  public void tearDown() throws Exception {
    EnvironmentEdgeManagerTestHelper.reset();
    LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
    TEST_UTIL.cleanupTestDir();
  }

  /**
   * @return the name of the currently running test method.
   */
  String getName() {
    return name.getMethodName();
  }

  /**
   * Reproduce locking up that happens when we get exceptions appending and syncing.
   * See HBASE-14317.
   * First I need to set up some mocks for Server and RegionServerServices. I also need to
   * set up a dodgy WAL that will throw an exception when we go to append to it.
   */
  @Test
  public void testLockupAroundBadAssignSync() throws IOException {
    final AtomicLong rolls = new AtomicLong(0);
    // Dodgy WAL. Will throw exceptions when flags set.
    class DodgyFSLog extends FSHLog {
      volatile boolean throwSyncException = false;
      volatile boolean throwAppendException = false;

      public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
          throws IOException {
        super(fs, root, logDir, conf);
      }

      @Override
      public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException {
        byte [][] regions = super.rollWriter(force);
        // Count only rolls that completed, so the test can wait for a roll to finish.
        rolls.getAndIncrement();
        return regions;
      }

      @Override
      protected Writer createWriterInstance(Path path) throws IOException {
        final Writer w = super.createWriterInstance(path);
        // Wrap the real writer so sync/append can be made to fail on demand.
        return new Writer() {
          @Override
          public void close() throws IOException {
            w.close();
          }

          @Override
          public void sync() throws IOException {
            if (throwSyncException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...");
            }
            w.sync();
          }

          @Override
          public void append(Entry entry) throws IOException {
            if (throwAppendException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...");
            }
            w.append(entry);
          }

          @Override
          public long getLength() {
            return w.getLength();
          }
        };
      }
    }

    // Make up mocked server and services.
    Server server = mock(Server.class);
    when(server.getConfiguration()).thenReturn(CONF);
    when(server.isStopped()).thenReturn(false);
    when(server.isAborted()).thenReturn(false);
    RegionServerServices services = mock(RegionServerServices.class);
    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
    // the test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL);
    logRoller.start();

    boolean threwOnSync = false;
    boolean threwOnAppend = false;
    boolean threwOnBoth = false;

    // Created inside the try so that a failure to open the region still runs the cleanup below.
    HRegion testRegion = null;
    try {
      testRegion = initHRegion(tableName, null, null, dodgyWAL);
      // Get some random bytes.
      byte[] value = Bytes.toBytes(getName());
      try {
        // First get something into memstore
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
        testRegion.put(put);
      } catch (IOException ioe) {
        // Neither failure flag is set yet, so this put is expected to succeed.
        LOG.error("Initial put failed unexpectedly", ioe);
        fail("Initial put with a healthy WAL should not have thrown: " + ioe);
      }
      long rollsCount = rolls.get();
      try {
        dodgyWAL.throwAppendException = true;
        dodgyWAL.throwSyncException = false;
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
        testRegion.put(put);
      } catch (IOException ioe) {
        threwOnAppend = true;
      }
      rollsCount = waitForRollPast(rolls, rollsCount);

      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
      // appends to sync. We should be able to continue.

      try {
        dodgyWAL.throwAppendException = true;
        dodgyWAL.throwSyncException = true;
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
        testRegion.put(put);
      } catch (IOException ioe) {
        threwOnBoth = true;
      }
      waitForRollPast(rolls, rollsCount);

      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
      // to just continue.

      // So, should be no abort at this stage. Verify.
      // NOTE(review): atLeast(0) is satisfied by any number of invocations, including zero, so
      // this verification cannot fail. If the intent is to assert that no abort has happened
      // yet, Mockito.never() would express that -- confirm it holds reliably before tightening.
      Mockito.verify(server, Mockito.atLeast(0)).
        abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
      try {
        dodgyWAL.throwAppendException = false;
        dodgyWAL.throwSyncException = true;
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
        testRegion.put(put);
      } catch (IOException ioe) {
        threwOnSync = true;
      }
      // An append in the WAL but the sync failed is a server abort condition. That is our
      // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
      // happens. If it don't we'll timeout the whole test. That is fine.
      while (true) {
        try {
          Mockito.verify(server, Mockito.atLeast(1)).
            abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
          break;
        } catch (WantedButNotInvoked t) {
          Threads.sleep(1);
        }
      }
    } finally {
      // To stop logRoller, its server has to say it is stopped.
      Mockito.when(server.isStopped()).thenReturn(true);
      // Chain the closes so that a failure in one does not skip the ones after it.
      try {
        logRoller.close();
      } finally {
        try {
          if (testRegion != null) {
            try {
              testRegion.close(true);
            } catch (DroppedSnapshotException e) {
              LOG.info("On way out; expected!", e);
            }
          }
        } finally {
          dodgyWAL.close();
        }
      }
    }
    // Assert outside the finally block: an assertion failing inside finally would replace, and
    // so hide, whatever exception the test body itself threw.
    assertTrue("The regionserver should have thrown an exception on failed append and sync",
      threwOnBoth);
    assertTrue("The regionserver should have thrown an exception on failed append",
      threwOnAppend);
    assertTrue("The regionserver should have thrown an exception on failed sync", threwOnSync);
  }

  /**
   * Blocks, polling every 100ms, until the roll counter differs from {@code previousCount}.
   * There is no timeout here; the caller relies on the overall test timeout.
   * @param rolls counter of completed WAL rolls
   * @param previousCount the counter value observed before the roll was expected
   * @return the counter value that ended the wait
   */
  private static long waitForRollPast(AtomicLong rolls, long previousCount) {
    long current = rolls.get();
    while (current == previousCount) {
      Threads.sleep(100);
      current = rolls.get();
    }
    return current;
  }

  /**
   * Initializes the chunk creator and opens a local region for {@code tableName} with the
   * single test column family, SYNC_WAL durability and the supplied WAL.
   * @param tableName table the region belongs to
   * @param startKey region start key, may be null
   * @param stopKey region stop key, may be null
   * @param wal the WAL the region writes to
   * @return A region on which you must call
   *         {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done.
   */
  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, WAL wal)
      throws IOException {
    ChunkCreator.initialize(MemStoreLABImpl.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null);
    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, false, Durability.SYNC_WAL,
      wal, COLUMN_FAMILY_BYTES);
  }
}