001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.junit.Assert.assertTrue; 021import static org.junit.Assert.fail; 022import static org.mockito.Mockito.mock; 023import static org.mockito.Mockito.when; 024 025import java.io.IOException; 026import java.util.concurrent.atomic.AtomicLong; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.FileSystem; 029import org.apache.hadoop.fs.Path; 030import org.apache.hadoop.hbase.DroppedSnapshotException; 031import org.apache.hadoop.hbase.HBaseClassTestRule; 032import org.apache.hadoop.hbase.HBaseTestingUtility; 033import org.apache.hadoop.hbase.HConstants; 034import org.apache.hadoop.hbase.TableName; 035import org.apache.hadoop.hbase.client.Durability; 036import org.apache.hadoop.hbase.client.Put; 037import org.apache.hadoop.hbase.regionserver.wal.FSHLog; 038import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; 039import org.apache.hadoop.hbase.testclassification.SmallTests; 040import org.apache.hadoop.hbase.util.Bytes; 041import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper; 042import org.apache.hadoop.hbase.util.Threads; 043import org.apache.hadoop.hbase.wal.WAL; 044import org.apache.hadoop.hbase.wal.WALProvider.Writer; 045import org.junit.After; 046import org.junit.Before; 047import org.junit.ClassRule; 048import org.junit.Rule; 049import org.junit.Test; 050import org.junit.experimental.categories.Category; 051import org.junit.rules.TestName; 052import org.mockito.Mockito; 053import org.mockito.exceptions.verification.WantedButNotInvoked; 054import org.slf4j.Logger; 055import org.slf4j.LoggerFactory; 056 057/** 058 * Testing sync/append failures. 059 * Copied from TestHRegion. 060 */ 061@Category({SmallTests.class}) 062public class TestFailedAppendAndSync { 063 064 @ClassRule 065 public static final HBaseClassTestRule CLASS_RULE = 066 HBaseClassTestRule.forClass(TestFailedAppendAndSync.class); 067 068 private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class); 069 @Rule public TestName name = new TestName(); 070 071 private static final String COLUMN_FAMILY = "MyCF"; 072 private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY); 073 074 HRegion region = null; 075 // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack) 076 private static HBaseTestingUtility TEST_UTIL; 077 public static Configuration CONF ; 078 private String dir; 079 080 // Test names 081 protected TableName tableName; 082 083 @Before 084 public void setup() throws IOException { 085 TEST_UTIL = HBaseTestingUtility.createLocalHTU(); 086 CONF = TEST_UTIL.getConfiguration(); 087 // Disable block cache. 088 CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f); 089 dir = TEST_UTIL.getDataTestDir("TestHRegion").toString(); 090 tableName = TableName.valueOf(name.getMethodName()); 091 } 092 093 @After 094 public void tearDown() throws Exception { 095 EnvironmentEdgeManagerTestHelper.reset(); 096 LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir()); 097 TEST_UTIL.cleanupTestDir(); 098 } 099 100 String getName() { 101 return name.getMethodName(); 102 } 103 104 /** 105 * Reproduce locking up that happens when we get an exceptions appending and syncing. 106 * See HBASE-14317. 107 * First I need to set up some mocks for Server and RegionServerServices. I also need to 108 * set up a dodgy WAL that will throw an exception when we go to append to it. 109 */ 110 @Test 111 public void testLockupAroundBadAssignSync() throws IOException { 112 final AtomicLong rolls = new AtomicLong(0); 113 // Dodgy WAL. Will throw exceptions when flags set. 114 class DodgyFSLog extends FSHLog { 115 volatile boolean throwSyncException = false; 116 volatile boolean throwAppendException = false; 117 118 public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) 119 throws IOException { 120 super(fs, root, logDir, conf); 121 } 122 123 @Override 124 public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException { 125 byte [][] regions = super.rollWriter(force); 126 rolls.getAndIncrement(); 127 return regions; 128 } 129 130 @Override 131 protected Writer createWriterInstance(Path path) throws IOException { 132 final Writer w = super.createWriterInstance(path); 133 return new Writer() { 134 @Override 135 public void close() throws IOException { 136 w.close(); 137 } 138 139 @Override 140 public void sync(boolean forceSync) throws IOException { 141 if (throwSyncException) { 142 throw new IOException("FAKE! Failed to replace a bad datanode..."); 143 } 144 w.sync(forceSync); 145 } 146 147 @Override 148 public void append(Entry entry) throws IOException { 149 if (throwAppendException) { 150 throw new IOException("FAKE! Failed to replace a bad datanode..."); 151 } 152 w.append(entry); 153 } 154 155 @Override 156 public long getLength() { 157 return w.getLength(); 158 } 159 }; 160 } 161 } 162 163 // Make up mocked server and services. 164 RegionServerServices services = mock(RegionServerServices.class); 165 when(services.getConfiguration()).thenReturn(CONF); 166 when(services.isStopped()).thenReturn(false); 167 when(services.isAborted()).thenReturn(false); 168 // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with 169 // the test. 170 FileSystem fs = FileSystem.get(CONF); 171 Path rootDir = new Path(dir + getName()); 172 DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF); 173 dodgyWAL.init(); 174 LogRoller logRoller = new LogRoller(services); 175 logRoller.addWAL(dodgyWAL); 176 logRoller.start(); 177 178 boolean threwOnSync = false; 179 boolean threwOnAppend = false; 180 boolean threwOnBoth = false; 181 182 HRegion region = initHRegion(tableName, null, null, dodgyWAL); 183 try { 184 // Get some random bytes. 185 byte[] value = Bytes.toBytes(getName()); 186 try { 187 // First get something into memstore 188 Put put = new Put(value); 189 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value); 190 region.put(put); 191 } catch (IOException ioe) { 192 fail(); 193 } 194 long rollsCount = rolls.get(); 195 try { 196 dodgyWAL.throwAppendException = true; 197 dodgyWAL.throwSyncException = false; 198 Put put = new Put(value); 199 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value); 200 region.put(put); 201 } catch (IOException ioe) { 202 threwOnAppend = true; 203 } 204 while (rollsCount == rolls.get()) Threads.sleep(100); 205 rollsCount = rolls.get(); 206 207 // When we get to here.. we should be ok. A new WAL has been put in place. There were no 208 // appends to sync. We should be able to continue. 209 210 try { 211 dodgyWAL.throwAppendException = true; 212 dodgyWAL.throwSyncException = true; 213 Put put = new Put(value); 214 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value); 215 region.put(put); 216 } catch (IOException ioe) { 217 threwOnBoth = true; 218 } 219 while (rollsCount == rolls.get()) Threads.sleep(100); 220 221 // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able 222 // to just continue. 223 224 // So, should be no abort at this stage. Verify. 225 Mockito.verify(services, Mockito.atLeast(0)). 226 abort(Mockito.anyString(), (Throwable)Mockito.anyObject()); 227 try { 228 dodgyWAL.throwAppendException = false; 229 dodgyWAL.throwSyncException = true; 230 Put put = new Put(value); 231 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value); 232 region.put(put); 233 } catch (IOException ioe) { 234 threwOnSync = true; 235 } 236 // An append in the WAL but the sync failed is a server abort condition. That is our 237 // current semantic. Verify. It takes a while for abort to be called. Just hang here till it 238 // happens. If it don't we'll timeout the whole test. That is fine. 239 while (true) { 240 try { 241 Mockito.verify(services, Mockito.atLeast(1)). 242 abort(Mockito.anyString(), (Throwable)Mockito.anyObject()); 243 break; 244 } catch (WantedButNotInvoked t) { 245 Threads.sleep(1); 246 } 247 } 248 } finally { 249 // To stop logRoller, its server has to say it is stopped. 250 Mockito.when(services.isStopped()).thenReturn(true); 251 if (logRoller != null) logRoller.close(); 252 if (region != null) { 253 try { 254 region.close(true); 255 } catch (DroppedSnapshotException e) { 256 LOG.info("On way out; expected!", e); 257 } 258 } 259 if (dodgyWAL != null) dodgyWAL.close(); 260 assertTrue("The regionserver should have thrown an exception", threwOnBoth); 261 assertTrue("The regionserver should have thrown an exception", threwOnAppend); 262 assertTrue("The regionserver should have thrown an exception", threwOnSync); 263 } 264 } 265 266 /** 267 * @return A region on which you must call 268 * {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done. 269 */ 270 public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, WAL wal) 271 throws IOException { 272 ChunkCreator.initialize(MemStoreLABImpl.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null); 273 return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, false, Durability.SYNC_WAL, 274 wal, COLUMN_FAMILY_BYTES); 275 } 276}