001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.junit.Assert.assertTrue; 021import static org.junit.Assert.fail; 022import static org.mockito.Mockito.mock; 023import static org.mockito.Mockito.when; 024 025import java.io.IOException; 026import java.util.List; 027import java.util.Map; 028import java.util.concurrent.atomic.AtomicLong; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Path; 032import org.apache.hadoop.hbase.Abortable; 033import org.apache.hadoop.hbase.DroppedSnapshotException; 034import org.apache.hadoop.hbase.HBaseClassTestRule; 035import org.apache.hadoop.hbase.HBaseTestingUtility; 036import org.apache.hadoop.hbase.HConstants; 037import org.apache.hadoop.hbase.TableName; 038import org.apache.hadoop.hbase.client.Durability; 039import org.apache.hadoop.hbase.client.Put; 040import org.apache.hadoop.hbase.regionserver.wal.FSHLog; 041import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; 042import org.apache.hadoop.hbase.testclassification.SmallTests; 043import org.apache.hadoop.hbase.util.Bytes; 044import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper; 045import org.apache.hadoop.hbase.util.Pair; 046import org.apache.hadoop.hbase.util.Threads; 047import org.apache.hadoop.hbase.wal.WAL; 048import org.apache.hadoop.hbase.wal.WALProvider.Writer; 049import org.junit.After; 050import org.junit.Before; 051import org.junit.ClassRule; 052import org.junit.Rule; 053import org.junit.Test; 054import org.junit.experimental.categories.Category; 055import org.junit.rules.TestName; 056import org.mockito.Mockito; 057import org.mockito.exceptions.verification.WantedButNotInvoked; 058import org.slf4j.Logger; 059import org.slf4j.LoggerFactory; 060 061/** 062 * Testing sync/append failures. 063 * Copied from TestHRegion. 064 */ 065@Category({SmallTests.class}) 066public class TestFailedAppendAndSync { 067 068 @ClassRule 069 public static final HBaseClassTestRule CLASS_RULE = 070 HBaseClassTestRule.forClass(TestFailedAppendAndSync.class); 071 072 private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class); 073 @Rule public TestName name = new TestName(); 074 075 private static final String COLUMN_FAMILY = "MyCF"; 076 private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY); 077 078 HRegion region = null; 079 // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack) 080 private static HBaseTestingUtility TEST_UTIL; 081 public static Configuration CONF ; 082 private String dir; 083 084 // Test names 085 protected TableName tableName; 086 087 @Before 088 public void setup() throws IOException { 089 TEST_UTIL = HBaseTestingUtility.createLocalHTU(); 090 CONF = TEST_UTIL.getConfiguration(); 091 // Disable block cache. 092 CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f); 093 dir = TEST_UTIL.getDataTestDir("TestHRegion").toString(); 094 tableName = TableName.valueOf(name.getMethodName()); 095 } 096 097 @After 098 public void tearDown() throws Exception { 099 EnvironmentEdgeManagerTestHelper.reset(); 100 LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir()); 101 TEST_UTIL.cleanupTestDir(); 102 } 103 104 String getName() { 105 return name.getMethodName(); 106 } 107 108 /** 109 * Reproduce locking up that happens when we get an exceptions appending and syncing. 110 * See HBASE-14317. 111 * First I need to set up some mocks for Server and RegionServerServices. I also need to 112 * set up a dodgy WAL that will throw an exception when we go to append to it. 113 */ 114 @Test 115 public void testLockupAroundBadAssignSync() throws IOException { 116 final AtomicLong rolls = new AtomicLong(0); 117 // Dodgy WAL. Will throw exceptions when flags set. 118 class DodgyFSLog extends FSHLog { 119 volatile boolean throwSyncException = false; 120 volatile boolean throwAppendException = false; 121 volatile boolean throwArchiveException = false; 122 123 public DodgyFSLog(FileSystem fs, Abortable abortable, Path root, String logDir, 124 Configuration conf) throws IOException { 125 super(fs, abortable, root, logDir, conf); 126 } 127 128 @Override 129 public Map<byte[], List<byte[]>> rollWriter(boolean force) 130 throws FailedLogCloseException, IOException { 131 Map<byte[], List<byte[]>> regions = super.rollWriter(force); 132 rolls.getAndIncrement(); 133 return regions; 134 } 135 136 @Override 137 protected void archiveLogFile(Path p) throws IOException { 138 if (throwArchiveException) { 139 throw new IOException("throw archival exception"); 140 } 141 } 142 143 @Override 144 protected void archive(Pair<Path, Long> localLogsToArchive) { 145 super.archive(localLogsToArchive); 146 } 147 148 @Override 149 protected Writer createWriterInstance(Path path) throws IOException { 150 final Writer w = super.createWriterInstance(path); 151 return new Writer() { 152 @Override 153 public void close() throws IOException { 154 w.close(); 155 } 156 157 @Override 158 public void sync(boolean forceSync) throws IOException { 159 if (throwSyncException) { 160 throw new IOException("FAKE! Failed to replace a bad datanode..."); 161 } 162 w.sync(forceSync); 163 } 164 165 @Override 166 public void append(Entry entry) throws IOException { 167 if (throwAppendException) { 168 throw new IOException("FAKE! Failed to replace a bad datanode..."); 169 } 170 w.append(entry); 171 } 172 173 @Override 174 public long getLength() { 175 return w.getLength(); 176 } 177 178 @Override 179 public long getSyncedLength() { 180 return w.getSyncedLength(); 181 } 182 }; 183 } 184 } 185 186 // Make up mocked server and services. 187 RegionServerServices services = mock(RegionServerServices.class); 188 when(services.getConfiguration()).thenReturn(CONF); 189 when(services.isStopped()).thenReturn(false); 190 when(services.isAborted()).thenReturn(false); 191 // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with 192 // the test. 193 FileSystem fs = FileSystem.get(CONF); 194 Path rootDir = new Path(dir + getName()); 195 DodgyFSLog dodgyWAL = new DodgyFSLog(fs, services, rootDir, getName(), CONF); 196 dodgyWAL.init(); 197 LogRoller logRoller = new LogRoller(services); 198 logRoller.addWAL(dodgyWAL); 199 logRoller.start(); 200 201 boolean threwOnSync = false; 202 boolean threwOnAppend = false; 203 boolean threwOnBoth = false; 204 205 HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL); 206 try { 207 // Get some random bytes. 208 byte[] value = Bytes.toBytes(getName()); 209 try { 210 // First get something into memstore 211 Put put = new Put(value); 212 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value); 213 region.put(put); 214 } catch (IOException ioe) { 215 fail(); 216 } 217 long rollsCount = rolls.get(); 218 try { 219 dodgyWAL.throwAppendException = true; 220 dodgyWAL.throwSyncException = false; 221 Put put = new Put(value); 222 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value); 223 region.put(put); 224 } catch (IOException ioe) { 225 threwOnAppend = true; 226 } 227 while (rollsCount == rolls.get()) Threads.sleep(100); 228 rollsCount = rolls.get(); 229 230 // When we get to here.. we should be ok. A new WAL has been put in place. There were no 231 // appends to sync. We should be able to continue. 232 233 try { 234 dodgyWAL.throwAppendException = true; 235 dodgyWAL.throwSyncException = true; 236 Put put = new Put(value); 237 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value); 238 region.put(put); 239 } catch (IOException ioe) { 240 threwOnBoth = true; 241 } 242 while (rollsCount == rolls.get()) Threads.sleep(100); 243 244 // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able 245 // to just continue. 246 247 // So, should be no abort at this stage. Verify. 248 Mockito.verify(services, Mockito.atLeast(0)). 249 abort(Mockito.anyString(), (Throwable)Mockito.anyObject()); 250 try { 251 dodgyWAL.throwAppendException = false; 252 dodgyWAL.throwSyncException = true; 253 Put put = new Put(value); 254 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value); 255 region.put(put); 256 } catch (IOException ioe) { 257 threwOnSync = true; 258 } 259 // An append in the WAL but the sync failed is a server abort condition. That is our 260 // current semantic. Verify. It takes a while for abort to be called. Just hang here till it 261 // happens. If it don't we'll timeout the whole test. That is fine. 262 while (true) { 263 try { 264 Mockito.verify(services, Mockito.atLeast(1)). 265 abort(Mockito.anyString(), (Throwable)Mockito.anyObject()); 266 break; 267 } catch (WantedButNotInvoked t) { 268 Threads.sleep(1); 269 } 270 } 271 272 try { 273 dodgyWAL.throwAppendException = false; 274 dodgyWAL.throwSyncException = false; 275 dodgyWAL.throwArchiveException = true; 276 Pair<Path, Long> pair = new Pair<Path, Long>(); 277 pair.setFirst(new Path("/a/b/")); 278 pair.setSecond(100L); 279 dodgyWAL.archive(pair); 280 } catch (Throwable ioe) { 281 } 282 while (true) { 283 try { 284 // one more abort needs to be called 285 Mockito.verify(services, Mockito.atLeast(2)).abort(Mockito.anyString(), 286 (Throwable) Mockito.anyObject()); 287 break; 288 } catch (WantedButNotInvoked t) { 289 Threads.sleep(1); 290 } 291 } 292 } finally { 293 // To stop logRoller, its server has to say it is stopped. 294 Mockito.when(services.isStopped()).thenReturn(true); 295 if (logRoller != null) logRoller.close(); 296 if (region != null) { 297 try { 298 region.close(true); 299 } catch (DroppedSnapshotException e) { 300 LOG.info("On way out; expected!", e); 301 } 302 } 303 if (dodgyWAL != null) dodgyWAL.close(); 304 assertTrue("The regionserver should have thrown an exception", threwOnBoth); 305 assertTrue("The regionserver should have thrown an exception", threwOnAppend); 306 assertTrue("The regionserver should have thrown an exception", threwOnSync); 307 } 308 } 309 310 /** 311 * @return A region on which you must call 312 * {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done. 313 */ 314 public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, 315 Configuration conf, WAL wal) throws IOException { 316 ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 317 0, null, MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT); 318 return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false, 319 Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES); 320 } 321}