/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DroppedSnapshotException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.exceptions.verification.WantedButNotInvoked;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Testing sync/append failures. Copied from TestHRegion.
 * <p>
 * The test drives an {@link HRegion} backed by a {@link DodgyFSLog}, a WAL whose writer can be
 * told to fail appends, syncs and archiving, and checks how the mocked
 * {@link RegionServerServices} is asked to abort.
 */
@Category({ SmallTests.class })
public class TestFailedAppendAndSync {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
  @Rule
  public TestName name = new TestName();

  private static final String COLUMN_FAMILY = "MyCF";
  private static final byte[] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);

  // Not assigned anywhere in this class; the test works on a local region instead.
  HRegion region = null;
  // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack)
  // These are static but re-created in setup() before every test method.
  private static HBaseTestingUtil TEST_UTIL;
  public static Configuration CONF;
  private String dir;

  // Test names
  protected TableName tableName;

  /**
   * Creates a fresh testing utility and configuration (with the block cache disabled) and derives
   * the data directory and table name for the current test method.
   */
  @Before
  public void setup() throws IOException {
    TEST_UTIL = new HBaseTestingUtil();
    CONF = TEST_UTIL.getConfiguration();
    // Disable block cache.
    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
    tableName = TableName.valueOf(name.getMethodName());
  }

  /**
   * Resets the environment edge and removes the test data directory.
   */
  @After
  public void tearDown() throws Exception {
    EnvironmentEdgeManagerTestHelper.reset();
    LOG.info("Cleaning test directory: {}", TEST_UTIL.getDataTestDir());
    TEST_UTIL.cleanupTestDir();
  }

  /** Returns the name of the currently running test method. */
  String getName() {
    return name.getMethodName();
  }

  /**
   * Dodgy WAL. Will throw exceptions when flags set.
   * <p>
   * The writer it hands out throws an {@link IOException} from {@code append} and/or {@code sync}
   * while the corresponding flag is {@code true}; {@link #archiveLogFile(Path)} throws while
   * {@link #throwArchiveException} is {@code true}. Every completed {@link #rollWriter(boolean)}
   * call is counted in {@link #rolls}.
   */
  class DodgyFSLog extends FSHLog {
    // Flags are flipped by the test thread and read by whichever thread drives the writer.
    volatile boolean throwSyncException = false;
    volatile boolean throwAppendException = false;
    volatile boolean throwArchiveException = false;

    /** Number of {@link #rollWriter(boolean)} calls that returned normally. */
    final AtomicLong rolls = new AtomicLong(0);

    public DodgyFSLog(FileSystem fs, Server server, Path root, String logDir, Configuration conf)
      throws IOException {
      super(fs, server, root, logDir, conf);
    }

    /**
     * Rolls the writer via the superclass and then bumps {@link #rolls}. The counter is only
     * incremented when the superclass call returns without throwing.
     */
    @Override
    public Map<byte[], List<byte[]>> rollWriter(boolean force)
      throws FailedLogCloseException, IOException {
      Map<byte[], List<byte[]>> regions = super.rollWriter(force);
      rolls.getAndIncrement();
      return regions;
    }

    /**
     * Throws when {@link #throwArchiveException} is set. Does not delegate to the superclass, so
     * with the flag cleared this is a no-op.
     */
    @Override
    protected void archiveLogFile(Path p) throws IOException {
      if (throwArchiveException) {
        throw new IOException("throw archival exception");
      }
    }

    /**
     * Plain delegation to the superclass.
     * <p>
     * NOTE(review): presumably overridden only so that the enclosing test can invoke this
     * protected method on a {@code DodgyFSLog} -- confirm before removing.
     */
    @Override
    protected void archive(Pair<Path, Long> localLogsToArchive) {
      super.archive(localLogsToArchive);
    }

    /**
     * Wraps the real writer in one that fails {@code append}/{@code sync} on demand and otherwise
     * forwards every call to the real writer.
     */
    @Override
    protected Writer createWriterInstance(Path path) throws IOException {
      final Writer delegate = super.createWriterInstance(path);
      return new Writer() {
        @Override
        public void close() throws IOException {
          delegate.close();
        }

        @Override
        public void sync(boolean forceSync) throws IOException {
          if (throwSyncException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...");
          }
          delegate.sync(forceSync);
        }

        @Override
        public void append(Entry entry) throws IOException {
          if (throwAppendException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...");
          }
          delegate.append(entry);
        }

        @Override
        public long getLength() {
          return delegate.getLength();
        }

        @Override
        public long getSyncedLength() {
          return delegate.getSyncedLength();
        }
      };
    }
  }

  /**
   * Reproduce the locking up that happens when we get exceptions appending and syncing. See
   * HBASE-14317. First I need to set up some mocks for Server and RegionServerServices. I also need
   * to set up a dodgy WAL that will throw an exception when we go to append to it.
   * <p>
   * Sequence: a clean put; a put with append failing; a put with append and sync failing; a put
   * with only sync failing (after which the test waits for an abort); and finally an archive
   * failure (after which the test waits for a second abort).
   */
  @Test
  public void testLockupAroundBadAssignSync() throws IOException {
    // Make up mocked server and services.
    RegionServerServices services = mock(RegionServerServices.class);
    when(services.getConfiguration()).thenReturn(CONF);
    when(services.isStopped()).thenReturn(false);
    when(services.isAborted()).thenReturn(false);
    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
    // the test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, (Server) services, rootDir, getName(), CONF);
    dodgyWAL.init();
    LogRoller logRoller = new LogRoller(services);
    logRoller.addWAL(dodgyWAL);
    logRoller.start();

    boolean threwOnSync = false;
    boolean threwOnAppend = false;
    boolean threwOnBoth = false;

    // Named differently from the (unused) field so the field is not shadowed.
    HRegion testRegion = null;
    try {
      // Created inside the try so that a failure here still stops the roller and closes the WAL.
      testRegion = initHRegion(tableName, null, null, CONF, dodgyWAL);

      // Use the test name as both row key and cell value.
      byte[] value = Bytes.toBytes(getName());

      // First get something into memstore. No faults are injected yet, so any IOException is a
      // genuine failure; let it propagate with its stack trace instead of a bare fail().
      putValue(testRegion, "1", value);

      long rollsCount = dodgyWAL.rolls.get();
      dodgyWAL.throwAppendException = true;
      dodgyWAL.throwSyncException = false;
      try {
        putValue(testRegion, "3", value);
      } catch (IOException expected) {
        threwOnAppend = true;
      }
      rollsCount = awaitRoll(dodgyWAL, rollsCount);

      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
      // appends to sync. We should be able to continue.

      dodgyWAL.throwAppendException = true;
      dodgyWAL.throwSyncException = true;
      try {
        putValue(testRegion, "4", value);
      } catch (IOException expected) {
        threwOnBoth = true;
      }
      awaitRoll(dodgyWAL, rollsCount);

      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
      // to just continue.

      // So, should be no abort at this stage. Verify.
      // NOTE(review): atLeast(0) is satisfied by any number of calls, so this line cannot fail.
      // If "no abort" is really the contract here it should be never(); left as-is because that
      // would change what the test enforces.
      verify(services, atLeast(0)).abort(anyString(), any(Throwable.class));

      dodgyWAL.throwAppendException = false;
      dodgyWAL.throwSyncException = true;
      try {
        putValue(testRegion, "2", value);
      } catch (IOException expected) {
        threwOnSync = true;
      }
      // An append in the WAL but the sync failed is a server abort condition. That is our
      // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
      // happens. If it doesn't, we'll time out the whole test. That is fine.
      while (true) {
        try {
          verify(services, atLeast(1)).abort(anyString(), any(Throwable.class));
          break;
        } catch (WantedButNotInvoked notYet) {
          Threads.sleep(1);
        }
      }

      dodgyWAL.throwAppendException = false;
      dodgyWAL.throwSyncException = false;
      dodgyWAL.throwArchiveException = true;
      try {
        Pair<Path, Long> pair = new Pair<>();
        pair.setFirst(new Path("/a/b/"));
        pair.setSecond(100L);
        dodgyWAL.archive(pair);
      } catch (Throwable ignored) {
        // Deliberately not rethrown: this step only cares that abort gets called once more, which
        // is what the loop below waits for. Log it so an unexpected failure is still visible.
        LOG.info("Ignoring failure while archiving with throwArchiveException set", ignored);
      }
      while (true) {
        try {
          // one more abort needs to be called
          verify(services, atLeast(2)).abort(anyString(), any());
          break;
        } catch (AssertionError notYet) {
          // NOTE(review): with atLeast(2) Mockito is expected to report a shortfall with an
          // assertion type other than WantedButNotInvoked (TooFewActualInvocations -- confirm
          // against the Mockito version in use). Catching AssertionError covers both, so we keep
          // polling until the test times out rather than failing on the first check.
          Threads.sleep(1);
        }
      }
    } finally {
      // To stop logRoller, its server has to say it is stopped.
      when(services.isStopped()).thenReturn(true);
      // Nested try/finally so that one failing close() cannot skip the remaining ones.
      try {
        logRoller.close();
      } finally {
        try {
          if (testRegion != null) {
            testRegion.close(true);
          }
        } catch (DroppedSnapshotException e) {
          LOG.info("On way out; expected!", e);
        } finally {
          dodgyWAL.close();
        }
      }
    }

    // Checked after cleanup and outside the finally block: an assertion thrown from finally would
    // replace, and thereby hide, whatever exception made the try body fail.
    assertTrue("The regionserver should have thrown an exception", threwOnBoth);
    assertTrue("The regionserver should have thrown an exception", threwOnAppend);
    assertTrue("The regionserver should have thrown an exception", threwOnSync);
  }

  /**
   * Puts a single cell into {@code target}: row {@code value}, family
   * {@link #COLUMN_FAMILY_BYTES}, the given qualifier, and {@code value} as the cell value.
   */
  private static void putValue(HRegion target, String qualifier, byte[] value)
    throws IOException {
    Put put = new Put(value);
    put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes(qualifier), value);
    target.put(put);
  }

  /**
   * Blocks, polling every 100 ms, until {@code wal.rolls} differs from {@code previousRolls}.
   * @return the roll count read after the change was observed
   */
  private static long awaitRoll(DodgyFSLog wal, long previousRolls) {
    while (previousRolls == wal.rolls.get()) {
      Threads.sleep(100);
    }
    return wal.rolls.get();
  }

  /**
   * Initializes the {@link ChunkCreator} and creates a local region with the single column family
   * {@link #COLUMN_FAMILY_BYTES}, {@link Durability#SYNC_WAL} durability and the supplied WAL.
   * Relies on the static {@code TEST_UTIL}, so {@link #setup()} must have run first.
   * @param tableName table the region belongs to
   * @param startKey  region start key, passed through to the testing utility
   * @param stopKey   region stop key, passed through to the testing utility
   * @param conf      configuration for the region
   * @param wal       write-ahead log the region writes to
   * @return A region on which you must call {@link HBaseTestingUtil#closeRegionAndWAL(HRegion)}
   *         when done.
   */
  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
    Configuration conf, WAL wal) throws IOException {
    ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
      MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
      Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
  }
}