001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.junit.Assert.assertTrue; 021import static org.junit.Assert.fail; 022import static org.mockito.Mockito.mock; 023import static org.mockito.Mockito.when; 024 025import java.io.IOException; 026import java.util.List; 027import java.util.Map; 028import java.util.concurrent.atomic.AtomicLong; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Path; 032import org.apache.hadoop.hbase.Abortable; 033import org.apache.hadoop.hbase.DroppedSnapshotException; 034import org.apache.hadoop.hbase.HBaseClassTestRule; 035import org.apache.hadoop.hbase.HBaseTestingUtility; 036import org.apache.hadoop.hbase.HConstants; 037import org.apache.hadoop.hbase.TableName; 038import org.apache.hadoop.hbase.client.Durability; 039import org.apache.hadoop.hbase.client.Put; 040import org.apache.hadoop.hbase.regionserver.wal.FSHLog; 041import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; 042import org.apache.hadoop.hbase.testclassification.SmallTests; 043import org.apache.hadoop.hbase.util.Bytes; 044import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper; 045import org.apache.hadoop.hbase.util.Pair; 046import org.apache.hadoop.hbase.util.Threads; 047import org.apache.hadoop.hbase.wal.WAL; 048import org.apache.hadoop.hbase.wal.WALProvider.Writer; 049import org.junit.After; 050import org.junit.Before; 051import org.junit.ClassRule; 052import org.junit.Rule; 053import org.junit.Test; 054import org.junit.experimental.categories.Category; 055import org.junit.rules.TestName; 056import org.mockito.Mockito; 057import org.mockito.exceptions.verification.WantedButNotInvoked; 058import org.slf4j.Logger; 059import org.slf4j.LoggerFactory; 060 061/** 062 * Testing sync/append failures. Copied from TestHRegion. 063 */ 064@Category({ SmallTests.class }) 065public class TestFailedAppendAndSync { 066 067 @ClassRule 068 public static final HBaseClassTestRule CLASS_RULE = 069 HBaseClassTestRule.forClass(TestFailedAppendAndSync.class); 070 071 private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class); 072 @Rule 073 public TestName name = new TestName(); 074 075 private static final String COLUMN_FAMILY = "MyCF"; 076 private static final byte[] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY); 077 078 HRegion region = null; 079 // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack) 080 private static HBaseTestingUtility TEST_UTIL; 081 public static Configuration CONF; 082 private String dir; 083 084 // Test names 085 protected TableName tableName; 086 087 @Before 088 public void setup() throws IOException { 089 TEST_UTIL = HBaseTestingUtility.createLocalHTU(); 090 CONF = TEST_UTIL.getConfiguration(); 091 // Disable block cache. 092 CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f); 093 dir = TEST_UTIL.getDataTestDir("TestHRegion").toString(); 094 tableName = TableName.valueOf(name.getMethodName()); 095 } 096 097 @After 098 public void tearDown() throws Exception { 099 EnvironmentEdgeManagerTestHelper.reset(); 100 LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir()); 101 TEST_UTIL.cleanupTestDir(); 102 } 103 104 String getName() { 105 return name.getMethodName(); 106 } 107 108 /** 109 * Reproduce locking up that happens when we get an exceptions appending and syncing. See 110 * HBASE-14317. First I need to set up some mocks for Server and RegionServerServices. I also need 111 * to set up a dodgy WAL that will throw an exception when we go to append to it. 112 */ 113 @Test 114 public void testLockupAroundBadAssignSync() throws IOException { 115 final AtomicLong rolls = new AtomicLong(0); 116 // Dodgy WAL. Will throw exceptions when flags set. 117 class DodgyFSLog extends FSHLog { 118 volatile boolean throwSyncException = false; 119 volatile boolean throwAppendException = false; 120 volatile boolean throwArchiveException = false; 121 122 public DodgyFSLog(FileSystem fs, Abortable abortable, Path root, String logDir, 123 Configuration conf) throws IOException { 124 super(fs, abortable, root, logDir, conf); 125 } 126 127 @Override 128 public Map<byte[], List<byte[]>> rollWriter(boolean force) 129 throws FailedLogCloseException, IOException { 130 Map<byte[], List<byte[]>> regions = super.rollWriter(force); 131 rolls.getAndIncrement(); 132 return regions; 133 } 134 135 @Override 136 protected void archiveLogFile(Path p) throws IOException { 137 if (throwArchiveException) { 138 throw new IOException("throw archival exception"); 139 } 140 } 141 142 @Override 143 protected void archive(Pair<Path, Long> localLogsToArchive) { 144 super.archive(localLogsToArchive); 145 } 146 147 @Override 148 protected Writer createWriterInstance(Path path) throws IOException { 149 final Writer w = super.createWriterInstance(path); 150 return new Writer() { 151 @Override 152 public void close() throws IOException { 153 w.close(); 154 } 155 156 @Override 157 public void sync(boolean forceSync) throws IOException { 158 if (throwSyncException) { 159 throw new IOException("FAKE! Failed to replace a bad datanode..."); 160 } 161 w.sync(forceSync); 162 } 163 164 @Override 165 public void append(Entry entry) throws IOException { 166 if (throwAppendException) { 167 throw new IOException("FAKE! Failed to replace a bad datanode..."); 168 } 169 w.append(entry); 170 } 171 172 @Override 173 public long getLength() { 174 return w.getLength(); 175 } 176 177 @Override 178 public long getSyncedLength() { 179 return w.getSyncedLength(); 180 } 181 }; 182 } 183 } 184 185 // Make up mocked server and services. 186 RegionServerServices services = mock(RegionServerServices.class); 187 when(services.getConfiguration()).thenReturn(CONF); 188 when(services.isStopped()).thenReturn(false); 189 when(services.isAborted()).thenReturn(false); 190 // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with 191 // the test. 192 FileSystem fs = FileSystem.get(CONF); 193 Path rootDir = new Path(dir + getName()); 194 DodgyFSLog dodgyWAL = new DodgyFSLog(fs, services, rootDir, getName(), CONF); 195 dodgyWAL.init(); 196 LogRoller logRoller = new LogRoller(services); 197 logRoller.addWAL(dodgyWAL); 198 logRoller.start(); 199 200 boolean threwOnSync = false; 201 boolean threwOnAppend = false; 202 boolean threwOnBoth = false; 203 204 HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL); 205 try { 206 // Get some random bytes. 207 byte[] value = Bytes.toBytes(getName()); 208 try { 209 // First get something into memstore 210 Put put = new Put(value); 211 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value); 212 region.put(put); 213 } catch (IOException ioe) { 214 fail(); 215 } 216 long rollsCount = rolls.get(); 217 try { 218 dodgyWAL.throwAppendException = true; 219 dodgyWAL.throwSyncException = false; 220 Put put = new Put(value); 221 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value); 222 region.put(put); 223 } catch (IOException ioe) { 224 threwOnAppend = true; 225 } 226 while (rollsCount == rolls.get()) 227 Threads.sleep(100); 228 rollsCount = rolls.get(); 229 230 // When we get to here.. we should be ok. A new WAL has been put in place. There were no 231 // appends to sync. We should be able to continue. 232 233 try { 234 dodgyWAL.throwAppendException = true; 235 dodgyWAL.throwSyncException = true; 236 Put put = new Put(value); 237 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value); 238 region.put(put); 239 } catch (IOException ioe) { 240 threwOnBoth = true; 241 } 242 while (rollsCount == rolls.get()) 243 Threads.sleep(100); 244 245 // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able 246 // to just continue. 247 248 // So, should be no abort at this stage. Verify. 249 Mockito.verify(services, Mockito.atLeast(0)).abort(Mockito.anyString(), 250 (Throwable) Mockito.anyObject()); 251 try { 252 dodgyWAL.throwAppendException = false; 253 dodgyWAL.throwSyncException = true; 254 Put put = new Put(value); 255 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value); 256 region.put(put); 257 } catch (IOException ioe) { 258 threwOnSync = true; 259 } 260 // An append in the WAL but the sync failed is a server abort condition. That is our 261 // current semantic. Verify. It takes a while for abort to be called. Just hang here till it 262 // happens. If it don't we'll timeout the whole test. That is fine. 263 while (true) { 264 try { 265 Mockito.verify(services, Mockito.atLeast(1)).abort(Mockito.anyString(), 266 (Throwable) Mockito.anyObject()); 267 break; 268 } catch (WantedButNotInvoked t) { 269 Threads.sleep(1); 270 } 271 } 272 273 try { 274 dodgyWAL.throwAppendException = false; 275 dodgyWAL.throwSyncException = false; 276 dodgyWAL.throwArchiveException = true; 277 Pair<Path, Long> pair = new Pair<Path, Long>(); 278 pair.setFirst(new Path("/a/b/")); 279 pair.setSecond(100L); 280 dodgyWAL.archive(pair); 281 } catch (Throwable ioe) { 282 } 283 while (true) { 284 try { 285 // one more abort needs to be called 286 Mockito.verify(services, Mockito.atLeast(2)).abort(Mockito.anyString(), 287 (Throwable) Mockito.anyObject()); 288 break; 289 } catch (WantedButNotInvoked t) { 290 Threads.sleep(1); 291 } 292 } 293 } finally { 294 // To stop logRoller, its server has to say it is stopped. 295 Mockito.when(services.isStopped()).thenReturn(true); 296 if (logRoller != null) logRoller.close(); 297 if (region != null) { 298 try { 299 region.close(true); 300 } catch (DroppedSnapshotException e) { 301 LOG.info("On way out; expected!", e); 302 } 303 } 304 if (dodgyWAL != null) dodgyWAL.close(); 305 assertTrue("The regionserver should have thrown an exception", threwOnBoth); 306 assertTrue("The regionserver should have thrown an exception", threwOnAppend); 307 assertTrue("The regionserver should have thrown an exception", threwOnSync); 308 } 309 } 310 311 /** 312 * @return A region on which you must call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} 313 * when done. 314 */ 315 public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, 316 Configuration conf, WAL wal) throws IOException { 317 ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null, 318 MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT); 319 return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false, 320 Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES); 321 } 322}