001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertTrue;
021import static org.junit.Assert.fail;
022import static org.mockito.Mockito.mock;
023import static org.mockito.Mockito.when;
024
025import java.io.IOException;
026import java.util.List;
027import java.util.Map;
028import java.util.concurrent.atomic.AtomicLong;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.DroppedSnapshotException;
033import org.apache.hadoop.hbase.HBaseClassTestRule;
034import org.apache.hadoop.hbase.HBaseTestingUtil;
035import org.apache.hadoop.hbase.HConstants;
036import org.apache.hadoop.hbase.Server;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.client.Durability;
039import org.apache.hadoop.hbase.client.Put;
040import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
041import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
042import org.apache.hadoop.hbase.testclassification.SmallTests;
043import org.apache.hadoop.hbase.util.Bytes;
044import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
045import org.apache.hadoop.hbase.util.Pair;
046import org.apache.hadoop.hbase.util.Threads;
047import org.apache.hadoop.hbase.wal.WAL;
048import org.apache.hadoop.hbase.wal.WALProvider.Writer;
049import org.junit.After;
050import org.junit.Before;
051import org.junit.ClassRule;
052import org.junit.Rule;
053import org.junit.Test;
054import org.junit.experimental.categories.Category;
055import org.junit.rules.TestName;
056import org.mockito.Mockito;
057import org.mockito.exceptions.verification.WantedButNotInvoked;
058import org.slf4j.Logger;
059import org.slf4j.LoggerFactory;
060
061/**
062 * Testing sync/append failures. Copied from TestHRegion.
063 */
064@Category({ SmallTests.class })
065public class TestFailedAppendAndSync {
066
067  @ClassRule
068  public static final HBaseClassTestRule CLASS_RULE =
069    HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);
070
071  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
072  @Rule
073  public TestName name = new TestName();
074
075  private static final String COLUMN_FAMILY = "MyCF";
076  private static final byte[] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
077
078  HRegion region = null;
  // Do not run these tests in parallel. The original note did not record why; presumably it is
  // because TEST_UTIL and CONF are static yet re-created by setup() before every test.
080  private static HBaseTestingUtil TEST_UTIL;
081  public static Configuration CONF;
082  private String dir;
083
  // Table used by the running test; setup() derives it from the test method's name.
085  protected TableName tableName;
086
087  @Before
088  public void setup() throws IOException {
089    TEST_UTIL = new HBaseTestingUtil();
090    CONF = TEST_UTIL.getConfiguration();
091    // Disable block cache.
092    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
093    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
094    tableName = TableName.valueOf(name.getMethodName());
095  }
096
097  @After
098  public void tearDown() throws Exception {
099    EnvironmentEdgeManagerTestHelper.reset();
100    LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
101    TEST_UTIL.cleanupTestDir();
102  }
103
104  String getName() {
105    return name.getMethodName();
106  }
107
108  // Dodgy WAL. Will throw exceptions when flags set.
109  class DodgyFSLog extends FSHLog {
110    volatile boolean throwSyncException = false;
111    volatile boolean throwAppendException = false;
112    volatile boolean throwArchiveException = false;
113
114    final AtomicLong rolls = new AtomicLong(0);
115
116    public DodgyFSLog(FileSystem fs, Server server, Path root, String logDir, Configuration conf)
117      throws IOException {
118      super(fs, server, root, logDir, conf);
119    }
120
121    @Override
122    public Map<byte[], List<byte[]>> rollWriter(boolean force)
123      throws FailedLogCloseException, IOException {
124      Map<byte[], List<byte[]>> regions = super.rollWriter(force);
125      rolls.getAndIncrement();
126      return regions;
127    }
128
129    @Override
130    protected void archiveLogFile(Path p) throws IOException {
131      if (throwArchiveException) {
132        throw new IOException("throw archival exception");
133      }
134    }
135
136    @Override
137    protected void archive(Pair<Path, Long> localLogsToArchive) {
138      super.archive(localLogsToArchive);
139    }
140
141    @Override
142    protected Writer createWriterInstance(Path path) throws IOException {
143      final Writer w = super.createWriterInstance(path);
144      return new Writer() {
145        @Override
146        public void close() throws IOException {
147          w.close();
148        }
149
150        @Override
151        public void sync(boolean forceSync) throws IOException {
152          if (throwSyncException) {
153            throw new IOException("FAKE! Failed to replace a bad datanode...");
154          }
155          w.sync(forceSync);
156        }
157
158        @Override
159        public void append(Entry entry) throws IOException {
160          if (throwAppendException) {
161            throw new IOException("FAKE! Failed to replace a bad datanode...");
162          }
163          w.append(entry);
164        }
165
166        @Override
167        public long getLength() {
168          return w.getLength();
169        }
170
171        @Override
172        public long getSyncedLength() {
173          return w.getSyncedLength();
174        }
175      };
176    }
177  }
178
179  /**
180   * Reproduce locking up that happens when we get an exceptions appending and syncing. See
181   * HBASE-14317. First I need to set up some mocks for Server and RegionServerServices. I also need
182   * to set up a dodgy WAL that will throw an exception when we go to append to it.
183   */
184  @Test
185  public void testLockupAroundBadAssignSync() throws IOException {
186    // Make up mocked server and services.
187    RegionServerServices services = mock(RegionServerServices.class);
188    when(services.getConfiguration()).thenReturn(CONF);
189    when(services.isStopped()).thenReturn(false);
190    when(services.isAborted()).thenReturn(false);
191    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
192    // the test.
193    FileSystem fs = FileSystem.get(CONF);
194    Path rootDir = new Path(dir + getName());
195    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, (Server) services, rootDir, getName(), CONF);
196    dodgyWAL.init();
197    LogRoller logRoller = new LogRoller(services);
198    logRoller.addWAL(dodgyWAL);
199    logRoller.start();
200
201    boolean threwOnSync = false;
202    boolean threwOnAppend = false;
203    boolean threwOnBoth = false;
204
205    HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
206    try {
207      // Get some random bytes.
208      byte[] value = Bytes.toBytes(getName());
209      try {
210        // First get something into memstore
211        Put put = new Put(value);
212        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
213        region.put(put);
214      } catch (IOException ioe) {
215        fail();
216      }
217      long rollsCount = dodgyWAL.rolls.get();
218      try {
219        dodgyWAL.throwAppendException = true;
220        dodgyWAL.throwSyncException = false;
221        Put put = new Put(value);
222        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
223        region.put(put);
224      } catch (IOException ioe) {
225        threwOnAppend = true;
226      }
227      while (rollsCount == dodgyWAL.rolls.get()) {
228        Threads.sleep(100);
229      }
230      rollsCount = dodgyWAL.rolls.get();
231
232      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
233      // appends to sync. We should be able to continue.
234
235      try {
236        dodgyWAL.throwAppendException = true;
237        dodgyWAL.throwSyncException = true;
238        Put put = new Put(value);
239        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
240        region.put(put);
241      } catch (IOException ioe) {
242        threwOnBoth = true;
243      }
244      while (rollsCount == dodgyWAL.rolls.get()) {
245        Threads.sleep(100);
246      }
247
248      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
249      // to just continue.
250
251      // So, should be no abort at this stage. Verify.
252      Mockito.verify(services, Mockito.atLeast(0)).abort(Mockito.anyString(),
253        Mockito.any(Throwable.class));
254      try {
255        dodgyWAL.throwAppendException = false;
256        dodgyWAL.throwSyncException = true;
257        Put put = new Put(value);
258        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
259        region.put(put);
260      } catch (IOException ioe) {
261        threwOnSync = true;
262      }
263      // An append in the WAL but the sync failed is a server abort condition. That is our
264      // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
265      // happens. If it don't we'll timeout the whole test. That is fine.
266      while (true) {
267        try {
268          Mockito.verify(services, Mockito.atLeast(1)).abort(Mockito.anyString(),
269            Mockito.any(Throwable.class));
270          break;
271        } catch (WantedButNotInvoked t) {
272          Threads.sleep(1);
273        }
274      }
275
276      try {
277        dodgyWAL.throwAppendException = false;
278        dodgyWAL.throwSyncException = false;
279        dodgyWAL.throwArchiveException = true;
280        Pair<Path, Long> pair = new Pair<Path, Long>();
281        pair.setFirst(new Path("/a/b/"));
282        pair.setSecond(100L);
283        dodgyWAL.archive(pair);
284      } catch (Throwable ioe) {
285      }
286      while (true) {
287        try {
288          // one more abort needs to be called
289          Mockito.verify(services, Mockito.atLeast(2)).abort(Mockito.anyString(),
290            (Throwable) Mockito.anyObject());
291          break;
292        } catch (WantedButNotInvoked t) {
293          Threads.sleep(1);
294        }
295      }
296    } finally {
297      // To stop logRoller, its server has to say it is stopped.
298      Mockito.when(services.isStopped()).thenReturn(true);
299      if (logRoller != null) logRoller.close();
300      if (region != null) {
301        try {
302          region.close(true);
303        } catch (DroppedSnapshotException e) {
304          LOG.info("On way out; expected!", e);
305        }
306      }
307      if (dodgyWAL != null) dodgyWAL.close();
308      assertTrue("The regionserver should have thrown an exception", threwOnBoth);
309      assertTrue("The regionserver should have thrown an exception", threwOnAppend);
310      assertTrue("The regionserver should have thrown an exception", threwOnSync);
311    }
312  }
313
314  /**
315   * @return A region on which you must call {@link HBaseTestingUtil#closeRegionAndWAL(HRegion)}
316   *         when done.
317   */
318  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
319    Configuration conf, WAL wal) throws IOException {
320    ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
321      MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
322    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
323      Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
324  }
325}