001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertTrue;
021import static org.junit.Assert.fail;
022import static org.mockito.Mockito.mock;
023import static org.mockito.Mockito.when;
024
025import java.io.IOException;
026import java.util.List;
027import java.util.Map;
028import java.util.concurrent.atomic.AtomicLong;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.Abortable;
033import org.apache.hadoop.hbase.DroppedSnapshotException;
034import org.apache.hadoop.hbase.HBaseClassTestRule;
035import org.apache.hadoop.hbase.HBaseTestingUtility;
036import org.apache.hadoop.hbase.HConstants;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.client.Durability;
039import org.apache.hadoop.hbase.client.Put;
040import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
041import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
042import org.apache.hadoop.hbase.testclassification.SmallTests;
043import org.apache.hadoop.hbase.util.Bytes;
044import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
045import org.apache.hadoop.hbase.util.Pair;
046import org.apache.hadoop.hbase.util.Threads;
047import org.apache.hadoop.hbase.wal.WAL;
048import org.apache.hadoop.hbase.wal.WALProvider.Writer;
049import org.junit.After;
050import org.junit.Before;
051import org.junit.ClassRule;
052import org.junit.Rule;
053import org.junit.Test;
054import org.junit.experimental.categories.Category;
055import org.junit.rules.TestName;
056import org.mockito.Mockito;
057import org.mockito.exceptions.verification.WantedButNotInvoked;
058import org.slf4j.Logger;
059import org.slf4j.LoggerFactory;
060
061/**
062 * Testing sync/append failures.
063 * Copied from TestHRegion.
064 */
065@Category({SmallTests.class})
066public class TestFailedAppendAndSync {
067
068  @ClassRule
069  public static final HBaseClassTestRule CLASS_RULE =
070      HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);
071
072  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
073  @Rule public TestName name = new TestName();
074
075  private static final String COLUMN_FAMILY = "MyCF";
076  private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
077
078  HRegion region = null;
079  // Do not run unit tests in parallel (? Why not?  It don't work?  Why not?  St.Ack)
080  private static HBaseTestingUtility TEST_UTIL;
081  public static Configuration CONF ;
082  private String dir;
083
084  // Test names
085  protected TableName tableName;
086
087  @Before
088  public void setup() throws IOException {
089    TEST_UTIL = HBaseTestingUtility.createLocalHTU();
090    CONF = TEST_UTIL.getConfiguration();
091    // Disable block cache.
092    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
093    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
094    tableName = TableName.valueOf(name.getMethodName());
095  }
096
097  @After
098  public void tearDown() throws Exception {
099    EnvironmentEdgeManagerTestHelper.reset();
100    LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
101    TEST_UTIL.cleanupTestDir();
102  }
103
104  String getName() {
105    return name.getMethodName();
106  }
107
108  /**
109   * Reproduce locking up that happens when we get an exceptions appending and syncing.
110   * See HBASE-14317.
111   * First I need to set up some mocks for Server and RegionServerServices. I also need to
112   * set up a dodgy WAL that will throw an exception when we go to append to it.
113   */
114  @Test
115  public void testLockupAroundBadAssignSync() throws IOException {
116    final AtomicLong rolls = new AtomicLong(0);
117    // Dodgy WAL. Will throw exceptions when flags set.
118    class DodgyFSLog extends FSHLog {
119      volatile boolean throwSyncException = false;
120      volatile boolean throwAppendException = false;
121      volatile boolean throwArchiveException = false;
122
123      public DodgyFSLog(FileSystem fs, Abortable abortable, Path root, String logDir,
124          Configuration conf) throws IOException {
125        super(fs, abortable, root, logDir, conf);
126      }
127
128      @Override
129      public Map<byte[], List<byte[]>> rollWriter(boolean force)
130          throws FailedLogCloseException, IOException {
131        Map<byte[], List<byte[]>> regions = super.rollWriter(force);
132        rolls.getAndIncrement();
133        return regions;
134      }
135
136      @Override
137      protected void archiveLogFile(Path p) throws IOException {
138        if (throwArchiveException) {
139          throw new IOException("throw archival exception");
140        }
141      }
142
143      @Override
144      protected void archive(Pair<Path, Long> localLogsToArchive) {
145        super.archive(localLogsToArchive);
146      }
147
148      @Override
149      protected Writer createWriterInstance(Path path) throws IOException {
150        final Writer w = super.createWriterInstance(path);
151        return new Writer() {
152          @Override
153          public void close() throws IOException {
154            w.close();
155          }
156
157          @Override
158          public void sync(boolean forceSync) throws IOException {
159            if (throwSyncException) {
160              throw new IOException("FAKE! Failed to replace a bad datanode...");
161            }
162            w.sync(forceSync);
163          }
164
165          @Override
166          public void append(Entry entry) throws IOException {
167            if (throwAppendException) {
168              throw new IOException("FAKE! Failed to replace a bad datanode...");
169            }
170            w.append(entry);
171          }
172
173          @Override
174          public long getLength() {
175            return w.getLength();
176          }
177
178          @Override
179          public long getSyncedLength() {
180            return w.getSyncedLength();
181          }
182        };
183      }
184    }
185
186    // Make up mocked server and services.
187    RegionServerServices services = mock(RegionServerServices.class);
188    when(services.getConfiguration()).thenReturn(CONF);
189    when(services.isStopped()).thenReturn(false);
190    when(services.isAborted()).thenReturn(false);
191    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
192    // the test.
193    FileSystem fs = FileSystem.get(CONF);
194    Path rootDir = new Path(dir + getName());
195    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, services, rootDir, getName(), CONF);
196    dodgyWAL.init();
197    LogRoller logRoller = new LogRoller(services);
198    logRoller.addWAL(dodgyWAL);
199    logRoller.start();
200
201    boolean threwOnSync = false;
202    boolean threwOnAppend = false;
203    boolean threwOnBoth = false;
204
205    HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
206    try {
207      // Get some random bytes.
208      byte[] value = Bytes.toBytes(getName());
209      try {
210        // First get something into memstore
211        Put put = new Put(value);
212        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
213        region.put(put);
214      } catch (IOException ioe) {
215        fail();
216      }
217      long rollsCount = rolls.get();
218      try {
219        dodgyWAL.throwAppendException = true;
220        dodgyWAL.throwSyncException = false;
221        Put put = new Put(value);
222        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
223        region.put(put);
224      } catch (IOException ioe) {
225        threwOnAppend = true;
226      }
227      while (rollsCount == rolls.get()) Threads.sleep(100);
228      rollsCount = rolls.get();
229
230      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
231      // appends to sync. We should be able to continue.
232
233      try {
234        dodgyWAL.throwAppendException = true;
235        dodgyWAL.throwSyncException = true;
236        Put put = new Put(value);
237        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
238        region.put(put);
239      } catch (IOException ioe) {
240        threwOnBoth = true;
241      }
242      while (rollsCount == rolls.get()) Threads.sleep(100);
243
244      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
245      // to just continue.
246
247      // So, should be no abort at this stage. Verify.
248      Mockito.verify(services, Mockito.atLeast(0)).
249        abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
250      try {
251        dodgyWAL.throwAppendException = false;
252        dodgyWAL.throwSyncException = true;
253        Put put = new Put(value);
254        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
255        region.put(put);
256      } catch (IOException ioe) {
257        threwOnSync = true;
258      }
259      // An append in the WAL but the sync failed is a server abort condition. That is our
260      // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
261      // happens. If it don't we'll timeout the whole test. That is fine.
262      while (true) {
263        try {
264          Mockito.verify(services, Mockito.atLeast(1)).
265            abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
266          break;
267        } catch (WantedButNotInvoked t) {
268          Threads.sleep(1);
269        }
270      }
271
272      try {
273        dodgyWAL.throwAppendException = false;
274        dodgyWAL.throwSyncException = false;
275        dodgyWAL.throwArchiveException = true;
276        Pair<Path, Long> pair = new Pair<Path, Long>();
277        pair.setFirst(new Path("/a/b/"));
278        pair.setSecond(100L);
279        dodgyWAL.archive(pair);
280      } catch (Throwable ioe) {
281      }
282      while (true) {
283        try {
284          // one more abort needs to be called
285          Mockito.verify(services, Mockito.atLeast(2)).abort(Mockito.anyString(),
286            (Throwable) Mockito.anyObject());
287          break;
288        } catch (WantedButNotInvoked t) {
289          Threads.sleep(1);
290        }
291      }
292    } finally {
293      // To stop logRoller, its server has to say it is stopped.
294      Mockito.when(services.isStopped()).thenReturn(true);
295      if (logRoller != null) logRoller.close();
296      if (region != null) {
297        try {
298          region.close(true);
299        } catch (DroppedSnapshotException e) {
300          LOG.info("On way out; expected!", e);
301        }
302      }
303      if (dodgyWAL != null) dodgyWAL.close();
304      assertTrue("The regionserver should have thrown an exception", threwOnBoth);
305      assertTrue("The regionserver should have thrown an exception", threwOnAppend);
306      assertTrue("The regionserver should have thrown an exception", threwOnSync);
307    }
308  }
309
310  /**
311   * @return A region on which you must call
312   *         {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done.
313   */
314  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
315      Configuration conf, WAL wal) throws IOException {
316    ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0,
317      0, null, MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
318    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
319      Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
320  }
321}