001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.DroppedSnapshotException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.mockito.exceptions.base.MockitoAssertionError;
import org.mockito.exceptions.verification.WantedButNotInvoked;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
060
061/**
062 * Testing sync/append failures. Copied from TestHRegion.
063 */
064@Category({ SmallTests.class })
065public class TestFailedAppendAndSync {
066
067  @ClassRule
068  public static final HBaseClassTestRule CLASS_RULE =
069    HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);
070
071  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
072  @Rule
073  public TestName name = new TestName();
074
075  private static final String COLUMN_FAMILY = "MyCF";
076  private static final byte[] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
077
078  HRegion region = null;
079  // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack)
080  private static HBaseTestingUtility TEST_UTIL;
081  public static Configuration CONF;
082  private String dir;
083
084  // Test names
085  protected TableName tableName;
086
087  @Before
088  public void setup() throws IOException {
089    TEST_UTIL = HBaseTestingUtility.createLocalHTU();
090    CONF = TEST_UTIL.getConfiguration();
091    // Disable block cache.
092    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
093    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
094    tableName = TableName.valueOf(name.getMethodName());
095  }
096
097  @After
098  public void tearDown() throws Exception {
099    EnvironmentEdgeManagerTestHelper.reset();
100    LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
101    TEST_UTIL.cleanupTestDir();
102  }
103
104  String getName() {
105    return name.getMethodName();
106  }
107
108  /**
109   * Reproduce locking up that happens when we get an exceptions appending and syncing. See
110   * HBASE-14317. First I need to set up some mocks for Server and RegionServerServices. I also need
111   * to set up a dodgy WAL that will throw an exception when we go to append to it.
112   */
113  @Test
114  public void testLockupAroundBadAssignSync() throws IOException {
115    final AtomicLong rolls = new AtomicLong(0);
116    // Dodgy WAL. Will throw exceptions when flags set.
117    class DodgyFSLog extends FSHLog {
118      volatile boolean throwSyncException = false;
119      volatile boolean throwAppendException = false;
120      volatile boolean throwArchiveException = false;
121
122      public DodgyFSLog(FileSystem fs, Abortable abortable, Path root, String logDir,
123        Configuration conf) throws IOException {
124        super(fs, abortable, root, logDir, conf);
125      }
126
127      @Override
128      public Map<byte[], List<byte[]>> rollWriter(boolean force)
129        throws FailedLogCloseException, IOException {
130        Map<byte[], List<byte[]>> regions = super.rollWriter(force);
131        rolls.getAndIncrement();
132        return regions;
133      }
134
135      @Override
136      protected void archiveLogFile(Path p) throws IOException {
137        if (throwArchiveException) {
138          throw new IOException("throw archival exception");
139        }
140      }
141
142      @Override
143      protected void archive(Pair<Path, Long> localLogsToArchive) {
144        super.archive(localLogsToArchive);
145      }
146
147      @Override
148      protected Writer createWriterInstance(Path path) throws IOException {
149        final Writer w = super.createWriterInstance(path);
150        return new Writer() {
151          @Override
152          public void close() throws IOException {
153            w.close();
154          }
155
156          @Override
157          public void sync(boolean forceSync) throws IOException {
158            if (throwSyncException) {
159              throw new IOException("FAKE! Failed to replace a bad datanode...");
160            }
161            w.sync(forceSync);
162          }
163
164          @Override
165          public void append(Entry entry) throws IOException {
166            if (throwAppendException) {
167              throw new IOException("FAKE! Failed to replace a bad datanode...");
168            }
169            w.append(entry);
170          }
171
172          @Override
173          public long getLength() {
174            return w.getLength();
175          }
176
177          @Override
178          public long getSyncedLength() {
179            return w.getSyncedLength();
180          }
181        };
182      }
183    }
184
185    // Make up mocked server and services.
186    RegionServerServices services = mock(RegionServerServices.class);
187    when(services.getConfiguration()).thenReturn(CONF);
188    when(services.isStopped()).thenReturn(false);
189    when(services.isAborted()).thenReturn(false);
190    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
191    // the test.
192    FileSystem fs = FileSystem.get(CONF);
193    Path rootDir = new Path(dir + getName());
194    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, services, rootDir, getName(), CONF);
195    dodgyWAL.init();
196    LogRoller logRoller = new LogRoller(services);
197    logRoller.addWAL(dodgyWAL);
198    logRoller.start();
199
200    boolean threwOnSync = false;
201    boolean threwOnAppend = false;
202    boolean threwOnBoth = false;
203
204    HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
205    try {
206      // Get some random bytes.
207      byte[] value = Bytes.toBytes(getName());
208      try {
209        // First get something into memstore
210        Put put = new Put(value);
211        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
212        region.put(put);
213      } catch (IOException ioe) {
214        fail();
215      }
216      long rollsCount = rolls.get();
217      try {
218        dodgyWAL.throwAppendException = true;
219        dodgyWAL.throwSyncException = false;
220        Put put = new Put(value);
221        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
222        region.put(put);
223      } catch (IOException ioe) {
224        threwOnAppend = true;
225      }
226      while (rollsCount == rolls.get())
227        Threads.sleep(100);
228      rollsCount = rolls.get();
229
230      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
231      // appends to sync. We should be able to continue.
232
233      try {
234        dodgyWAL.throwAppendException = true;
235        dodgyWAL.throwSyncException = true;
236        Put put = new Put(value);
237        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
238        region.put(put);
239      } catch (IOException ioe) {
240        threwOnBoth = true;
241      }
242      while (rollsCount == rolls.get())
243        Threads.sleep(100);
244
245      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
246      // to just continue.
247
248      // So, should be no abort at this stage. Verify.
249      Mockito.verify(services, Mockito.atLeast(0)).abort(Mockito.anyString(),
250        (Throwable) Mockito.anyObject());
251      try {
252        dodgyWAL.throwAppendException = false;
253        dodgyWAL.throwSyncException = true;
254        Put put = new Put(value);
255        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
256        region.put(put);
257      } catch (IOException ioe) {
258        threwOnSync = true;
259      }
260      // An append in the WAL but the sync failed is a server abort condition. That is our
261      // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
262      // happens. If it don't we'll timeout the whole test. That is fine.
263      while (true) {
264        try {
265          Mockito.verify(services, Mockito.atLeast(1)).abort(Mockito.anyString(),
266            (Throwable) Mockito.anyObject());
267          break;
268        } catch (WantedButNotInvoked t) {
269          Threads.sleep(1);
270        }
271      }
272
273      try {
274        dodgyWAL.throwAppendException = false;
275        dodgyWAL.throwSyncException = false;
276        dodgyWAL.throwArchiveException = true;
277        Pair<Path, Long> pair = new Pair<Path, Long>();
278        pair.setFirst(new Path("/a/b/"));
279        pair.setSecond(100L);
280        dodgyWAL.archive(pair);
281      } catch (Throwable ioe) {
282      }
283      while (true) {
284        try {
285          // one more abort needs to be called
286          Mockito.verify(services, Mockito.atLeast(2)).abort(Mockito.anyString(),
287            (Throwable) Mockito.anyObject());
288          break;
289        } catch (WantedButNotInvoked t) {
290          Threads.sleep(1);
291        }
292      }
293    } finally {
294      // To stop logRoller, its server has to say it is stopped.
295      Mockito.when(services.isStopped()).thenReturn(true);
296      if (logRoller != null) logRoller.close();
297      if (region != null) {
298        try {
299          region.close(true);
300        } catch (DroppedSnapshotException e) {
301          LOG.info("On way out; expected!", e);
302        }
303      }
304      if (dodgyWAL != null) dodgyWAL.close();
305      assertTrue("The regionserver should have thrown an exception", threwOnBoth);
306      assertTrue("The regionserver should have thrown an exception", threwOnAppend);
307      assertTrue("The regionserver should have thrown an exception", threwOnSync);
308    }
309  }
310
311  /**
312   * @return A region on which you must call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}
313   *         when done.
314   */
315  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
316    Configuration conf, WAL wal) throws IOException {
317    ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
318      MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
319    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
320      Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
321  }
322}