001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertTrue;
021import static org.junit.Assert.fail;
022import static org.mockito.ArgumentMatchers.any;
023import static org.mockito.ArgumentMatchers.anyString;
024import static org.mockito.Mockito.atLeast;
025import static org.mockito.Mockito.mock;
026import static org.mockito.Mockito.verify;
027import static org.mockito.Mockito.when;
028
029import java.io.IOException;
030import java.util.List;
031import java.util.Map;
032import java.util.concurrent.atomic.AtomicLong;
033import org.apache.hadoop.conf.Configuration;
034import org.apache.hadoop.fs.FileSystem;
035import org.apache.hadoop.fs.Path;
036import org.apache.hadoop.hbase.Abortable;
037import org.apache.hadoop.hbase.DroppedSnapshotException;
038import org.apache.hadoop.hbase.HBaseClassTestRule;
039import org.apache.hadoop.hbase.HBaseTestingUtility;
040import org.apache.hadoop.hbase.HConstants;
041import org.apache.hadoop.hbase.TableName;
042import org.apache.hadoop.hbase.client.Durability;
043import org.apache.hadoop.hbase.client.Put;
044import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
045import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
046import org.apache.hadoop.hbase.testclassification.SmallTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
049import org.apache.hadoop.hbase.util.Pair;
050import org.apache.hadoop.hbase.util.Threads;
051import org.apache.hadoop.hbase.wal.WAL;
052import org.apache.hadoop.hbase.wal.WALProvider.Writer;
053import org.junit.After;
054import org.junit.Before;
055import org.junit.ClassRule;
056import org.junit.Rule;
057import org.junit.Test;
058import org.junit.experimental.categories.Category;
059import org.junit.rules.TestName;
060import org.mockito.exceptions.verification.WantedButNotInvoked;
061import org.slf4j.Logger;
062import org.slf4j.LoggerFactory;
063
064/**
065 * Testing sync/append failures. Copied from TestHRegion.
066 */
067@Category({ SmallTests.class })
068public class TestFailedAppendAndSync {
069
070  @ClassRule
071  public static final HBaseClassTestRule CLASS_RULE =
072    HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);
073
074  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
075  @Rule
076  public TestName name = new TestName();
077
078  private static final String COLUMN_FAMILY = "MyCF";
079  private static final byte[] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
080
081  HRegion region = null;
082  // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack)
083  private static HBaseTestingUtility TEST_UTIL;
084  public static Configuration CONF;
085  private String dir;
086
087  // Test names
088  protected TableName tableName;
089
090  @Before
091  public void setup() throws IOException {
092    TEST_UTIL = HBaseTestingUtility.createLocalHTU();
093    CONF = TEST_UTIL.getConfiguration();
094    // Disable block cache.
095    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
096    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
097    tableName = TableName.valueOf(name.getMethodName());
098  }
099
100  @After
101  public void tearDown() throws Exception {
102    EnvironmentEdgeManagerTestHelper.reset();
103    LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
104    TEST_UTIL.cleanupTestDir();
105  }
106
107  String getName() {
108    return name.getMethodName();
109  }
110
111  /**
112   * Reproduce locking up that happens when we get an exceptions appending and syncing. See
113   * HBASE-14317. First I need to set up some mocks for Server and RegionServerServices. I also need
114   * to set up a dodgy WAL that will throw an exception when we go to append to it.
115   */
116  @Test
117  public void testLockupAroundBadAssignSync() throws IOException {
118    final AtomicLong rolls = new AtomicLong(0);
119    // Dodgy WAL. Will throw exceptions when flags set.
120    class DodgyFSLog extends FSHLog {
121      volatile boolean throwSyncException = false;
122      volatile boolean throwAppendException = false;
123      volatile boolean throwArchiveException = false;
124
125      public DodgyFSLog(FileSystem fs, Abortable abortable, Path root, String logDir,
126        Configuration conf) throws IOException {
127        super(fs, abortable, root, logDir, conf);
128      }
129
130      @Override
131      public Map<byte[], List<byte[]>> rollWriter(boolean force)
132        throws FailedLogCloseException, IOException {
133        Map<byte[], List<byte[]>> regions = super.rollWriter(force);
134        rolls.getAndIncrement();
135        return regions;
136      }
137
138      @Override
139      protected void archiveLogFile(Path p) throws IOException {
140        if (throwArchiveException) {
141          throw new IOException("throw archival exception");
142        }
143      }
144
145      @Override
146      protected void archive(Pair<Path, Long> localLogsToArchive) {
147        super.archive(localLogsToArchive);
148      }
149
150      @Override
151      protected Writer createWriterInstance(Path path) throws IOException {
152        final Writer w = super.createWriterInstance(path);
153        return new Writer() {
154          @Override
155          public void close() throws IOException {
156            w.close();
157          }
158
159          @Override
160          public void sync(boolean forceSync) throws IOException {
161            if (throwSyncException) {
162              throw new IOException("FAKE! Failed to replace a bad datanode...");
163            }
164            w.sync(forceSync);
165          }
166
167          @Override
168          public void append(Entry entry) throws IOException {
169            if (throwAppendException) {
170              throw new IOException("FAKE! Failed to replace a bad datanode...");
171            }
172            w.append(entry);
173          }
174
175          @Override
176          public long getLength() {
177            return w.getLength();
178          }
179
180          @Override
181          public long getSyncedLength() {
182            return w.getSyncedLength();
183          }
184        };
185      }
186    }
187
188    // Make up mocked server and services.
189    RegionServerServices services = mock(RegionServerServices.class);
190    when(services.getConfiguration()).thenReturn(CONF);
191    when(services.isStopped()).thenReturn(false);
192    when(services.isAborted()).thenReturn(false);
193    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
194    // the test.
195    FileSystem fs = FileSystem.get(CONF);
196    Path rootDir = new Path(dir + getName());
197    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, services, rootDir, getName(), CONF);
198    dodgyWAL.init();
199    LogRoller logRoller = new LogRoller(services);
200    logRoller.addWAL(dodgyWAL);
201    logRoller.start();
202
203    boolean threwOnSync = false;
204    boolean threwOnAppend = false;
205    boolean threwOnBoth = false;
206
207    HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
208    try {
209      // Get some random bytes.
210      byte[] value = Bytes.toBytes(getName());
211      try {
212        // First get something into memstore
213        Put put = new Put(value);
214        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
215        region.put(put);
216      } catch (IOException ioe) {
217        fail();
218      }
219      long rollsCount = rolls.get();
220      try {
221        dodgyWAL.throwAppendException = true;
222        dodgyWAL.throwSyncException = false;
223        Put put = new Put(value);
224        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
225        region.put(put);
226      } catch (IOException ioe) {
227        threwOnAppend = true;
228      }
229      while (rollsCount == rolls.get())
230        Threads.sleep(100);
231      rollsCount = rolls.get();
232
233      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
234      // appends to sync. We should be able to continue.
235
236      try {
237        dodgyWAL.throwAppendException = true;
238        dodgyWAL.throwSyncException = true;
239        Put put = new Put(value);
240        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
241        region.put(put);
242      } catch (IOException ioe) {
243        threwOnBoth = true;
244      }
245      while (rollsCount == rolls.get())
246        Threads.sleep(100);
247
248      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
249      // to just continue.
250
251      // So, should be no abort at this stage. Verify.
252      verify(services, atLeast(0)).abort(anyString(), any(Throwable.class));
253      try {
254        dodgyWAL.throwAppendException = false;
255        dodgyWAL.throwSyncException = true;
256        Put put = new Put(value);
257        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
258        region.put(put);
259      } catch (IOException ioe) {
260        threwOnSync = true;
261      }
262      // An append in the WAL but the sync failed is a server abort condition. That is our
263      // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
264      // happens. If it don't we'll timeout the whole test. That is fine.
265      while (true) {
266        try {
267          verify(services, atLeast(1)).abort(anyString(), any(Throwable.class));
268          break;
269        } catch (WantedButNotInvoked t) {
270          Threads.sleep(1);
271        }
272      }
273
274      try {
275        dodgyWAL.throwAppendException = false;
276        dodgyWAL.throwSyncException = false;
277        dodgyWAL.throwArchiveException = true;
278        Pair<Path, Long> pair = new Pair<Path, Long>();
279        pair.setFirst(new Path("/a/b/"));
280        pair.setSecond(100L);
281        dodgyWAL.archive(pair);
282      } catch (Throwable ioe) {
283      }
284      while (true) {
285        try {
286          // one more abort needs to be called
287          verify(services, atLeast(2)).abort(anyString(), any());
288          break;
289        } catch (WantedButNotInvoked t) {
290          Threads.sleep(1);
291        }
292      }
293    } finally {
294      // To stop logRoller, its server has to say it is stopped.
295      when(services.isStopped()).thenReturn(true);
296      if (logRoller != null) logRoller.close();
297      if (region != null) {
298        try {
299          region.close(true);
300        } catch (DroppedSnapshotException e) {
301          LOG.info("On way out; expected!", e);
302        }
303      }
304      if (dodgyWAL != null) dodgyWAL.close();
305      assertTrue("The regionserver should have thrown an exception", threwOnBoth);
306      assertTrue("The regionserver should have thrown an exception", threwOnAppend);
307      assertTrue("The regionserver should have thrown an exception", threwOnSync);
308    }
309  }
310
311  /**
312   * @return A region on which you must call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}
313   *         when done.
314   */
315  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
316    Configuration conf, WAL wal) throws IOException {
317    ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
318      MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
319    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
320      Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
321  }
322}