001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertTrue;
021import static org.junit.Assert.fail;
022import static org.mockito.ArgumentMatchers.any;
023import static org.mockito.ArgumentMatchers.anyString;
024import static org.mockito.Mockito.atLeast;
025import static org.mockito.Mockito.mock;
026import static org.mockito.Mockito.verify;
027import static org.mockito.Mockito.when;
028
029import java.io.IOException;
030import java.util.List;
031import java.util.Map;
032import java.util.concurrent.atomic.AtomicLong;
033import org.apache.hadoop.conf.Configuration;
034import org.apache.hadoop.fs.FileSystem;
035import org.apache.hadoop.fs.Path;
036import org.apache.hadoop.hbase.DroppedSnapshotException;
037import org.apache.hadoop.hbase.HBaseClassTestRule;
038import org.apache.hadoop.hbase.HBaseTestingUtil;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.Server;
041import org.apache.hadoop.hbase.TableName;
042import org.apache.hadoop.hbase.client.Durability;
043import org.apache.hadoop.hbase.client.Put;
044import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
045import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
046import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
047import org.apache.hadoop.hbase.testclassification.SmallTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
050import org.apache.hadoop.hbase.util.Pair;
051import org.apache.hadoop.hbase.util.Threads;
052import org.apache.hadoop.hbase.wal.WAL;
053import org.apache.hadoop.hbase.wal.WALProvider.Writer;
054import org.junit.After;
055import org.junit.Before;
056import org.junit.ClassRule;
057import org.junit.Rule;
058import org.junit.Test;
059import org.junit.experimental.categories.Category;
060import org.junit.rules.TestName;
061import org.mockito.exceptions.verification.WantedButNotInvoked;
062import org.slf4j.Logger;
063import org.slf4j.LoggerFactory;
064
065/**
066 * Testing sync/append failures. Copied from TestHRegion.
067 */
068@Category({ SmallTests.class })
069public class TestFailedAppendAndSync {
070
  // JUnit class rule obtained from HBaseClassTestRule.forClass for this test class.
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
  // Supplies the running test method's name; used for the table name and by getName().
  @Rule
  public TestName name = new TestName();

  // The single column family of the test region; every Put in this class writes to it.
  private static final String COLUMN_FAMILY = "MyCF";
  private static final byte[] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);

  // NOTE(review): not referenced by the methods visible in this class; the test method declares
  // its own local variable of the same name. Confirm nothing else relies on this field.
  HRegion region = null;
  // Do not run unit tests in parallel. (The original author, St.Ack, left the reason as an open
  // question.)
  // Static, yet re-created in setup() before every test; the static initHRegion() reads it.
  private static HBaseTestingUtil TEST_UTIL;
  // Configuration taken from TEST_UTIL in setup().
  public static Configuration CONF;
  // String form of the data test directory, assigned in setup().
  private String dir;

  // Table name derived from the test method name in setup().
  protected TableName tableName;
090
091  @Before
092  public void setup() throws IOException {
093    TEST_UTIL = new HBaseTestingUtil();
094    CONF = TEST_UTIL.getConfiguration();
095    // Disable block cache.
096    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
097    CONF.setLong(AbstractFSWAL.WAL_SYNC_TIMEOUT_MS, 10000);
098    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
099    tableName = TableName.valueOf(name.getMethodName());
100  }
101
102  @After
103  public void tearDown() throws Exception {
104    EnvironmentEdgeManagerTestHelper.reset();
105    LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
106    TEST_UTIL.cleanupTestDir();
107  }
108
109  String getName() {
110    return name.getMethodName();
111  }
112
113  // Dodgy WAL. Will throw exceptions when flags set.
114  class DodgyFSLog extends FSHLog {
115    volatile boolean throwSyncException = false;
116    volatile boolean throwAppendException = false;
117    volatile boolean throwArchiveException = false;
118
119    final AtomicLong rolls = new AtomicLong(0);
120
121    public DodgyFSLog(FileSystem fs, Server server, Path root, String logDir, Configuration conf)
122      throws IOException {
123      super(fs, server, root, logDir, conf);
124    }
125
126    @Override
127    public Map<byte[], List<byte[]>> rollWriter(boolean force)
128      throws FailedLogCloseException, IOException {
129      Map<byte[], List<byte[]>> regions = super.rollWriter(force);
130      rolls.getAndIncrement();
131      return regions;
132    }
133
134    @Override
135    protected void archiveLogFile(Path p) throws IOException {
136      if (throwArchiveException) {
137        throw new IOException("throw archival exception");
138      }
139    }
140
141    @Override
142    protected void archive(Pair<Path, Long> localLogsToArchive) {
143      super.archive(localLogsToArchive);
144    }
145
146    @Override
147    protected Writer createWriterInstance(FileSystem fs, Path path) throws IOException {
148      final Writer w = super.createWriterInstance(fs, path);
149      return new Writer() {
150        @Override
151        public void close() throws IOException {
152          w.close();
153        }
154
155        @Override
156        public void sync(boolean forceSync) throws IOException {
157          if (throwSyncException) {
158            throw new IOException("FAKE! Failed to replace a bad datanode...");
159          }
160          w.sync(forceSync);
161        }
162
163        @Override
164        public void append(Entry entry) throws IOException {
165          if (throwAppendException) {
166            throw new IOException("FAKE! Failed to replace a bad datanode...");
167          }
168          w.append(entry);
169        }
170
171        @Override
172        public long getLength() {
173          return w.getLength();
174        }
175
176        @Override
177        public long getSyncedLength() {
178          return w.getSyncedLength();
179        }
180      };
181    }
182  }
183
184  /**
185   * Reproduce locking up that happens when we get an exceptions appending and syncing. See
186   * HBASE-14317. First I need to set up some mocks for Server and RegionServerServices. I also need
187   * to set up a dodgy WAL that will throw an exception when we go to append to it.
188   */
189  @Test
190  public void testLockupAroundBadAssignSync() throws IOException {
191    // Make up mocked server and services.
192    RegionServerServices services = mock(RegionServerServices.class);
193    when(services.getConfiguration()).thenReturn(CONF);
194    when(services.isStopped()).thenReturn(false);
195    when(services.isAborted()).thenReturn(false);
196    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
197    // the test.
198    FileSystem fs = FileSystem.get(CONF);
199    Path rootDir = new Path(dir + getName());
200    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, (Server) services, rootDir, getName(), CONF);
201    dodgyWAL.init();
202    LogRoller logRoller = new LogRoller(services);
203    logRoller.addWAL(dodgyWAL);
204    logRoller.start();
205
206    boolean threwOnSync = false;
207    boolean threwOnAppend = false;
208    boolean threwOnBoth = false;
209
210    HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
211    try {
212      // Get some random bytes.
213      byte[] value = Bytes.toBytes(getName());
214      try {
215        // First get something into memstore
216        Put put = new Put(value);
217        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
218        region.put(put);
219      } catch (IOException ioe) {
220        fail();
221      }
222      long rollsCount = dodgyWAL.rolls.get();
223      try {
224        dodgyWAL.throwAppendException = true;
225        dodgyWAL.throwSyncException = false;
226        Put put = new Put(value);
227        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
228        region.put(put);
229      } catch (IOException ioe) {
230        threwOnAppend = true;
231      }
232      while (rollsCount == dodgyWAL.rolls.get()) {
233        Threads.sleep(100);
234      }
235      rollsCount = dodgyWAL.rolls.get();
236
237      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
238      // appends to sync. We should be able to continue.
239
240      try {
241        dodgyWAL.throwAppendException = true;
242        dodgyWAL.throwSyncException = true;
243        Put put = new Put(value);
244        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
245        region.put(put);
246      } catch (IOException ioe) {
247        threwOnBoth = true;
248      }
249      while (rollsCount == dodgyWAL.rolls.get()) {
250        Threads.sleep(100);
251      }
252
253      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
254      // to just continue.
255
256      // So, should be no abort at this stage. Verify.
257      verify(services, atLeast(0)).abort(anyString(), any(Throwable.class));
258      try {
259        dodgyWAL.throwAppendException = false;
260        dodgyWAL.throwSyncException = true;
261        Put put = new Put(value);
262        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
263        region.rsServices = services;
264        region.put(put);
265      } catch (IOException ioe) {
266        threwOnSync = true;
267      }
268
269      region.rsServices = null;
270      // An append in the WAL but the sync failed is a server abort condition. That is our
271      // current semantic. Verify.
272      verify(services, atLeast(1)).abort(anyString(), any());
273      try {
274        dodgyWAL.throwAppendException = false;
275        dodgyWAL.throwSyncException = false;
276        dodgyWAL.throwArchiveException = true;
277        Pair<Path, Long> pair = new Pair<Path, Long>();
278        pair.setFirst(new Path("/a/b/"));
279        pair.setSecond(100L);
280        dodgyWAL.archive(pair);
281      } catch (Throwable ioe) {
282      }
283      while (true) {
284        try {
285          // one more abort needs to be called
286          verify(services, atLeast(2)).abort(anyString(), any());
287          break;
288        } catch (WantedButNotInvoked t) {
289          Threads.sleep(1);
290        }
291      }
292    } finally {
293      // To stop logRoller, its server has to say it is stopped.
294      when(services.isStopped()).thenReturn(true);
295      if (logRoller != null) logRoller.close();
296      if (region != null) {
297        try {
298          region.close(true);
299        } catch (DroppedSnapshotException e) {
300          LOG.info("On way out; expected!", e);
301        }
302      }
303      if (dodgyWAL != null) dodgyWAL.close();
304      assertTrue("The regionserver should have thrown an exception", threwOnBoth);
305      assertTrue("The regionserver should have thrown an exception", threwOnAppend);
306      assertTrue("The regionserver should have thrown an exception", threwOnSync);
307    }
308  }
309
310  /**
311   * @return A region on which you must call {@link HBaseTestingUtil#closeRegionAndWAL(HRegion)}
312   *         when done.
313   */
314  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
315    Configuration conf, WAL wal) throws IOException {
316    ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
317      MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
318    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
319      Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
320  }
321}