001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DroppedSnapshotException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.exceptions.verification.TooFewActualInvocations;
import org.mockito.exceptions.verification.WantedButNotInvoked;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
064
065/**
066 * Testing sync/append failures. Copied from TestHRegion.
067 */
068@Category({ SmallTests.class })
069public class TestFailedAppendAndSync {
070
071  @ClassRule
072  public static final HBaseClassTestRule CLASS_RULE =
073    HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);
074
075  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
076  @Rule
077  public TestName name = new TestName();
078
079  private static final String COLUMN_FAMILY = "MyCF";
080  private static final byte[] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
081
082  HRegion region = null;
083  // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack)
084  private static HBaseTestingUtil TEST_UTIL;
085  public static Configuration CONF;
086  private String dir;
087
088  // Test names
089  protected TableName tableName;
090
091  @Before
092  public void setup() throws IOException {
093    TEST_UTIL = new HBaseTestingUtil();
094    CONF = TEST_UTIL.getConfiguration();
095    // Disable block cache.
096    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
097    CONF.setLong(AbstractFSWAL.WAL_SYNC_TIMEOUT_MS, 10000);
098    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
099    tableName = TableName.valueOf(name.getMethodName());
100  }
101
102  @After
103  public void tearDown() throws Exception {
104    EnvironmentEdgeManagerTestHelper.reset();
105    LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
106    TEST_UTIL.cleanupTestDir();
107  }
108
109  String getName() {
110    return name.getMethodName();
111  }
112
113  // Dodgy WAL. Will throw exceptions when flags set.
114  class DodgyFSLog extends FSHLog {
115    volatile boolean throwSyncException = false;
116    volatile boolean throwAppendException = false;
117    volatile boolean throwArchiveException = false;
118
119    final AtomicLong rolls = new AtomicLong(0);
120
121    public DodgyFSLog(FileSystem fs, Server server, Path root, String logDir, Configuration conf)
122      throws IOException {
123      super(fs, server, root, logDir, conf);
124    }
125
126    @Override
127    public Map<byte[], List<byte[]>> rollWriter(boolean force)
128      throws FailedLogCloseException, IOException {
129      Map<byte[], List<byte[]>> regions = super.rollWriter(force);
130      rolls.getAndIncrement();
131      return regions;
132    }
133
134    @Override
135    protected void archiveLogFile(Path p) throws IOException {
136      if (throwArchiveException) {
137        throw new IOException("throw archival exception");
138      }
139    }
140
141    @Override
142    protected void archive(Pair<Path, Long> localLogsToArchive) {
143      super.archive(localLogsToArchive);
144    }
145
146    @Override
147    protected Writer createWriterInstance(FileSystem fs, Path path) throws IOException {
148      final Writer w = super.createWriterInstance(fs, path);
149      return new Writer() {
150        @Override
151        public void close() throws IOException {
152          w.close();
153        }
154
155        @Override
156        public void sync(boolean forceSync) throws IOException {
157          if (throwSyncException) {
158            throw new IOException("FAKE! Failed to replace a bad datanode...");
159          }
160          w.sync(forceSync);
161        }
162
163        @Override
164        public void append(Entry entry) throws IOException {
165          if (throwAppendException) {
166            throw new IOException("FAKE! Failed to replace a bad datanode...");
167          }
168          w.append(entry);
169        }
170
171        @Override
172        public long getLength() {
173          return w.getLength();
174        }
175
176        @Override
177        public long getSyncedLength() {
178          return w.getSyncedLength();
179        }
180      };
181    }
182  }
183
184  /**
185   * Reproduce locking up that happens when we get an exceptions appending and syncing. See
186   * HBASE-14317. First I need to set up some mocks for Server and RegionServerServices. I also need
187   * to set up a dodgy WAL that will throw an exception when we go to append to it.
188   */
189  @Test
190  public void testLockupAroundBadAssignSync() throws IOException {
191    // Make up mocked server and services.
192    RegionServerServices services = mock(RegionServerServices.class);
193    when(services.getConfiguration()).thenReturn(CONF);
194    when(services.isStopped()).thenReturn(false);
195    when(services.isAborted()).thenReturn(false);
196    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
197    // the test.
198    FileSystem fs = FileSystem.get(CONF);
199    Path rootDir = new Path(dir + getName());
200    fs.mkdirs(new Path(rootDir, getName()));
201    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, (Server) services, rootDir, getName(), CONF);
202    dodgyWAL.init();
203    LogRoller logRoller = new LogRoller(services);
204    logRoller.addWAL(dodgyWAL);
205    logRoller.start();
206
207    boolean threwOnSync = false;
208    boolean threwOnAppend = false;
209    boolean threwOnBoth = false;
210
211    HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
212    try {
213      // Get some random bytes.
214      byte[] value = Bytes.toBytes(getName());
215      try {
216        // First get something into memstore
217        Put put = new Put(value);
218        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
219        region.put(put);
220      } catch (IOException ioe) {
221        fail();
222      }
223      long rollsCount = dodgyWAL.rolls.get();
224      try {
225        dodgyWAL.throwAppendException = true;
226        dodgyWAL.throwSyncException = false;
227        Put put = new Put(value);
228        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
229        region.put(put);
230      } catch (IOException ioe) {
231        threwOnAppend = true;
232      }
233      while (rollsCount == dodgyWAL.rolls.get()) {
234        Threads.sleep(100);
235      }
236      rollsCount = dodgyWAL.rolls.get();
237
238      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
239      // appends to sync. We should be able to continue.
240
241      try {
242        dodgyWAL.throwAppendException = true;
243        dodgyWAL.throwSyncException = true;
244        Put put = new Put(value);
245        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
246        region.put(put);
247      } catch (IOException ioe) {
248        threwOnBoth = true;
249      }
250      while (rollsCount == dodgyWAL.rolls.get()) {
251        Threads.sleep(100);
252      }
253
254      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
255      // to just continue.
256
257      // So, should be no abort at this stage. Verify.
258      verify(services, atLeast(0)).abort(anyString(), any(Throwable.class));
259      try {
260        dodgyWAL.throwAppendException = false;
261        dodgyWAL.throwSyncException = true;
262        Put put = new Put(value);
263        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
264        region.rsServices = services;
265        region.put(put);
266      } catch (IOException ioe) {
267        threwOnSync = true;
268      }
269
270      region.rsServices = null;
271      // An append in the WAL but the sync failed is a server abort condition. That is our
272      // current semantic. Verify.
273      verify(services, atLeast(1)).abort(anyString(), any());
274      try {
275        dodgyWAL.throwAppendException = false;
276        dodgyWAL.throwSyncException = false;
277        dodgyWAL.throwArchiveException = true;
278        Pair<Path, Long> pair = new Pair<Path, Long>();
279        pair.setFirst(new Path("/a/b/"));
280        pair.setSecond(100L);
281        dodgyWAL.archive(pair);
282      } catch (Throwable ioe) {
283      }
284      while (true) {
285        try {
286          // one more abort needs to be called
287          verify(services, atLeast(2)).abort(anyString(), any());
288          break;
289        } catch (WantedButNotInvoked t) {
290          Threads.sleep(1);
291        }
292      }
293    } finally {
294      // To stop logRoller, its server has to say it is stopped.
295      when(services.isStopped()).thenReturn(true);
296      if (logRoller != null) logRoller.close();
297      if (region != null) {
298        try {
299          region.close(true);
300        } catch (DroppedSnapshotException e) {
301          LOG.info("On way out; expected!", e);
302        }
303      }
304      if (dodgyWAL != null) dodgyWAL.close();
305      assertTrue("The regionserver should have thrown an exception", threwOnBoth);
306      assertTrue("The regionserver should have thrown an exception", threwOnAppend);
307      assertTrue("The regionserver should have thrown an exception", threwOnSync);
308    }
309  }
310
311  /**
312   * @return A region on which you must call {@link HBaseTestingUtil#closeRegionAndWAL(HRegion)}
313   *         when done.
314   */
315  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
316    Configuration conf, WAL wal) throws IOException {
317    ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
318      MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
319    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
320      Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
321  }
322}