001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertTrue;
021import static org.junit.Assert.fail;
022import static org.mockito.Mockito.mock;
023import static org.mockito.Mockito.when;
024
025import java.io.IOException;
026import java.util.List;
027import java.util.Map;
028import java.util.concurrent.atomic.AtomicLong;
029
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.fs.FileSystem;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.hbase.DroppedSnapshotException;
034import org.apache.hadoop.hbase.HBaseClassTestRule;
035import org.apache.hadoop.hbase.HBaseTestingUtil;
036import org.apache.hadoop.hbase.HConstants;
037import org.apache.hadoop.hbase.Server;
038import org.apache.hadoop.hbase.TableName;
039import org.apache.hadoop.hbase.client.Durability;
040import org.apache.hadoop.hbase.client.Put;
041import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
042import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
043import org.apache.hadoop.hbase.testclassification.SmallTests;
044import org.apache.hadoop.hbase.util.Bytes;
045import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
046import org.apache.hadoop.hbase.util.Pair;
047import org.apache.hadoop.hbase.util.Threads;
048import org.apache.hadoop.hbase.wal.WAL;
049import org.apache.hadoop.hbase.wal.WALProvider.Writer;
050import org.junit.After;
051import org.junit.Before;
052import org.junit.ClassRule;
053import org.junit.Rule;
054import org.junit.Test;
055import org.junit.experimental.categories.Category;
056import org.junit.rules.TestName;
057import org.mockito.Mockito;
058import org.mockito.exceptions.verification.WantedButNotInvoked;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062/**
063 * Testing sync/append failures.
064 * Copied from TestHRegion.
065 */
066@Category({SmallTests.class})
067public class TestFailedAppendAndSync {
068
069  @ClassRule
070  public static final HBaseClassTestRule CLASS_RULE =
071      HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);
072
073  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
074  @Rule public TestName name = new TestName();
075
076  private static final String COLUMN_FAMILY = "MyCF";
077  private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
078
079  HRegion region = null;
080  // Do not run unit tests in parallel (? Why not?  It don't work?  Why not?  St.Ack)
081  private static HBaseTestingUtil TEST_UTIL;
082  public static Configuration CONF ;
083  private String dir;
084
085  // Test names
086  protected TableName tableName;
087
088  @Before
089  public void setup() throws IOException {
090    TEST_UTIL = new HBaseTestingUtil();
091    CONF = TEST_UTIL.getConfiguration();
092    // Disable block cache.
093    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
094    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
095    tableName = TableName.valueOf(name.getMethodName());
096  }
097
098  @After
099  public void tearDown() throws Exception {
100    EnvironmentEdgeManagerTestHelper.reset();
101    LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
102    TEST_UTIL.cleanupTestDir();
103  }
104
105  String getName() {
106    return name.getMethodName();
107  }
108
109  // Dodgy WAL. Will throw exceptions when flags set.
110  class DodgyFSLog extends FSHLog {
111    volatile boolean throwSyncException = false;
112    volatile boolean throwAppendException = false;
113    volatile boolean throwArchiveException = false;
114
115    final AtomicLong rolls = new AtomicLong(0);
116
117    public DodgyFSLog(FileSystem fs, Server server, Path root, String logDir, Configuration conf)
118        throws IOException {
119      super(fs, server, root, logDir, conf);
120    }
121
122    @Override
123    public Map<byte[], List<byte[]>> rollWriter(boolean force)
124        throws FailedLogCloseException, IOException {
125      Map<byte[], List<byte[]>> regions = super.rollWriter(force);
126      rolls.getAndIncrement();
127      return regions;
128    }
129
130    @Override
131    protected void archiveLogFile(Path p) throws IOException {
132      if (throwArchiveException) {
133        throw new IOException("throw archival exception");
134      }
135    }
136
137    @Override
138    protected void archive(Pair<Path, Long> localLogsToArchive) {
139      super.archive(localLogsToArchive);
140    }
141
142    @Override
143    protected Writer createWriterInstance(Path path) throws IOException {
144      final Writer w = super.createWriterInstance(path);
145      return new Writer() {
146        @Override
147        public void close() throws IOException {
148          w.close();
149        }
150
151        @Override
152        public void sync(boolean forceSync) throws IOException {
153          if (throwSyncException) {
154            throw new IOException("FAKE! Failed to replace a bad datanode...");
155          }
156          w.sync(forceSync);
157        }
158
159        @Override
160        public void append(Entry entry) throws IOException {
161          if (throwAppendException) {
162            throw new IOException("FAKE! Failed to replace a bad datanode...");
163          }
164          w.append(entry);
165        }
166
167        @Override
168        public long getLength() {
169          return w.getLength();
170        }
171
172        @Override
173        public long getSyncedLength() {
174          return w.getSyncedLength();
175        }
176      };
177    }
178  }
179  /**
180   * Reproduce locking up that happens when we get an exceptions appending and syncing.
181   * See HBASE-14317.
182   * First I need to set up some mocks for Server and RegionServerServices. I also need to
183   * set up a dodgy WAL that will throw an exception when we go to append to it.
184   */
185  @Test
186  public void testLockupAroundBadAssignSync() throws IOException {
187    // Make up mocked server and services.
188    RegionServerServices services = mock(RegionServerServices.class);
189    when(services.getConfiguration()).thenReturn(CONF);
190    when(services.isStopped()).thenReturn(false);
191    when(services.isAborted()).thenReturn(false);
192    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
193    // the test.
194    FileSystem fs = FileSystem.get(CONF);
195    Path rootDir = new Path(dir + getName());
196    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, (Server)services, rootDir, getName(), CONF);
197    dodgyWAL.init();
198    LogRoller logRoller = new LogRoller(services);
199    logRoller.addWAL(dodgyWAL);
200    logRoller.start();
201
202    boolean threwOnSync = false;
203    boolean threwOnAppend = false;
204    boolean threwOnBoth = false;
205
206    HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
207    try {
208      // Get some random bytes.
209      byte[] value = Bytes.toBytes(getName());
210      try {
211        // First get something into memstore
212        Put put = new Put(value);
213        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
214        region.put(put);
215      } catch (IOException ioe) {
216        fail();
217      }
218      long rollsCount = dodgyWAL.rolls.get();
219      try {
220        dodgyWAL.throwAppendException = true;
221        dodgyWAL.throwSyncException = false;
222        Put put = new Put(value);
223        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
224        region.put(put);
225      } catch (IOException ioe) {
226        threwOnAppend = true;
227      }
228      while (rollsCount == dodgyWAL.rolls.get()) {
229        Threads.sleep(100);
230      }
231      rollsCount = dodgyWAL.rolls.get();
232
233      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
234      // appends to sync. We should be able to continue.
235
236      try {
237        dodgyWAL.throwAppendException = true;
238        dodgyWAL.throwSyncException = true;
239        Put put = new Put(value);
240        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
241        region.put(put);
242      } catch (IOException ioe) {
243        threwOnBoth = true;
244      }
245      while (rollsCount == dodgyWAL.rolls.get()) {
246        Threads.sleep(100);
247      }
248
249      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
250      // to just continue.
251
252      // So, should be no abort at this stage. Verify.
253      Mockito.verify(services, Mockito.atLeast(0)).abort(Mockito.anyString(),
254        Mockito.any(Throwable.class));
255      try {
256        dodgyWAL.throwAppendException = false;
257        dodgyWAL.throwSyncException = true;
258        Put put = new Put(value);
259        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
260        region.put(put);
261      } catch (IOException ioe) {
262        threwOnSync = true;
263      }
264      // An append in the WAL but the sync failed is a server abort condition. That is our
265      // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
266      // happens. If it don't we'll timeout the whole test. That is fine.
267      while (true) {
268        try {
269          Mockito.verify(services, Mockito.atLeast(1)).abort(Mockito.anyString(),
270            Mockito.any(Throwable.class));
271          break;
272        } catch (WantedButNotInvoked t) {
273          Threads.sleep(1);
274        }
275      }
276
277      try {
278        dodgyWAL.throwAppendException = false;
279        dodgyWAL.throwSyncException = false;
280        dodgyWAL.throwArchiveException = true;
281        Pair<Path, Long> pair = new Pair<Path, Long>();
282        pair.setFirst(new Path("/a/b/"));
283        pair.setSecond(100L);
284        dodgyWAL.archive(pair);
285      } catch (Throwable ioe) {
286      }
287      while (true) {
288        try {
289          // one more abort needs to be called
290          Mockito.verify(services, Mockito.atLeast(2)).abort(Mockito.anyString(),
291            (Throwable) Mockito.anyObject());
292          break;
293        } catch (WantedButNotInvoked t) {
294          Threads.sleep(1);
295        }
296      }
297    } finally {
298      // To stop logRoller, its server has to say it is stopped.
299      Mockito.when(services.isStopped()).thenReturn(true);
300      if (logRoller != null) logRoller.close();
301      if (region != null) {
302        try {
303          region.close(true);
304        } catch (DroppedSnapshotException e) {
305          LOG.info("On way out; expected!", e);
306        }
307      }
308      if (dodgyWAL != null) dodgyWAL.close();
309      assertTrue("The regionserver should have thrown an exception", threwOnBoth);
310      assertTrue("The regionserver should have thrown an exception", threwOnAppend);
311      assertTrue("The regionserver should have thrown an exception", threwOnSync);
312    }
313  }
314
315  /**
316   * @return A region on which you must call
317   *         {@link HBaseTestingUtil#closeRegionAndWAL(HRegion)} when done.
318   */
319  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
320      Configuration conf, WAL wal) throws IOException {
321    ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0,
322      0, null, MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
323    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
324      Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
325  }
326}