001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertTrue;
021import static org.junit.Assert.fail;
022import static org.mockito.Mockito.mock;
023import static org.mockito.Mockito.when;
024
025import java.io.IOException;
026import java.util.concurrent.atomic.AtomicLong;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.FileSystem;
029import org.apache.hadoop.fs.Path;
030import org.apache.hadoop.hbase.DroppedSnapshotException;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseTestingUtility;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.Durability;
036import org.apache.hadoop.hbase.client.Put;
037import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
038import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
039import org.apache.hadoop.hbase.testclassification.MediumTests;
040import org.apache.hadoop.hbase.util.Bytes;
041import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
042import org.apache.hadoop.hbase.util.Threads;
043import org.apache.hadoop.hbase.wal.WAL;
044import org.apache.hadoop.hbase.wal.WALProvider.Writer;
045import org.junit.After;
046import org.junit.Before;
047import org.junit.ClassRule;
048import org.junit.Rule;
049import org.junit.Test;
050import org.junit.experimental.categories.Category;
051import org.junit.rules.TestName;
052import org.mockito.Mockito;
053import org.mockito.exceptions.verification.WantedButNotInvoked;
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056
057/**
058 * Testing sync/append failures.
059 * Copied from TestHRegion.
060 */
061@Category({MediumTests.class})
062public class TestFailedAppendAndSync {
063
064  @ClassRule
065  public static final HBaseClassTestRule CLASS_RULE =
066      HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);
067
068  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
069  @Rule public TestName name = new TestName();
070
071  private static final String COLUMN_FAMILY = "MyCF";
072  private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
073
074  HRegion region = null;
075  // Do not run unit tests in parallel (? Why not?  It don't work?  Why not?  St.Ack)
076  private static HBaseTestingUtility TEST_UTIL;
077  public static Configuration CONF ;
078  private String dir;
079
080  // Test names
081  protected TableName tableName;
082
083  @Before
084  public void setup() throws IOException {
085    TEST_UTIL = HBaseTestingUtility.createLocalHTU();
086    CONF = TEST_UTIL.getConfiguration();
087    // Disable block cache.
088    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
089    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
090    tableName = TableName.valueOf(name.getMethodName());
091  }
092
093  @After
094  public void tearDown() throws Exception {
095    EnvironmentEdgeManagerTestHelper.reset();
096    LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
097    TEST_UTIL.cleanupTestDir();
098  }
099
100  String getName() {
101    return name.getMethodName();
102  }
103
104  /**
105   * Reproduce locking up that happens when we get an exceptions appending and syncing.
106   * See HBASE-14317.
107   * First I need to set up some mocks for Server and RegionServerServices. I also need to
108   * set up a dodgy WAL that will throw an exception when we go to append to it.
109   */
110  @Test
111  public void testLockupAroundBadAssignSync() throws IOException {
112    final AtomicLong rolls = new AtomicLong(0);
113    // Dodgy WAL. Will throw exceptions when flags set.
114    class DodgyFSLog extends FSHLog {
115      volatile boolean throwSyncException = false;
116      volatile boolean throwAppendException = false;
117
118      public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
119      throws IOException {
120        super(fs, root, logDir, conf);
121      }
122
123      @Override
124      public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException {
125        byte [][] regions = super.rollWriter(force);
126        rolls.getAndIncrement();
127        return regions;
128      }
129
130      @Override
131      protected Writer createWriterInstance(Path path) throws IOException {
132        final Writer w = super.createWriterInstance(path);
133          return new Writer() {
134            @Override
135            public void close() throws IOException {
136              w.close();
137            }
138
139            @Override
140            public void sync(boolean forceSync) throws IOException {
141              if (throwSyncException) {
142                throw new IOException("FAKE! Failed to replace a bad datanode...");
143              }
144              w.sync(forceSync);
145            }
146
147            @Override
148            public void append(Entry entry) throws IOException {
149              if (throwAppendException) {
150                throw new IOException("FAKE! Failed to replace a bad datanode...");
151              }
152              w.append(entry);
153            }
154
155            @Override
156            public long getLength() {
157              return w.getLength();
158              }
159            };
160          }
161      }
162
163    // Make up mocked server and services.
164    RegionServerServices services = mock(RegionServerServices.class);
165    when(services.getConfiguration()).thenReturn(CONF);
166    when(services.isStopped()).thenReturn(false);
167    when(services.isAborted()).thenReturn(false);
168    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
169    // the test.
170    FileSystem fs = FileSystem.get(CONF);
171    Path rootDir = new Path(dir + getName());
172    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
173    dodgyWAL.init();
174    LogRoller logRoller = new LogRoller(services);
175    logRoller.addWAL(dodgyWAL);
176    logRoller.start();
177
178    boolean threwOnSync = false;
179    boolean threwOnAppend = false;
180    boolean threwOnBoth = false;
181
182    HRegion region = initHRegion(tableName, null, null, dodgyWAL);
183    try {
184      // Get some random bytes.
185      byte[] value = Bytes.toBytes(getName());
186      try {
187        // First get something into memstore
188        Put put = new Put(value);
189        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
190        region.put(put);
191      } catch (IOException ioe) {
192        fail();
193      }
194      long rollsCount = rolls.get();
195      try {
196        dodgyWAL.throwAppendException = true;
197        dodgyWAL.throwSyncException = false;
198        Put put = new Put(value);
199        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
200        region.put(put);
201      } catch (IOException ioe) {
202        threwOnAppend = true;
203      }
204      while (rollsCount == rolls.get()) Threads.sleep(100);
205      rollsCount = rolls.get();
206
207      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
208      // appends to sync. We should be able to continue.
209
210      try {
211        dodgyWAL.throwAppendException = true;
212        dodgyWAL.throwSyncException = true;
213        Put put = new Put(value);
214        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
215        region.put(put);
216      } catch (IOException ioe) {
217        threwOnBoth = true;
218      }
219      while (rollsCount == rolls.get()) Threads.sleep(100);
220
221      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
222      // to just continue.
223
224      // So, should be no abort at this stage. Verify.
225      Mockito.verify(services, Mockito.atLeast(0)).
226        abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
227      try {
228        dodgyWAL.throwAppendException = false;
229        dodgyWAL.throwSyncException = true;
230        Put put = new Put(value);
231        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
232        region.put(put);
233      } catch (IOException ioe) {
234        threwOnSync = true;
235      }
236      // An append in the WAL but the sync failed is a server abort condition. That is our
237      // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
238      // happens. If it don't we'll timeout the whole test. That is fine.
239      while (true) {
240        try {
241          Mockito.verify(services, Mockito.atLeast(1)).
242            abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
243          break;
244        } catch (WantedButNotInvoked t) {
245          Threads.sleep(1);
246        }
247      }
248    } finally {
249      // To stop logRoller, its server has to say it is stopped.
250      Mockito.when(services.isStopped()).thenReturn(true);
251      if (logRoller != null) logRoller.close();
252      if (region != null) {
253        try {
254          region.close(true);
255        } catch (DroppedSnapshotException e) {
256          LOG.info("On way out; expected!", e);
257        }
258      }
259      if (dodgyWAL != null) dodgyWAL.close();
260      assertTrue("The regionserver should have thrown an exception", threwOnBoth);
261      assertTrue("The regionserver should have thrown an exception", threwOnAppend);
262      assertTrue("The regionserver should have thrown an exception", threwOnSync);
263    }
264  }
265
266  /**
267   * @return A region on which you must call
268   *         {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done.
269   */
270  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, WAL wal)
271  throws IOException {
272    ChunkCreator.initialize(MemStoreLABImpl.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null);
273    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, false, Durability.SYNC_WAL,
274      wal, COLUMN_FAMILY_BYTES);
275  }
276}