001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertTrue;
021import static org.junit.Assert.fail;
022import static org.mockito.Mockito.mock;
023import static org.mockito.Mockito.when;
024
025import java.io.IOException;
026import java.util.concurrent.atomic.AtomicLong;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.FileSystem;
029import org.apache.hadoop.fs.Path;
030import org.apache.hadoop.hbase.DroppedSnapshotException;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseTestingUtility;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.Server;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.client.Durability;
037import org.apache.hadoop.hbase.client.Put;
038import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
039import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
040import org.apache.hadoop.hbase.testclassification.MediumTests;
041import org.apache.hadoop.hbase.util.Bytes;
042import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
043import org.apache.hadoop.hbase.util.Threads;
044import org.apache.hadoop.hbase.wal.WAL;
045import org.apache.hadoop.hbase.wal.WALProvider.Writer;
046import org.junit.After;
047import org.junit.Before;
048import org.junit.ClassRule;
049import org.junit.Rule;
050import org.junit.Test;
051import org.junit.experimental.categories.Category;
052import org.junit.rules.TestName;
053import org.mockito.Mockito;
054import org.mockito.exceptions.verification.WantedButNotInvoked;
055import org.slf4j.Logger;
056import org.slf4j.LoggerFactory;
057
058/**
059 * Testing sync/append failures.
060 * Copied from TestHRegion.
061 */
062@Category({MediumTests.class})
063public class TestFailedAppendAndSync {
064
065  @ClassRule
066  public static final HBaseClassTestRule CLASS_RULE =
067      HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);
068
069  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
070  @Rule public TestName name = new TestName();
071
072  private static final String COLUMN_FAMILY = "MyCF";
073  private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
074
075  HRegion region = null;
076  // Do not run unit tests in parallel (? Why not?  It don't work?  Why not?  St.Ack)
077  private static HBaseTestingUtility TEST_UTIL;
078  public static Configuration CONF ;
079  private String dir;
080
081  // Test names
082  protected TableName tableName;
083
084  @Before
085  public void setup() throws IOException {
086    TEST_UTIL = HBaseTestingUtility.createLocalHTU();
087    CONF = TEST_UTIL.getConfiguration();
088    // Disable block cache.
089    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
090    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
091    tableName = TableName.valueOf(name.getMethodName());
092  }
093
094  @After
095  public void tearDown() throws Exception {
096    EnvironmentEdgeManagerTestHelper.reset();
097    LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
098    TEST_UTIL.cleanupTestDir();
099  }
100
101  String getName() {
102    return name.getMethodName();
103  }
104
105  /**
106   * Reproduce locking up that happens when we get an exceptions appending and syncing.
107   * See HBASE-14317.
108   * First I need to set up some mocks for Server and RegionServerServices. I also need to
109   * set up a dodgy WAL that will throw an exception when we go to append to it.
110   */
111  @Test
112  public void testLockupAroundBadAssignSync() throws IOException {
113    final AtomicLong rolls = new AtomicLong(0);
114    // Dodgy WAL. Will throw exceptions when flags set.
115    class DodgyFSLog extends FSHLog {
116      volatile boolean throwSyncException = false;
117      volatile boolean throwAppendException = false;
118
119      public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
120      throws IOException {
121        super(fs, root, logDir, conf);
122      }
123
124      @Override
125      public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException {
126        byte [][] regions = super.rollWriter(force);
127        rolls.getAndIncrement();
128        return regions;
129      }
130
131      @Override
132      protected Writer createWriterInstance(Path path) throws IOException {
133        final Writer w = super.createWriterInstance(path);
134          return new Writer() {
135            @Override
136            public void close() throws IOException {
137              w.close();
138            }
139
140            @Override
141            public void sync() throws IOException {
142              if (throwSyncException) {
143                throw new IOException("FAKE! Failed to replace a bad datanode...");
144              }
145              w.sync();
146            }
147
148            @Override
149            public void append(Entry entry) throws IOException {
150              if (throwAppendException) {
151                throw new IOException("FAKE! Failed to replace a bad datanode...");
152              }
153              w.append(entry);
154            }
155
156            @Override
157            public long getLength() {
158              return w.getLength();
159              }
160            };
161          }
162      }
163
164    // Make up mocked server and services.
165    Server server = mock(Server.class);
166    when(server.getConfiguration()).thenReturn(CONF);
167    when(server.isStopped()).thenReturn(false);
168    when(server.isAborted()).thenReturn(false);
169    RegionServerServices services = mock(RegionServerServices.class);
170    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
171    // the test.
172    FileSystem fs = FileSystem.get(CONF);
173    Path rootDir = new Path(dir + getName());
174    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
175    LogRoller logRoller = new LogRoller(server, services);
176    logRoller.addWAL(dodgyWAL);
177    logRoller.start();
178
179    boolean threwOnSync = false;
180    boolean threwOnAppend = false;
181    boolean threwOnBoth = false;
182
183    HRegion region = initHRegion(tableName, null, null, dodgyWAL);
184    try {
185      // Get some random bytes.
186      byte[] value = Bytes.toBytes(getName());
187      try {
188        // First get something into memstore
189        Put put = new Put(value);
190        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
191        region.put(put);
192      } catch (IOException ioe) {
193        fail();
194      }
195      long rollsCount = rolls.get();
196      try {
197        dodgyWAL.throwAppendException = true;
198        dodgyWAL.throwSyncException = false;
199        Put put = new Put(value);
200        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
201        region.put(put);
202      } catch (IOException ioe) {
203        threwOnAppend = true;
204      }
205      while (rollsCount == rolls.get()) Threads.sleep(100);
206      rollsCount = rolls.get();
207
208      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
209      // appends to sync. We should be able to continue.
210
211      try {
212        dodgyWAL.throwAppendException = true;
213        dodgyWAL.throwSyncException = true;
214        Put put = new Put(value);
215        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
216        region.put(put);
217      } catch (IOException ioe) {
218        threwOnBoth = true;
219      }
220      while (rollsCount == rolls.get()) Threads.sleep(100);
221
222      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
223      // to just continue.
224
225      // So, should be no abort at this stage. Verify.
226      Mockito.verify(server, Mockito.atLeast(0)).
227        abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
228      try {
229        dodgyWAL.throwAppendException = false;
230        dodgyWAL.throwSyncException = true;
231        Put put = new Put(value);
232        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
233        region.put(put);
234      } catch (IOException ioe) {
235        threwOnSync = true;
236      }
237      // An append in the WAL but the sync failed is a server abort condition. That is our
238      // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
239      // happens. If it don't we'll timeout the whole test. That is fine.
240      while (true) {
241        try {
242          Mockito.verify(server, Mockito.atLeast(1)).
243            abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
244          break;
245        } catch (WantedButNotInvoked t) {
246          Threads.sleep(1);
247        }
248      }
249    } finally {
250      // To stop logRoller, its server has to say it is stopped.
251      Mockito.when(server.isStopped()).thenReturn(true);
252      if (logRoller != null) logRoller.close();
253      if (region != null) {
254        try {
255          region.close(true);
256        } catch (DroppedSnapshotException e) {
257          LOG.info("On way out; expected!", e);
258        }
259      }
260      if (dodgyWAL != null) dodgyWAL.close();
261      assertTrue("The regionserver should have thrown an exception", threwOnBoth);
262      assertTrue("The regionserver should have thrown an exception", threwOnAppend);
263      assertTrue("The regionserver should have thrown an exception", threwOnSync);
264    }
265  }
266
267  /**
268   * @return A region on which you must call
269   *         {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done.
270   */
271  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, WAL wal)
272  throws IOException {
273    ChunkCreator.initialize(MemStoreLABImpl.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null);
274    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, false, Durability.SYNC_WAL,
275      wal, COLUMN_FAMILY_BYTES);
276  }
277}