001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver.wal;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.lang.reflect.Field;
026import java.util.List;
027import java.util.NavigableMap;
028import java.util.TreeMap;
029import java.util.concurrent.CountDownLatch;
030import java.util.concurrent.ExecutorService;
031import java.util.concurrent.Executors;
032import java.util.concurrent.TimeUnit;
033import java.util.concurrent.atomic.AtomicBoolean;
034import org.apache.hadoop.conf.Configuration;
035import org.apache.hadoop.fs.FileSystem;
036import org.apache.hadoop.fs.Path;
037import org.apache.hadoop.hbase.HBaseClassTestRule;
038import org.apache.hadoop.hbase.HConstants;
039import org.apache.hadoop.hbase.TableName;
040import org.apache.hadoop.hbase.Waiter;
041import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
042import org.apache.hadoop.hbase.client.Put;
043import org.apache.hadoop.hbase.client.RegionInfo;
044import org.apache.hadoop.hbase.client.RegionInfoBuilder;
045import org.apache.hadoop.hbase.client.TableDescriptor;
046import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
047import org.apache.hadoop.hbase.regionserver.ChunkCreator;
048import org.apache.hadoop.hbase.regionserver.HRegion;
049import org.apache.hadoop.hbase.regionserver.MemStoreLAB;
050import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
051import org.apache.hadoop.hbase.testclassification.MediumTests;
052import org.apache.hadoop.hbase.testclassification.RegionServerTests;
053import org.apache.hadoop.hbase.util.Bytes;
054import org.apache.hadoop.hbase.util.CommonFSUtils;
055import org.apache.hadoop.hbase.util.Threads;
056import org.apache.hadoop.hbase.wal.WAL;
057import org.apache.hadoop.hbase.wal.WALEdit;
058import org.apache.hadoop.hbase.wal.WALKey;
059import org.apache.hadoop.hbase.wal.WALProvider;
060import org.junit.ClassRule;
061import org.junit.Rule;
062import org.junit.Test;
063import org.junit.experimental.categories.Category;
064import org.junit.rules.TestName;
065
066/**
067 * Provides FSHLog test cases.
068 */
069@Category({ RegionServerTests.class, MediumTests.class })
070public class TestFSHLog extends AbstractTestFSWAL {
071
072  @ClassRule
073  public static final HBaseClassTestRule CLASS_RULE =
074      HBaseClassTestRule.forClass(TestFSHLog.class);
075
076  private static final long TEST_TIMEOUT_MS = 10000;
077
078  @Rule
079  public TestName name = new TestName();
080
081  @Override
082  protected AbstractFSWAL<?> newWAL(FileSystem fs, Path rootDir, String walDir, String archiveDir,
083      Configuration conf, List<WALActionsListener> listeners, boolean failIfWALExists,
084      String prefix, String suffix) throws IOException {
085    FSHLog wal =
086      new FSHLog(fs, rootDir, walDir, archiveDir, conf, listeners, failIfWALExists, prefix, suffix);
087    wal.init();
088    return wal;
089  }
090
091  @Override
092  protected AbstractFSWAL<?> newSlowWAL(FileSystem fs, Path rootDir, String walDir,
093      String archiveDir, Configuration conf, List<WALActionsListener> listeners,
094      boolean failIfWALExists, String prefix, String suffix, final Runnable action)
095      throws IOException {
096    FSHLog wal = new FSHLog(fs, rootDir, walDir, archiveDir, conf, listeners, failIfWALExists,
097        prefix, suffix) {
098
099      @Override
100      protected void atHeadOfRingBufferEventHandlerAppend() {
101        action.run();
102        super.atHeadOfRingBufferEventHandlerAppend();
103      }
104    };
105    wal.init();
106    return wal;
107  }
108
109  @Test
110  public void testSyncRunnerIndexOverflow() throws IOException, NoSuchFieldException,
111      SecurityException, IllegalArgumentException, IllegalAccessException {
112    final String name = this.name.getMethodName();
113    FSHLog log = new FSHLog(FS, CommonFSUtils.getRootDir(CONF), name,
114      HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null);
115    log.init();
116    try {
117      Field ringBufferEventHandlerField = FSHLog.class.getDeclaredField("ringBufferEventHandler");
118      ringBufferEventHandlerField.setAccessible(true);
119      FSHLog.RingBufferEventHandler ringBufferEventHandler =
120          (FSHLog.RingBufferEventHandler) ringBufferEventHandlerField.get(log);
121      Field syncRunnerIndexField =
122          FSHLog.RingBufferEventHandler.class.getDeclaredField("syncRunnerIndex");
123      syncRunnerIndexField.setAccessible(true);
124      syncRunnerIndexField.set(ringBufferEventHandler, Integer.MAX_VALUE - 1);
125      TableDescriptor htd =
126          TableDescriptorBuilder.newBuilder(TableName.valueOf(this.name.getMethodName()))
127            .setColumnFamily(ColumnFamilyDescriptorBuilder.of("row")).build();
128      NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
129      for (byte[] fam : htd.getColumnFamilyNames()) {
130        scopes.put(fam, 0);
131      }
132      RegionInfo hri = RegionInfoBuilder.newBuilder(htd.getTableName()).build();
133      MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
134      for (int i = 0; i < 10; i++) {
135        addEdits(log, hri, htd, 1, mvcc, scopes, "row");
136      }
137    } finally {
138      log.close();
139    }
140  }
141
142  /**
143   * Test for WAL stall due to sync future overwrites. See HBASE-25984.
144   */
145  @Test
146  public void testDeadlockWithSyncOverwrites() throws Exception {
147    final CountDownLatch blockBeforeSafePoint = new CountDownLatch(1);
148
149    class FailingWriter implements WALProvider.Writer {
150      @Override public void sync(boolean forceSync) throws IOException {
151        throw new IOException("Injected failure..");
152      }
153
154      @Override public void append(WAL.Entry entry) throws IOException {
155      }
156
157      @Override public long getLength() {
158        return 0;
159      }
160
161      @Override
162      public long getSyncedLength() {
163        return 0;
164      }
165
166      @Override public void close() throws IOException {
167      }
168    }
169
170    /*
171     * Custom FSHLog implementation with a conditional wait before attaining safe point.
172     */
173    class CustomFSHLog extends FSHLog {
174      public CustomFSHLog(FileSystem fs, Path rootDir, String logDir, String archiveDir,
175                          Configuration conf, List<WALActionsListener> listeners, boolean failIfWALExists,
176                          String prefix, String suffix) throws IOException {
177        super(fs, rootDir, logDir, archiveDir, conf, listeners, failIfWALExists, prefix, suffix);
178      }
179
180      @Override
181      protected void beforeWaitOnSafePoint() {
182        try {
183          assertTrue(blockBeforeSafePoint.await(TEST_TIMEOUT_MS, TimeUnit.MILLISECONDS));
184        } catch (InterruptedException e) {
185          throw new RuntimeException(e);
186        }
187      }
188
189      public SyncFuture publishSyncOnRingBuffer() {
190        long sequence = getSequenceOnRingBuffer();
191        return publishSyncOnRingBuffer(sequence, false);
192      }
193    }
194
195    final String name = this.name.getMethodName();
196    try (CustomFSHLog log = new CustomFSHLog(FS, CommonFSUtils.getRootDir(CONF), name,
197        HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null)) {
198      log.setWriter(new FailingWriter());
199      Field ringBufferEventHandlerField =
200          FSHLog.class.getDeclaredField("ringBufferEventHandler");
201      ringBufferEventHandlerField.setAccessible(true);
202      FSHLog.RingBufferEventHandler ringBufferEventHandler =
203          (FSHLog.RingBufferEventHandler) ringBufferEventHandlerField.get(log);
204      // Force a safe point
205      FSHLog.SafePointZigZagLatch latch = ringBufferEventHandler.attainSafePoint();
206      try {
207        SyncFuture future0 = log.publishSyncOnRingBuffer();
208        // Wait for the sync to be done.
209        Waiter.waitFor(CONF, TEST_TIMEOUT_MS, future0::isDone);
210        // Publish another sync from the same thread, this should not overwrite the done sync.
211        SyncFuture future1 = log.publishSyncOnRingBuffer();
212        assertFalse(future1.isDone());
213        // Unblock the safe point trigger..
214        blockBeforeSafePoint.countDown();
215        // Wait for the safe point to be reached.
216        // With the deadlock in HBASE-25984, this is never possible, thus blocking the sync pipeline.
217        Waiter.waitFor(CONF, TEST_TIMEOUT_MS, latch::isSafePointAttained);
218      } finally {
219        // Force release the safe point, for the clean up.
220        latch.releaseSafePoint();
221      }
222    }
223  }
224
225  /**
226   * Test case for https://issues.apache.org/jira/browse/HBASE-16721
227   */
228  @Test
229  public void testUnflushedSeqIdTracking() throws IOException, InterruptedException {
230    final String name = this.name.getMethodName();
231    final byte[] b = Bytes.toBytes("b");
232
233    final AtomicBoolean startHoldingForAppend = new AtomicBoolean(false);
234    final CountDownLatch holdAppend = new CountDownLatch(1);
235    final CountDownLatch flushFinished = new CountDownLatch(1);
236    final CountDownLatch putFinished = new CountDownLatch(1);
237
238    try (FSHLog log = new FSHLog(FS, CommonFSUtils.getRootDir(CONF), name,
239      HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null)) {
240      log.init();
241      log.registerWALActionsListener(new WALActionsListener() {
242        @Override
243        public void visitLogEntryBeforeWrite(RegionInfo info, WALKey logKey, WALEdit logEdit) {
244          if (startHoldingForAppend.get()) {
245            try {
246              holdAppend.await();
247            } catch (InterruptedException e) {
248              LOG.error(e.toString(), e);
249            }
250          }
251        }
252      });
253
254      // open a new region which uses this WAL
255      TableDescriptor htd =
256          TableDescriptorBuilder.newBuilder(TableName.valueOf(this.name.getMethodName()))
257            .setColumnFamily(ColumnFamilyDescriptorBuilder.of(b)).build();
258      RegionInfo hri = RegionInfoBuilder.newBuilder(htd.getTableName()).build();
259      ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0,
260        0, null, MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
261      final HRegion region = TEST_UTIL.createLocalHRegion(hri, CONF, htd, log);
262      ExecutorService exec = Executors.newFixedThreadPool(2);
263
264      // do a regular write first because of memstore size calculation.
265      region.put(new Put(b).addColumn(b, b,b));
266
267      startHoldingForAppend.set(true);
268      exec.submit(new Runnable() {
269        @Override
270        public void run() {
271          try {
272            region.put(new Put(b).addColumn(b, b,b));
273            putFinished.countDown();
274          } catch (IOException e) {
275            LOG.error(e.toString(), e);
276          }
277        }
278      });
279
280      // give the put a chance to start
281      Threads.sleep(3000);
282
283      exec.submit(new Runnable() {
284        @Override
285        public void run() {
286          try {
287            HRegion.FlushResult flushResult = region.flush(true);
288            LOG.info("Flush result:" +  flushResult.getResult());
289            LOG.info("Flush succeeded:" +  flushResult.isFlushSucceeded());
290            flushFinished.countDown();
291          } catch (IOException e) {
292            LOG.error(e.toString(), e);
293          }
294        }
295      });
296
297      // give the flush a chance to start. Flush should have got the region lock, and
298      // should have been waiting on the mvcc complete after this.
299      Threads.sleep(3000);
300
301      // let the append to WAL go through now that the flush already started
302      holdAppend.countDown();
303      putFinished.await();
304      flushFinished.await();
305
306      // check whether flush went through
307      assertEquals("Region did not flush?", 1, region.getStoreFileList(new byte[][]{b}).size());
308
309      // now check the region's unflushed seqIds.
310      long seqId = log.getEarliestMemStoreSeqNum(hri.getEncodedNameAsBytes());
311      assertEquals("Found seqId for the region which is already flushed",
312          HConstants.NO_SEQNUM, seqId);
313
314      region.close();
315    }
316  }
317}