001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver.wal;
019
020import static org.junit.Assert.assertTrue;
021import static org.junit.Assert.fail;
022
023import java.io.IOException;
024import java.util.List;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileSystem;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.Abortable;
029import org.apache.hadoop.hbase.HBaseClassTestRule;
030import org.apache.hadoop.hbase.HBaseTestingUtil;
031import org.apache.hadoop.hbase.HConstants;
032import org.apache.hadoop.hbase.StartTestingClusterOption;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
035import org.apache.hadoop.hbase.client.Put;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.client.TableDescriptor;
038import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
039import org.apache.hadoop.hbase.io.asyncfs.monitor.StreamSlowMonitor;
040import org.apache.hadoop.hbase.regionserver.HRegion;
041import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
042import org.apache.hadoop.hbase.regionserver.HRegionServer;
043import org.apache.hadoop.hbase.regionserver.RegionServerServices;
044import org.apache.hadoop.hbase.regionserver.regionreplication.RegionReplicationSink;
045import org.apache.hadoop.hbase.testclassification.LargeTests;
046import org.apache.hadoop.hbase.testclassification.RegionServerTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.CommonFSUtils;
049import org.apache.hadoop.hbase.wal.AsyncFSWALProvider;
050import org.apache.hadoop.hbase.wal.WAL;
051import org.apache.hadoop.hbase.wal.WALFactory;
052import org.apache.hadoop.hbase.wal.WALProvider;
053import org.junit.AfterClass;
054import org.junit.BeforeClass;
055import org.junit.ClassRule;
056import org.junit.Test;
057import org.junit.experimental.categories.Category;
058
059import org.apache.hbase.thirdparty.io.netty.channel.Channel;
060import org.apache.hbase.thirdparty.io.netty.channel.EventLoopGroup;
061
062@Category({ RegionServerTests.class, LargeTests.class })
063public class TestWALSyncTimeoutException {
064
065  @ClassRule
066  public static final HBaseClassTestRule CLASS_RULE =
067    HBaseClassTestRule.forClass(TestWALSyncTimeoutException.class);
068
069  private static final byte[] FAMILY = Bytes.toBytes("family_test");
070
071  private static final byte[] QUAL = Bytes.toBytes("qualifier_test");
072
073  private static final HBaseTestingUtil HTU = new HBaseTestingUtil();
074
075  private static TableName tableName = TableName.valueOf("TestWALSyncTimeoutException");
076  private static volatile boolean testWALTimout = false;
077  private static final long timeoutMIlliseconds = 3000;
078  private static final String USER_THREAD_NAME = tableName.getNameAsString();
079
080  @BeforeClass
081  public static void setUp() throws Exception {
082    Configuration conf = HTU.getConfiguration();
083    conf.setClass(HConstants.REGION_IMPL, HRegionForTest.class, HRegion.class);
084    conf.setInt(RegionReplicationSink.RETRIES_NUMBER, 1);
085    conf.setLong(RegionReplicationSink.RPC_TIMEOUT_MS, 10 * 60 * 1000);
086    conf.setLong(RegionReplicationSink.OPERATION_TIMEOUT_MS, 20 * 60 * 1000);
087    conf.setLong(RegionReplicationSink.META_EDIT_RPC_TIMEOUT_MS, 10 * 60 * 1000);
088    conf.setLong(RegionReplicationSink.META_EDIT_OPERATION_TIMEOUT_MS, 20 * 60 * 1000);
089    conf.setClass(WALFactory.WAL_PROVIDER, SlowAsyncFSWALProvider.class, WALProvider.class);
090    conf.setLong(AbstractFSWAL.WAL_SYNC_TIMEOUT_MS, timeoutMIlliseconds);
091    HTU.startMiniCluster(StartTestingClusterOption.builder().numRegionServers(1).build());
092
093  }
094
095  @AfterClass
096  public static void tearDown() throws Exception {
097    HTU.shutdownMiniCluster();
098  }
099
100  /**
101   * This test is for HBASE-27230. When {@link WAL#sync} timeout, it would throws
102   * {@link WALSyncTimeoutIOException},and when {@link HRegion#doWALAppend} catches this exception
103   * it aborts the RegionServer.
104   */
105  @Test
106  public void testWALSyncWriteException() throws Exception {
107    final HRegionForTest region = this.createTable();
108
109    String oldThreadName = Thread.currentThread().getName();
110    Thread.currentThread().setName(USER_THREAD_NAME);
111    try {
112      byte[] rowKey1 = Bytes.toBytes(1);
113      byte[] value1 = Bytes.toBytes(3);
114      Thread.sleep(2000);
115      testWALTimout = true;
116
117      /**
118       * The {@link WAL#sync} would timeout and throws {@link WALSyncTimeoutIOException},when
119       * {@link HRegion#doWALAppend} catches this exception it aborts the RegionServer.
120       */
121      try {
122        region.put(new Put(rowKey1).addColumn(FAMILY, QUAL, value1));
123        fail();
124      } catch (WALSyncTimeoutIOException e) {
125        assertTrue(e != null);
126      }
127      assertTrue(region.getRSServices().isAborted());
128    } finally {
129      Thread.currentThread().setName(oldThreadName);
130      testWALTimout = false;
131    }
132  }
133
134  private HRegionForTest createTable() throws Exception {
135    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
136      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build();
137    HTU.getAdmin().createTable(tableDescriptor);
138    HRegionServer rs = HTU.getMiniHBaseCluster().getRegionServer(0);
139    return (HRegionForTest) rs.getRegions(tableName).get(0);
140  }
141
142  public static final class HRegionForTest extends HRegion {
143
144    public HRegionForTest(HRegionFileSystem fs, WAL wal, Configuration confParam,
145      TableDescriptor htd, RegionServerServices rsServices) {
146      super(fs, wal, confParam, htd, rsServices);
147    }
148
149    @SuppressWarnings("deprecation")
150    public HRegionForTest(Path tableDir, WAL wal, FileSystem fs, Configuration confParam,
151      RegionInfo regionInfo, TableDescriptor htd, RegionServerServices rsServices) {
152      super(tableDir, wal, fs, confParam, regionInfo, htd, rsServices);
153    }
154
155    public RegionServerServices getRSServices() {
156      return this.rsServices;
157    }
158
159  }
160
161  public static class SlowAsyncFSWAL extends AsyncFSWAL {
162
163    public SlowAsyncFSWAL(FileSystem fs, Abortable abortable, Path rootDir, String logDir,
164      String archiveDir, Configuration conf, List<WALActionsListener> listeners,
165      boolean failIfWALExists, String prefix, String suffix, EventLoopGroup eventLoopGroup,
166      Class<? extends Channel> channelClass, StreamSlowMonitor monitor)
167      throws FailedLogCloseException, IOException {
168      super(fs, abortable, rootDir, logDir, archiveDir, conf, listeners, failIfWALExists, prefix,
169        suffix, null, null, eventLoopGroup, channelClass, monitor);
170    }
171
172    @Override
173    protected void atHeadOfRingBufferEventHandlerAppend() {
174      if (testWALTimout) {
175        try {
176          Thread.sleep(timeoutMIlliseconds + 1000);
177        } catch (InterruptedException e) {
178          throw new RuntimeException(e);
179        }
180      }
181      super.atHeadOfRingBufferEventHandlerAppend();
182    }
183
184  }
185
186  public static class SlowAsyncFSWALProvider extends AsyncFSWALProvider {
187
188    @Override
189    protected AsyncFSWAL createWAL() throws IOException {
190      return new SlowAsyncFSWAL(CommonFSUtils.getWALFileSystem(conf), this.abortable,
191        CommonFSUtils.getWALRootDir(conf), getWALDirectoryName(factory.getFactoryId()),
192        getWALArchiveDirectoryName(conf, factory.getFactoryId()), conf, listeners, true, logPrefix,
193        META_WAL_PROVIDER_ID.equals(providerId) ? META_WAL_PROVIDER_ID : null, eventLoopGroup,
194        channelClass, factory.getExcludeDatanodeManager().getStreamSlowMonitor(providerId));
195    }
196
197  }
198}