001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver.wal;
019
020import static org.junit.jupiter.api.Assertions.assertTrue;
021import static org.junit.jupiter.api.Assertions.fail;
022
023import java.io.IOException;
024import java.util.List;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileSystem;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.Abortable;
029import org.apache.hadoop.hbase.HBaseTestingUtil;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.StartTestingClusterOption;
032import org.apache.hadoop.hbase.TableName;
033import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
034import org.apache.hadoop.hbase.client.Put;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.client.TableDescriptor;
037import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
038import org.apache.hadoop.hbase.io.asyncfs.monitor.StreamSlowMonitor;
039import org.apache.hadoop.hbase.regionserver.HRegion;
040import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
041import org.apache.hadoop.hbase.regionserver.HRegionServer;
042import org.apache.hadoop.hbase.regionserver.RegionServerServices;
043import org.apache.hadoop.hbase.regionserver.regionreplication.RegionReplicationSink;
044import org.apache.hadoop.hbase.testclassification.LargeTests;
045import org.apache.hadoop.hbase.testclassification.RegionServerTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.hbase.util.CommonFSUtils;
048import org.apache.hadoop.hbase.wal.AsyncFSWALProvider;
049import org.apache.hadoop.hbase.wal.WAL;
050import org.apache.hadoop.hbase.wal.WALFactory;
051import org.apache.hadoop.hbase.wal.WALProvider;
052import org.junit.jupiter.api.AfterAll;
053import org.junit.jupiter.api.BeforeAll;
054import org.junit.jupiter.api.Tag;
055import org.junit.jupiter.api.Test;
056
057import org.apache.hbase.thirdparty.io.netty.channel.Channel;
058import org.apache.hbase.thirdparty.io.netty.channel.EventLoopGroup;
059
060@Tag(RegionServerTests.TAG)
061@Tag(LargeTests.TAG)
062public class TestWALSyncTimeoutException {
063
064  private static final byte[] FAMILY = Bytes.toBytes("family_test");
065
066  private static final byte[] QUAL = Bytes.toBytes("qualifier_test");
067
068  private static final HBaseTestingUtil HTU = new HBaseTestingUtil();
069
070  private static TableName tableName = TableName.valueOf("TestWALSyncTimeoutException");
071  private static volatile boolean testWALTimout = false;
072  private static final long timeoutMIlliseconds = 3000;
073  private static final String USER_THREAD_NAME = tableName.getNameAsString();
074
075  @BeforeAll
076  public static void setUp() throws Exception {
077    Configuration conf = HTU.getConfiguration();
078    conf.setClass(HConstants.REGION_IMPL, HRegionForTest.class, HRegion.class);
079    conf.setInt(RegionReplicationSink.RETRIES_NUMBER, 1);
080    conf.setLong(RegionReplicationSink.RPC_TIMEOUT_MS, 10 * 60 * 1000);
081    conf.setLong(RegionReplicationSink.OPERATION_TIMEOUT_MS, 20 * 60 * 1000);
082    conf.setLong(RegionReplicationSink.META_EDIT_RPC_TIMEOUT_MS, 10 * 60 * 1000);
083    conf.setLong(RegionReplicationSink.META_EDIT_OPERATION_TIMEOUT_MS, 20 * 60 * 1000);
084    conf.setClass(WALFactory.WAL_PROVIDER, SlowAsyncFSWALProvider.class, WALProvider.class);
085    conf.setLong(AbstractFSWAL.WAL_SYNC_TIMEOUT_MS, timeoutMIlliseconds);
086    HTU.startMiniCluster(StartTestingClusterOption.builder().numRegionServers(1).build());
087
088  }
089
090  @AfterAll
091  public static void tearDown() throws Exception {
092    HTU.shutdownMiniCluster();
093  }
094
095  /**
096   * This test is for HBASE-27230. When {@link WAL#sync} timeout, it would throws
097   * {@link WALSyncTimeoutIOException},and when {@link HRegion#doWALAppend} catches this exception
098   * it aborts the RegionServer.
099   */
100  @Test
101  public void testWALSyncWriteException() throws Exception {
102    final HRegionForTest region = this.createTable();
103
104    String oldThreadName = Thread.currentThread().getName();
105    Thread.currentThread().setName(USER_THREAD_NAME);
106    try {
107      byte[] rowKey1 = Bytes.toBytes(1);
108      byte[] value1 = Bytes.toBytes(3);
109      Thread.sleep(2000);
110      testWALTimout = true;
111
112      /**
113       * The {@link WAL#sync} would timeout and throws {@link WALSyncTimeoutIOException},when
114       * {@link HRegion#doWALAppend} catches this exception it aborts the RegionServer.
115       */
116      try {
117        region.put(new Put(rowKey1).addColumn(FAMILY, QUAL, value1));
118        fail();
119      } catch (WALSyncTimeoutIOException e) {
120        assertTrue(e != null);
121      }
122      assertTrue(region.getRSServices().isAborted());
123    } finally {
124      Thread.currentThread().setName(oldThreadName);
125      testWALTimout = false;
126    }
127  }
128
129  private HRegionForTest createTable() throws Exception {
130    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
131      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build();
132    HTU.getAdmin().createTable(tableDescriptor);
133    HRegionServer rs = HTU.getMiniHBaseCluster().getRegionServer(0);
134    return (HRegionForTest) rs.getRegions(tableName).get(0);
135  }
136
137  public static final class HRegionForTest extends HRegion {
138
139    public HRegionForTest(HRegionFileSystem fs, WAL wal, Configuration confParam,
140      TableDescriptor htd, RegionServerServices rsServices) {
141      super(fs, wal, confParam, htd, rsServices);
142    }
143
144    @SuppressWarnings("deprecation")
145    public HRegionForTest(Path tableDir, WAL wal, FileSystem fs, Configuration confParam,
146      RegionInfo regionInfo, TableDescriptor htd, RegionServerServices rsServices) {
147      super(tableDir, wal, fs, confParam, regionInfo, htd, rsServices);
148    }
149
150    public RegionServerServices getRSServices() {
151      return this.rsServices;
152    }
153
154  }
155
156  public static class SlowAsyncFSWAL extends AsyncFSWAL {
157
158    public SlowAsyncFSWAL(FileSystem fs, Abortable abortable, Path rootDir, String logDir,
159      String archiveDir, Configuration conf, List<WALActionsListener> listeners,
160      boolean failIfWALExists, String prefix, String suffix, EventLoopGroup eventLoopGroup,
161      Class<? extends Channel> channelClass, StreamSlowMonitor monitor)
162      throws FailedLogCloseException, IOException {
163      super(fs, abortable, rootDir, logDir, archiveDir, conf, listeners, failIfWALExists, prefix,
164        suffix, null, null, eventLoopGroup, channelClass, monitor);
165    }
166
167    @Override
168    protected void atHeadOfRingBufferEventHandlerAppend() {
169      if (testWALTimout) {
170        try {
171          Thread.sleep(timeoutMIlliseconds + 1000);
172        } catch (InterruptedException e) {
173          throw new RuntimeException(e);
174        }
175      }
176      super.atHeadOfRingBufferEventHandlerAppend();
177    }
178
179  }
180
181  public static class SlowAsyncFSWALProvider extends AsyncFSWALProvider {
182
183    @Override
184    protected AsyncFSWAL createWAL() throws IOException {
185      return new SlowAsyncFSWAL(CommonFSUtils.getWALFileSystem(conf), this.abortable,
186        CommonFSUtils.getWALRootDir(conf), getWALDirectoryName(factory.getFactoryId()),
187        getWALArchiveDirectoryName(conf, factory.getFactoryId()), conf, listeners, true, logPrefix,
188        META_WAL_PROVIDER_ID.equals(providerId) ? META_WAL_PROVIDER_ID : null, eventLoopGroup,
189        channelClass, factory.getExcludeDatanodeManager().getStreamSlowMonitor(providerId));
190    }
191
192  }
193}