001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver.wal; 019 020import static org.junit.jupiter.api.Assertions.assertTrue; 021import static org.junit.jupiter.api.Assertions.fail; 022 023import java.io.IOException; 024import java.util.List; 025import org.apache.hadoop.conf.Configuration; 026import org.apache.hadoop.fs.FileSystem; 027import org.apache.hadoop.fs.Path; 028import org.apache.hadoop.hbase.Abortable; 029import org.apache.hadoop.hbase.HBaseTestingUtil; 030import org.apache.hadoop.hbase.HConstants; 031import org.apache.hadoop.hbase.StartTestingClusterOption; 032import org.apache.hadoop.hbase.TableName; 033import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 034import org.apache.hadoop.hbase.client.Put; 035import org.apache.hadoop.hbase.client.RegionInfo; 036import org.apache.hadoop.hbase.client.TableDescriptor; 037import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 038import org.apache.hadoop.hbase.io.asyncfs.monitor.StreamSlowMonitor; 039import org.apache.hadoop.hbase.regionserver.HRegion; 040import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 041import org.apache.hadoop.hbase.regionserver.HRegionServer; 042import org.apache.hadoop.hbase.regionserver.RegionServerServices; 043import org.apache.hadoop.hbase.regionserver.regionreplication.RegionReplicationSink; 044import org.apache.hadoop.hbase.testclassification.LargeTests; 045import org.apache.hadoop.hbase.testclassification.RegionServerTests; 046import org.apache.hadoop.hbase.util.Bytes; 047import org.apache.hadoop.hbase.util.CommonFSUtils; 048import org.apache.hadoop.hbase.wal.AsyncFSWALProvider; 049import org.apache.hadoop.hbase.wal.WAL; 050import org.apache.hadoop.hbase.wal.WALFactory; 051import org.apache.hadoop.hbase.wal.WALProvider; 052import org.junit.jupiter.api.AfterAll; 053import org.junit.jupiter.api.BeforeAll; 054import org.junit.jupiter.api.Tag; 055import org.junit.jupiter.api.Test; 056 057import org.apache.hbase.thirdparty.io.netty.channel.Channel; 058import org.apache.hbase.thirdparty.io.netty.channel.EventLoopGroup; 059 060@Tag(RegionServerTests.TAG) 061@Tag(LargeTests.TAG) 062public class TestWALSyncTimeoutException { 063 064 private static final byte[] FAMILY = Bytes.toBytes("family_test"); 065 066 private static final byte[] QUAL = Bytes.toBytes("qualifier_test"); 067 068 private static final HBaseTestingUtil HTU = new HBaseTestingUtil(); 069 070 private static TableName tableName = TableName.valueOf("TestWALSyncTimeoutException"); 071 private static volatile boolean testWALTimout = false; 072 private static final long timeoutMIlliseconds = 3000; 073 private static final String USER_THREAD_NAME = tableName.getNameAsString(); 074 075 @BeforeAll 076 public static void setUp() throws Exception { 077 Configuration conf = HTU.getConfiguration(); 078 conf.setClass(HConstants.REGION_IMPL, HRegionForTest.class, HRegion.class); 079 conf.setInt(RegionReplicationSink.RETRIES_NUMBER, 1); 080 conf.setLong(RegionReplicationSink.RPC_TIMEOUT_MS, 10 * 60 * 1000); 081 conf.setLong(RegionReplicationSink.OPERATION_TIMEOUT_MS, 20 * 60 * 1000); 082 conf.setLong(RegionReplicationSink.META_EDIT_RPC_TIMEOUT_MS, 10 * 60 * 1000); 083 conf.setLong(RegionReplicationSink.META_EDIT_OPERATION_TIMEOUT_MS, 20 * 60 * 1000); 084 conf.setClass(WALFactory.WAL_PROVIDER, SlowAsyncFSWALProvider.class, WALProvider.class); 085 conf.setLong(AbstractFSWAL.WAL_SYNC_TIMEOUT_MS, timeoutMIlliseconds); 086 HTU.startMiniCluster(StartTestingClusterOption.builder().numRegionServers(1).build()); 087 088 } 089 090 @AfterAll 091 public static void tearDown() throws Exception { 092 HTU.shutdownMiniCluster(); 093 } 094 095 /** 096 * This test is for HBASE-27230. When {@link WAL#sync} timeout, it would throws 097 * {@link WALSyncTimeoutIOException},and when {@link HRegion#doWALAppend} catches this exception 098 * it aborts the RegionServer. 099 */ 100 @Test 101 public void testWALSyncWriteException() throws Exception { 102 final HRegionForTest region = this.createTable(); 103 104 String oldThreadName = Thread.currentThread().getName(); 105 Thread.currentThread().setName(USER_THREAD_NAME); 106 try { 107 byte[] rowKey1 = Bytes.toBytes(1); 108 byte[] value1 = Bytes.toBytes(3); 109 Thread.sleep(2000); 110 testWALTimout = true; 111 112 /** 113 * The {@link WAL#sync} would timeout and throws {@link WALSyncTimeoutIOException},when 114 * {@link HRegion#doWALAppend} catches this exception it aborts the RegionServer. 115 */ 116 try { 117 region.put(new Put(rowKey1).addColumn(FAMILY, QUAL, value1)); 118 fail(); 119 } catch (WALSyncTimeoutIOException e) { 120 assertTrue(e != null); 121 } 122 assertTrue(region.getRSServices().isAborted()); 123 } finally { 124 Thread.currentThread().setName(oldThreadName); 125 testWALTimout = false; 126 } 127 } 128 129 private HRegionForTest createTable() throws Exception { 130 TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName) 131 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); 132 HTU.getAdmin().createTable(tableDescriptor); 133 HRegionServer rs = HTU.getMiniHBaseCluster().getRegionServer(0); 134 return (HRegionForTest) rs.getRegions(tableName).get(0); 135 } 136 137 public static final class HRegionForTest extends HRegion { 138 139 public HRegionForTest(HRegionFileSystem fs, WAL wal, Configuration confParam, 140 TableDescriptor htd, RegionServerServices rsServices) { 141 super(fs, wal, confParam, htd, rsServices); 142 } 143 144 @SuppressWarnings("deprecation") 145 public HRegionForTest(Path tableDir, WAL wal, FileSystem fs, Configuration confParam, 146 RegionInfo regionInfo, TableDescriptor htd, RegionServerServices rsServices) { 147 super(tableDir, wal, fs, confParam, regionInfo, htd, rsServices); 148 } 149 150 public RegionServerServices getRSServices() { 151 return this.rsServices; 152 } 153 154 } 155 156 public static class SlowAsyncFSWAL extends AsyncFSWAL { 157 158 public SlowAsyncFSWAL(FileSystem fs, Abortable abortable, Path rootDir, String logDir, 159 String archiveDir, Configuration conf, List<WALActionsListener> listeners, 160 boolean failIfWALExists, String prefix, String suffix, EventLoopGroup eventLoopGroup, 161 Class<? extends Channel> channelClass, StreamSlowMonitor monitor) 162 throws FailedLogCloseException, IOException { 163 super(fs, abortable, rootDir, logDir, archiveDir, conf, listeners, failIfWALExists, prefix, 164 suffix, null, null, eventLoopGroup, channelClass, monitor); 165 } 166 167 @Override 168 protected void atHeadOfRingBufferEventHandlerAppend() { 169 if (testWALTimout) { 170 try { 171 Thread.sleep(timeoutMIlliseconds + 1000); 172 } catch (InterruptedException e) { 173 throw new RuntimeException(e); 174 } 175 } 176 super.atHeadOfRingBufferEventHandlerAppend(); 177 } 178 179 } 180 181 public static class SlowAsyncFSWALProvider extends AsyncFSWALProvider { 182 183 @Override 184 protected AsyncFSWAL createWAL() throws IOException { 185 return new SlowAsyncFSWAL(CommonFSUtils.getWALFileSystem(conf), this.abortable, 186 CommonFSUtils.getWALRootDir(conf), getWALDirectoryName(factory.getFactoryId()), 187 getWALArchiveDirectoryName(conf, factory.getFactoryId()), conf, listeners, true, logPrefix, 188 META_WAL_PROVIDER_ID.equals(providerId) ? META_WAL_PROVIDER_ID : null, eventLoopGroup, 189 channelClass, factory.getExcludeDatanodeManager().getStreamSlowMonitor(providerId)); 190 } 191 192 } 193}