/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.asyncfs;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutionException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.io.asyncfs.monitor.StreamSlowMonitor;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import
org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.io.netty.buffer.ByteBuf;
import org.apache.hbase.thirdparty.io.netty.channel.Channel;
import org.apache.hbase.thirdparty.io.netty.channel.ChannelHandlerContext;
import org.apache.hbase.thirdparty.io.netty.channel.ChannelInboundHandlerAdapter;
import org.apache.hbase.thirdparty.io.netty.channel.EventLoop;
import org.apache.hbase.thirdparty.io.netty.channel.EventLoopGroup;
import org.apache.hbase.thirdparty.io.netty.channel.nio.NioEventLoopGroup;
import org.apache.hbase.thirdparty.io.netty.channel.socket.nio.NioSocketChannel;

/**
 * Testcase for HBASE-26679. Here we introduce a separate test class and do not put the testcase in
 * {@link TestFanOutOneBlockAsyncDFSOutput} because we will send a heartbeat to the DN when there is
 * no outgoing packet, and there the timeout is controlled by
 * {@link TestFanOutOneBlockAsyncDFSOutput#READ_TIMEOUT_MS}, which is 2 seconds. That would keep
 * sending packets out, the DN would respond immediately and then mess up the testing handler added
 * by us. So in this test class we use the default value for the timeout, which is 60 seconds and is
 * enough for this test.
 */
@Category({ MiscTests.class, MediumTests.class })
public class TestFanOutOneBlockAsyncDFSOutputHang extends AsyncFSTestBase {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestFanOutOneBlockAsyncDFSOutputHang.class);

  private static final Logger LOG =
    LoggerFactory.getLogger(TestFanOutOneBlockAsyncDFSOutputHang.class);

  // File system of the mini DFS cluster started in setUp().
  private static DistributedFileSystem FS;

  private static EventLoopGroup EVENT_LOOP_GROUP;

  private static Class<? extends Channel> CHANNEL_CLASS;

  private static StreamSlowMonitor MONITOR;

  // Output stream under test, opened against /testHang with replication 2 so the
  // fan-out pipeline has exactly two datanode channels: dn1 and dn2.
  private static FanOutOneBlockAsyncDFSOutput OUT;

  @Rule
  public TestName name = new TestName();

  @BeforeClass
  public static void setUp() throws Exception {
    // Two DNs so the test can make one replica "fast" and the other "slow".
    startMiniDFSCluster(2);
    FS = CLUSTER.getFileSystem();
    EVENT_LOOP_GROUP = new NioEventLoopGroup();
    CHANNEL_CLASS = NioSocketChannel.class;
    MONITOR = StreamSlowMonitor.create(UTIL.getConfiguration(), "testMonitor");
    Path f = new Path("/testHang");
    EventLoop eventLoop = EVENT_LOOP_GROUP.next();
    OUT = FanOutOneBlockAsyncDFSOutputHelper.createOutput(FS, f, true, false, (short) 2,
      FS.getDefaultBlockSize(), eventLoop, CHANNEL_CLASS, MONITOR);
  }

  @AfterClass
  public static void tearDown() throws Exception {
    if (OUT != null) {
      OUT.recoverAndClose(null);
    }
    if (EVENT_LOOP_GROUP != null) {
      EVENT_LOOP_GROUP.shutdownGracefully().get();
    }
    shutdownMiniDFSCluster();
  }

  /**
   * <pre>
   * This test is for HBASE-26679. Consider there are two dataNodes: dn1 and dn2, dn2 is a slow DN.
   * The threads sequence before HBASE-26679 is:
   * 1.We write some data to {@link FanOutOneBlockAsyncDFSOutput} and then flush it, there is one
   * {@link FanOutOneBlockAsyncDFSOutput.Callback} in
   * {@link FanOutOneBlockAsyncDFSOutput#waitingAckQueue}.
   * 2.The ack from dn1 arrives firstly and triggers Netty to invoke
   * {@link FanOutOneBlockAsyncDFSOutput#completed} with dn1's channel, then in
   * {@link FanOutOneBlockAsyncDFSOutput#completed}, dn1's channel is removed from
   * {@link FanOutOneBlockAsyncDFSOutput.Callback#unfinishedReplicas}.
   * 3.But dn2 responds slowly, before dn2 sending ack, dn1 is shut down or has an exception,
   * so {@link FanOutOneBlockAsyncDFSOutput#failed} is triggered by Netty with dn1's channel,
   * and because the {@link FanOutOneBlockAsyncDFSOutput.Callback#unfinishedReplicas} does not
   * contain dn1's channel, the {@link FanOutOneBlockAsyncDFSOutput.Callback} is skipped in
   * {@link FanOutOneBlockAsyncDFSOutput#failed} method, and
   * {@link FanOutOneBlockAsyncDFSOutput#state} is set to
   * {@link FanOutOneBlockAsyncDFSOutput.State#BROKEN}, and dn1, dn2 are all closed at the end of
   * {@link FanOutOneBlockAsyncDFSOutput#failed}.
   * 4.{@link FanOutOneBlockAsyncDFSOutput#failed} is triggered again by dn2 because it is closed,
   * but because {@link FanOutOneBlockAsyncDFSOutput#state} is already
   * {@link FanOutOneBlockAsyncDFSOutput.State#BROKEN}, the whole
   * {@link FanOutOneBlockAsyncDFSOutput#failed} is skipped. So waiting on the future
   * returned by {@link FanOutOneBlockAsyncDFSOutput#flush} would be stuck forever.
   * After HBASE-26679, for above step 4, even if the {@link FanOutOneBlockAsyncDFSOutput#state}
   * is already {@link FanOutOneBlockAsyncDFSOutput.State#BROKEN}, we would still try to trigger
   * {@link FanOutOneBlockAsyncDFSOutput.Callback#future}.
   * </pre>
   */
  @Test
  public void testFlushHangWhenOneDataNodeFailedBeforeOtherDataNodeAck() throws Exception {

    DataNodeProperties firstDataNodeProperties = null;
    try {

      final CyclicBarrier dn1AckReceivedCyclicBarrier = new CyclicBarrier(2);
      Map<Channel, DatanodeInfo> datanodeInfoMap = OUT.getDatanodeInfoMap();
      Iterator<Map.Entry<Channel, DatanodeInfo>> iterator = datanodeInfoMap.entrySet().iterator();
      assertTrue(iterator.hasNext());
      Map.Entry<Channel, DatanodeInfo> dn1Entry = iterator.next();
      Channel dn1Channel = dn1Entry.getKey();
      DatanodeInfo dn1DatanodeInfo = dn1Entry.getValue();
      final List<String> protobufDecoderNames = new ArrayList<>();
      dn1Channel.pipeline().forEach((entry) -> {
        if (ProtobufDecoder.class.isInstance(entry.getValue())) {
          protobufDecoderNames.add(entry.getKey());
        }
      });
      assertEquals(1, protobufDecoderNames.size());
      // Signal the barrier once dn1's ack has been fully processed by the decoder
      // chain, so the test knows exactly when it is safe to kill dn1.
      dn1Channel.pipeline().addAfter(protobufDecoderNames.get(0), "dn1AckReceivedHandler",
        new ChannelInboundHandlerAdapter() {
          @Override
          public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception {
            super.channelRead(ctx, msg);
            dn1AckReceivedCyclicBarrier.await();
          }
        });

      assertTrue(iterator.hasNext());
      Map.Entry<Channel, DatanodeInfo> dn2Entry = iterator.next();
      Channel dn2Channel = dn2Entry.getKey();

      /**
       * Here we add a {@link ChannelInboundHandlerAdapter} to eat all the responses to simulate a
       * slow dn2.
       */
      dn2Channel.pipeline().addFirst(new ChannelInboundHandlerAdapter() {

        @Override
        public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception {
          if (!(msg instanceof ByteBuf)) {
            ctx.fireChannelRead(msg);
          } else {
            // Drop (and release) the ack bytes so dn2 never appears to respond.
            ((ByteBuf) msg).release();
          }
        }
      });

      byte[] b = new byte[10];
      Bytes.random(b);
      OUT.write(b, 0, b.length);
      CompletableFuture<Long> future = OUT.flush(false);
      /**
       * Wait for ack from dn1.
       */
      dn1AckReceivedCyclicBarrier.await();
      /**
       * First ack is received from dn1, we could stop dn1 now.
       */
      firstDataNodeProperties = findAndKillFirstDataNode(dn1DatanodeInfo);
      assertNotNull(firstDataNodeProperties);
      try {
        /**
         * Before HBASE-26679, here we should be stuck; after HBASE-26679, we would fail soon with
         * {@link ExecutionException}.
         */
        future.get();
        fail("expected the flush future to complete exceptionally after killing dn1");
      } catch (ExecutionException e) {
        LOG.info("expected exception caught when get future", e);
      }
      /**
       * Make sure all the data node channels are closed.
       */
      datanodeInfoMap.keySet().forEach(ch -> {
        try {
          ch.closeFuture().get();
        } catch (InterruptedException | ExecutionException e) {
          throw new RuntimeException(e);
        }
      });
    } finally {
      if (firstDataNodeProperties != null) {
        CLUSTER.restartDataNode(firstDataNodeProperties);
      }
    }
  }

  /**
   * Find the datanode whose transfer address matches {@code firstDatanodeInfo} and stop it,
   * returning its {@link DataNodeProperties} so the caller can restart it later.
   */
  private static DataNodeProperties findAndKillFirstDataNode(DatanodeInfo firstDatanodeInfo) {
    assertNotNull(firstDatanodeInfo);
    ArrayList<DataNode> dataNodes = CLUSTER.getDataNodes();
    ArrayList<Integer> foundIndexes = new ArrayList<>();
    int index = 0;
    for (DataNode dataNode : dataNodes) {
      if (firstDatanodeInfo.getXferAddr().equals(dataNode.getDatanodeId().getXferAddr())) {
        foundIndexes.add(index);
      }
      index++;
    }
    // Exactly one datanode in the cluster should match dn1's transfer address.
    assertEquals(1, foundIndexes.size());
    return CLUSTER.stopDataNode(foundIndexes.get(0));
  }
}