001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.jupiter.api.Assertions.assertTrue;
021
022import java.io.IOException;
023import java.util.concurrent.CountDownLatch;
024import java.util.stream.Stream;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.hbase.HBaseParameterizedTestTemplate;
027import org.apache.hadoop.hbase.HBaseTestingUtil;
028import org.apache.hadoop.hbase.HConstants;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
031import org.apache.hadoop.hbase.YouAreDeadException;
032import org.apache.hadoop.hbase.client.Table;
033import org.apache.hadoop.hbase.testclassification.LargeTests;
034import org.apache.hadoop.hbase.testclassification.RegionServerTests;
035import org.apache.hadoop.hbase.util.Bytes;
036import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
037import org.apache.hadoop.hbase.wal.WALFactory;
038import org.apache.zookeeper.KeeperException.SessionExpiredException;
039import org.junit.jupiter.api.AfterEach;
040import org.junit.jupiter.api.BeforeEach;
041import org.junit.jupiter.api.Tag;
042import org.junit.jupiter.api.TestTemplate;
043import org.junit.jupiter.params.provider.Arguments;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047/**
048 * See HBASE-19929 for more details.
049 */
050@Tag(RegionServerTests.TAG)
051@Tag(LargeTests.TAG)
052@HBaseParameterizedTestTemplate(name = "{index}: WAL={0}")
053public class TestShutdownWhileWALBroken {
054
055  private static final Logger LOG = LoggerFactory.getLogger(TestShutdownWhileWALBroken.class);
056
057  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
058
059  private static TableName TABLE_NAME = TableName.valueOf("TestShutdownWhileWALBroken");
060
061  private static byte[] CF = Bytes.toBytes("CF");
062
063  public String walType;
064
065  public static Stream<Arguments> parameters() {
066    return Stream.of(Arguments.of("asyncfs"), Arguments.of("filesystem"));
067  }
068
069  public TestShutdownWhileWALBroken(String walType) {
070    this.walType = walType;
071  }
072
073  public static final class MyRegionServer extends HRegionServer {
074
075    private final CountDownLatch latch = new CountDownLatch(1);
076
077    public MyRegionServer(Configuration conf) throws IOException {
078      super(conf);
079    }
080
081    @Override
082    protected void tryRegionServerReport(long reportStartTime, long reportEndTime)
083      throws IOException {
084      try {
085        super.tryRegionServerReport(reportStartTime, reportEndTime);
086      } catch (YouAreDeadException e) {
087        LOG.info("Caught YouAreDeadException, ignore", e);
088      }
089    }
090
091    @Override
092    public void abort(String reason, Throwable cause) {
093      if (cause instanceof SessionExpiredException) {
094        // called from ZKWatcher, let's wait a bit to make sure that we call stop before calling
095        // abort.
096        try {
097          latch.await();
098        } catch (InterruptedException e) {
099        }
100      } else {
101        // abort from other classes, usually LogRoller, now we can make progress on abort.
102        latch.countDown();
103      }
104      super.abort(reason, cause);
105    }
106  }
107
108  @BeforeEach
109  public void setUp() throws Exception {
110    UTIL.getConfiguration().setClass(HConstants.REGION_SERVER_IMPL, MyRegionServer.class,
111      HRegionServer.class);
112    UTIL.getConfiguration().set(WALFactory.WAL_PROVIDER, walType);
113    UTIL.startMiniCluster(2);
114  }
115
116  @AfterEach
117  public void tearDown() throws Exception {
118    UTIL.shutdownMiniCluster();
119  }
120
121  @TestTemplate
122  public void test() throws Exception {
123    UTIL.createMultiRegionTable(TABLE_NAME, CF);
124    try (Table table = UTIL.getConnection().getTable(TABLE_NAME)) {
125      UTIL.loadTable(table, CF);
126    }
127    int numRegions = UTIL.getMiniHBaseCluster().getRegions(TABLE_NAME).size();
128    RegionServerThread rst0 = UTIL.getMiniHBaseCluster().getRegionServerThreads().get(0);
129    RegionServerThread rst1 = UTIL.getMiniHBaseCluster().getRegionServerThreads().get(1);
130    HRegionServer liveRS;
131    RegionServerThread toKillRSThread;
132    if (rst1.getRegionServer().getRegions(TableName.META_TABLE_NAME).isEmpty()) {
133      liveRS = rst0.getRegionServer();
134      toKillRSThread = rst1;
135    } else {
136      liveRS = rst1.getRegionServer();
137      toKillRSThread = rst0;
138    }
139    assertTrue(liveRS.getRegions(TABLE_NAME).size() < numRegions);
140    UTIL.expireSession(toKillRSThread.getRegionServer().getZooKeeper(), false);
141    UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
142
143      @Override
144      public boolean evaluate() throws Exception {
145        return liveRS.getRegions(TABLE_NAME).size() == numRegions;
146      }
147
148      @Override
149      public String explainFailure() throws Exception {
150        return "Failover is not finished yet";
151      }
152    });
153    toKillRSThread.getRegionServer().stop("Stop for test");
154    // make sure that we can successfully quit
155    toKillRSThread.join();
156  }
157}