001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertTrue;
021
022import java.io.IOException;
023import java.util.Arrays;
024import java.util.List;
025import java.util.concurrent.CountDownLatch;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.hbase.HBaseClassTestRule;
028import org.apache.hadoop.hbase.HBaseTestingUtility;
029import org.apache.hadoop.hbase.HConstants;
030import org.apache.hadoop.hbase.TableName;
031import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
032import org.apache.hadoop.hbase.YouAreDeadException;
033import org.apache.hadoop.hbase.client.Table;
034import org.apache.hadoop.hbase.testclassification.LargeTests;
035import org.apache.hadoop.hbase.testclassification.RegionServerTests;
036import org.apache.hadoop.hbase.util.Bytes;
037import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
038import org.apache.hadoop.hbase.wal.WALFactory;
039import org.apache.zookeeper.KeeperException.SessionExpiredException;
040import org.junit.After;
041import org.junit.Before;
042import org.junit.ClassRule;
043import org.junit.Test;
044import org.junit.experimental.categories.Category;
045import org.junit.runner.RunWith;
046import org.junit.runners.Parameterized;
047import org.junit.runners.Parameterized.Parameter;
048import org.junit.runners.Parameterized.Parameters;
049import org.slf4j.Logger;
050import org.slf4j.LoggerFactory;
051
052/**
053 * See HBASE-19929 for more details.
054 */
055@RunWith(Parameterized.class)
056@Category({ RegionServerTests.class, LargeTests.class })
057public class TestShutdownWhileWALBroken {
058
059  @ClassRule
060  public static final HBaseClassTestRule CLASS_RULE =
061    HBaseClassTestRule.forClass(TestShutdownWhileWALBroken.class);
062
063  private static final Logger LOG = LoggerFactory.getLogger(TestShutdownWhileWALBroken.class);
064
065  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
066
067  private static TableName TABLE_NAME = TableName.valueOf("TestShutdownWhileWALBroken");
068
069  private static byte[] CF = Bytes.toBytes("CF");
070
071  @Parameter
072  public String walType;
073
074  @Parameters(name = "{index}: WAL={0}")
075  public static List<Object[]> params() {
076    return Arrays.asList(new Object[] { "asyncfs" }, new Object[] { "filesystem" });
077  }
078
079  public static final class MyRegionServer extends HRegionServer {
080
081    private final CountDownLatch latch = new CountDownLatch(1);
082
083    public MyRegionServer(Configuration conf) throws IOException {
084      super(conf);
085    }
086
087    @Override
088    protected void tryRegionServerReport(long reportStartTime, long reportEndTime)
089      throws IOException {
090      try {
091        super.tryRegionServerReport(reportStartTime, reportEndTime);
092      } catch (YouAreDeadException e) {
093        LOG.info("Caught YouAreDeadException, ignore", e);
094      }
095    }
096
097    @Override
098    public void abort(String reason, Throwable cause) {
099      if (cause instanceof SessionExpiredException) {
100        // called from ZKWatcher, let's wait a bit to make sure that we call stop before calling
101        // abort.
102        try {
103          latch.await();
104        } catch (InterruptedException e) {
105        }
106      } else {
107        // abort from other classes, usually LogRoller, now we can make progress on abort.
108        latch.countDown();
109      }
110      super.abort(reason, cause);
111    }
112  }
113
114  @Before
115  public void setUp() throws Exception {
116    UTIL.getConfiguration().setClass(HConstants.REGION_SERVER_IMPL, MyRegionServer.class,
117      HRegionServer.class);
118    UTIL.getConfiguration().set(WALFactory.WAL_PROVIDER, walType);
119    UTIL.startMiniCluster(2);
120  }
121
122  @After
123  public void tearDown() throws Exception {
124    UTIL.shutdownMiniCluster();
125  }
126
127  @Test
128  public void test() throws Exception {
129    UTIL.createMultiRegionTable(TABLE_NAME, CF);
130    try (Table table = UTIL.getConnection().getTable(TABLE_NAME)) {
131      UTIL.loadTable(table, CF);
132    }
133    int numRegions = UTIL.getMiniHBaseCluster().getRegions(TABLE_NAME).size();
134    RegionServerThread rst0 = UTIL.getMiniHBaseCluster().getRegionServerThreads().get(0);
135    RegionServerThread rst1 = UTIL.getMiniHBaseCluster().getRegionServerThreads().get(1);
136    HRegionServer liveRS;
137    RegionServerThread toKillRSThread;
138    if (rst1.getRegionServer().getRegions(TableName.META_TABLE_NAME).isEmpty()) {
139      liveRS = rst0.getRegionServer();
140      toKillRSThread = rst1;
141    } else {
142      liveRS = rst1.getRegionServer();
143      toKillRSThread = rst0;
144    }
145    assertTrue(liveRS.getRegions(TABLE_NAME).size() < numRegions);
146    UTIL.expireSession(toKillRSThread.getRegionServer().getZooKeeper(), false);
147    UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
148
149      @Override
150      public boolean evaluate() throws Exception {
151        return liveRS.getRegions(TABLE_NAME).size() == numRegions;
152      }
153
154      @Override
155      public String explainFailure() throws Exception {
156        return "Failover is not finished yet";
157      }
158    });
159    toKillRSThread.getRegionServer().stop("Stop for test");
160    // make sure that we can successfully quit
161    toKillRSThread.join();
162  }
163}