001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.fail;
022
023import java.io.IOException;
024import java.util.Arrays;
025import java.util.List;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.hbase.HBaseClassTestRule;
028import org.apache.hadoop.hbase.HBaseTestingUtil;
029import org.apache.hadoop.hbase.HConstants;
030import org.apache.hadoop.hbase.TableName;
031import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
032import org.apache.hadoop.hbase.YouAreDeadException;
033import org.apache.hadoop.hbase.client.Get;
034import org.apache.hadoop.hbase.client.Put;
035import org.apache.hadoop.hbase.client.Table;
036import org.apache.hadoop.hbase.testclassification.LargeTests;
037import org.apache.hadoop.hbase.testclassification.RegionServerTests;
038import org.apache.hadoop.hbase.util.Bytes;
039import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
040import org.apache.hadoop.hbase.wal.AsyncFSWALProvider;
041import org.apache.hadoop.hbase.wal.FSHLogProvider;
042import org.apache.hadoop.hbase.wal.WALFactory;
043import org.apache.hadoop.hbase.wal.WALProvider;
044import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
045import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
046import org.junit.After;
047import org.junit.Before;
048import org.junit.ClassRule;
049import org.junit.Test;
050import org.junit.experimental.categories.Category;
051import org.junit.runner.RunWith;
052import org.junit.runners.Parameterized;
053import org.junit.runners.Parameterized.Parameter;
054import org.junit.runners.Parameterized.Parameters;
055import org.slf4j.Logger;
056import org.slf4j.LoggerFactory;
057
058/**
059 * This testcase is used to ensure that the compaction marker will fail a compaction if the RS is
060 * already dead. It can not eliminate FNFE when scanning but it does reduce the possibility a lot.
061 */
062@RunWith(Parameterized.class)
063@Category({ RegionServerTests.class, LargeTests.class })
064public class TestCompactionInDeadRegionServer {
065
066  @ClassRule
067  public static final HBaseClassTestRule CLASS_RULE =
068    HBaseClassTestRule.forClass(TestCompactionInDeadRegionServer.class);
069
070  private static final Logger LOG = LoggerFactory.getLogger(TestCompactionInDeadRegionServer.class);
071
072  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
073
074  private static final TableName TABLE_NAME = TableName.valueOf("test");
075
076  private static final byte[] CF = Bytes.toBytes("cf");
077
078  private static final byte[] CQ = Bytes.toBytes("cq");
079
080  public static final class IgnoreYouAreDeadRS extends HRegionServer {
081
082    public IgnoreYouAreDeadRS(Configuration conf) throws IOException, InterruptedException {
083      super(conf);
084    }
085
086    @Override
087    protected void tryRegionServerReport(long reportStartTime, long reportEndTime)
088      throws IOException {
089      try {
090        super.tryRegionServerReport(reportStartTime, reportEndTime);
091      } catch (YouAreDeadException e) {
092        // ignore, do not abort
093      }
094    }
095  }
096
097  @Parameter
098  public Class<? extends WALProvider> walProvider;
099
100  @Parameters(name = "{index}: wal={0}")
101  public static List<Object[]> params() {
102    return Arrays.asList(new Object[] { FSHLogProvider.class },
103      new Object[] { AsyncFSWALProvider.class });
104  }
105
106  @Before
107  public void setUp() throws Exception {
108    UTIL.getConfiguration().setClass(WALFactory.WAL_PROVIDER, walProvider, WALProvider.class);
109    UTIL.getConfiguration().setInt(HConstants.ZK_SESSION_TIMEOUT, 2000);
110    UTIL.getConfiguration().setClass(HConstants.REGION_SERVER_IMPL, IgnoreYouAreDeadRS.class,
111      HRegionServer.class);
112    UTIL.startMiniCluster(2);
113    Table table = UTIL.createTable(TABLE_NAME, CF);
114    for (int i = 0; i < 10; i++) {
115      table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
116    }
117    UTIL.getAdmin().flush(TABLE_NAME);
118    for (int i = 10; i < 20; i++) {
119      table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
120    }
121    UTIL.getAdmin().flush(TABLE_NAME);
122  }
123
124  @After
125  public void tearDown() throws Exception {
126    UTIL.shutdownMiniCluster();
127  }
128
129  @Test
130  public void test() throws Exception {
131    HRegionServer regionSvr = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
132    HRegion region = regionSvr.getRegions(TABLE_NAME).get(0);
133    String regName = region.getRegionInfo().getEncodedName();
134    List<HRegion> metaRegs = regionSvr.getRegions(TableName.META_TABLE_NAME);
135    if (metaRegs != null && !metaRegs.isEmpty()) {
136      LOG.info("meta is on the same server: " + regionSvr);
137      // when region is on same server as hbase:meta, reassigning meta would abort the server
138      // since WAL is broken.
139      // so the region is moved to a different server
140      HRegionServer otherRs = UTIL.getOtherRegionServer(regionSvr);
141      UTIL.moveRegionAndWait(region.getRegionInfo(), otherRs.getServerName());
142      LOG.info("Moved region: " + regName + " to " + otherRs.getServerName());
143    }
144    HRegionServer rsToSuspend = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
145    region = rsToSuspend.getRegions(TABLE_NAME).get(0);
146
147    ZKWatcher watcher = UTIL.getZooKeeperWatcher();
148    watcher.getRecoverableZooKeeper().delete(
149      ZNodePaths.joinZNode(watcher.getZNodePaths().rsZNode, rsToSuspend.getServerName().toString()),
150      -1);
151    LOG.info("suspending " + rsToSuspend);
152    UTIL.waitFor(60000, 1000, new ExplainingPredicate<Exception>() {
153
154      @Override
155      public boolean evaluate() throws Exception {
156        for (RegionServerThread thread : UTIL.getHBaseCluster().getRegionServerThreads()) {
157          HRegionServer rs = thread.getRegionServer();
158          if (rs != rsToSuspend) {
159            return !rs.getRegions(TABLE_NAME).isEmpty();
160          }
161        }
162        return false;
163      }
164
165      @Override
166      public String explainFailure() throws Exception {
167        return "The region for " + TABLE_NAME + " is still on " + rsToSuspend.getServerName();
168      }
169    });
170    try {
171      region.compact(true);
172      fail("Should fail as our wal file has already been closed, "
173        + "and walDir has also been renamed");
174    } catch (Exception e) {
175      LOG.debug("expected exception: ", e);
176    }
177    Table table = UTIL.getConnection().getTable(TABLE_NAME);
178    // should not hit FNFE
179    for (int i = 0; i < 20; i++) {
180      assertEquals(i, Bytes.toInt(table.get(new Get(Bytes.toBytes(i))).getValue(CF, CQ)));
181    }
182  }
183}