001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.coprocessor;
019
020import static org.junit.jupiter.api.Assertions.assertFalse;
021import static org.junit.jupiter.api.Assertions.assertTrue;
022import static org.junit.jupiter.api.Assertions.fail;
023
024import java.io.IOException;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.hbase.HBaseTestingUtil;
027import org.apache.hadoop.hbase.TableName;
028import org.apache.hadoop.hbase.client.Durability;
029import org.apache.hadoop.hbase.client.Put;
030import org.apache.hadoop.hbase.client.Table;
031import org.apache.hadoop.hbase.regionserver.HRegionServer;
032import org.apache.hadoop.hbase.testclassification.CoprocessorTests;
033import org.apache.hadoop.hbase.testclassification.MediumTests;
034import org.apache.hadoop.hbase.util.Bytes;
035import org.apache.hadoop.hbase.wal.WALEdit;
036import org.junit.jupiter.api.AfterAll;
037import org.junit.jupiter.api.BeforeAll;
038import org.junit.jupiter.api.Tag;
039import org.junit.jupiter.api.Test;
040
041/**
042 * Tests unhandled exceptions thrown by coprocessors running on regionserver. Expected result is
043 * that the region server will remove the buggy coprocessor from its set of coprocessors and throw a
044 * org.apache.hadoop.hbase.exceptions.DoNotRetryIOException back to the client. (HBASE-4014).
045 */
046@Tag(CoprocessorTests.TAG)
047@Tag(MediumTests.TAG)
048public class TestRegionServerCoprocessorExceptionWithRemove {
049
050  public static class BuggyRegionObserver extends SimpleRegionObserver {
051    @SuppressWarnings("null")
052    @Override
053    public void prePut(final ObserverContext<? extends RegionCoprocessorEnvironment> c,
054      final Put put, final WALEdit edit, final Durability durability) {
055      String tableName =
056        c.getEnvironment().getRegion().getRegionInfo().getTable().getNameAsString();
057      if (tableName.equals("observed_table")) {
058        // Trigger a NPE to fail the coprocessor
059        Integer i = null;
060        i = i + 1;
061      }
062    }
063  }
064
065  private static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
066
067  @BeforeAll
068  public static void setupBeforeClass() throws Exception {
069    // set configure to indicate which cp should be loaded
070    Configuration conf = TEST_UTIL.getConfiguration();
071    conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, BuggyRegionObserver.class.getName());
072    TEST_UTIL.getConfiguration().setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, false);
073    TEST_UTIL.startMiniCluster();
074  }
075
076  @AfterAll
077  public static void teardownAfterClass() throws Exception {
078    TEST_UTIL.shutdownMiniCluster();
079  }
080
081  @Test
082  public void testExceptionFromCoprocessorDuringPut() throws IOException, InterruptedException {
083    // Set watches on the zookeeper nodes for all of the regionservers in the
084    // cluster. When we try to write to TEST_TABLE, the buggy coprocessor will
085    // cause a NullPointerException, which will cause the regionserver (which
086    // hosts the region we attempted to write to) to abort. In turn, this will
087    // cause the nodeDeleted() method of the DeadRegionServer tracker to
088    // execute, which will set the rsZKNodeDeleted flag to true, which will
089    // pass this test.
090
091    TableName TEST_TABLE = TableName.valueOf("observed_table");
092    byte[] TEST_FAMILY = Bytes.toBytes("aaa");
093
094    Table table = TEST_UTIL.createMultiRegionTable(TEST_TABLE, TEST_FAMILY);
095    TEST_UTIL.waitUntilAllRegionsAssigned(TEST_TABLE);
096    // Note which regionServer that should survive the buggy coprocessor's
097    // prePut().
098    HRegionServer regionServer = TEST_UTIL.getRSForFirstRegionInTable(TEST_TABLE);
099
100    boolean threwIOE = false;
101    try {
102      final byte[] ROW = Bytes.toBytes("aaa");
103      Put put = new Put(ROW);
104      put.addColumn(TEST_FAMILY, ROW, ROW);
105      table.put(put);
106      // We may need two puts to reliably get an exception
107      table.put(put);
108    } catch (IOException e) {
109      threwIOE = true;
110    } finally {
111      assertTrue(threwIOE, "The regionserver should have thrown an exception");
112    }
113
114    // Wait 10 seconds for the regionserver to abort: expected result is that
115    // it will survive and not abort.
116    for (int i = 0; i < 10; i++) {
117      assertFalse(regionServer.isAborted());
118      try {
119        Thread.sleep(1000);
120      } catch (InterruptedException e) {
121        fail("InterruptedException while waiting for regionserver " + "zk node to be deleted.");
122      }
123    }
124    table.close();
125  }
126
127}