001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.coprocessor;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertFalse;
022import static org.junit.jupiter.api.Assertions.assertTrue;
023import static org.junit.jupiter.api.Assertions.fail;
024
025import java.io.IOException;
026import java.util.Optional;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.Abortable;
029import org.apache.hadoop.hbase.CoprocessorEnvironment;
030import org.apache.hadoop.hbase.HBaseTestingUtil;
031import org.apache.hadoop.hbase.HConstants;
032import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.client.Admin;
035import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.client.TableDescriptor;
038import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
039import org.apache.hadoop.hbase.master.HMaster;
040import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
041import org.apache.hadoop.hbase.testclassification.CoprocessorTests;
042import org.apache.hadoop.hbase.testclassification.MediumTests;
043import org.apache.hadoop.hbase.util.Bytes;
044import org.apache.hadoop.hbase.zookeeper.ZKNodeTracker;
045import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
046import org.junit.jupiter.api.AfterAll;
047import org.junit.jupiter.api.BeforeAll;
048import org.junit.jupiter.api.Tag;
049import org.junit.jupiter.api.Test;
050
051/**
052 * Tests unhandled exceptions thrown by coprocessors running on master. Expected result is that the
053 * master will abort with an informative error message describing the set of its loaded coprocessors
054 * for crash diagnosis. (HBASE-4014).
055 */
056@Tag(CoprocessorTests.TAG)
057@Tag(MediumTests.TAG)
058public class TestMasterCoprocessorExceptionWithAbort {
059
060  public static class MasterTracker extends ZKNodeTracker {
061    public boolean masterZKNodeWasDeleted = false;
062
063    public MasterTracker(ZKWatcher zkw, String masterNode, Abortable abortable) {
064      super(zkw, masterNode, abortable);
065    }
066
067    @Override
068    public synchronized void nodeDeleted(String path) {
069      if (path.equals("/hbase/master")) {
070        masterZKNodeWasDeleted = true;
071      }
072    }
073  }
074
075  public static class CreateTableThread extends Thread {
076    HBaseTestingUtil UTIL;
077
078    public CreateTableThread(HBaseTestingUtil UTIL) {
079      this.UTIL = UTIL;
080    }
081
082    @Override
083    public void run() {
084      // create a table : master coprocessor will throw an exception and not
085      // catch it.
086      TableDescriptor tableDescriptor =
087        TableDescriptorBuilder.newBuilder(TableName.valueOf(TEST_TABLE))
088          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(TEST_FAMILY)).build();
089      try {
090        Admin admin = UTIL.getAdmin();
091        admin.createTable(tableDescriptor);
092        fail("BuggyMasterObserver failed to throw an exception.");
093      } catch (IOException e) {
094        assertEquals("java.io.InterruptedIOException", e.getClass().getName(),
095          "HBaseAdmin threw an interrupted IOException as expected.");
096      }
097    }
098  }
099
100  public static class BuggyMasterObserver implements MasterCoprocessor, MasterObserver {
101    private boolean preCreateTableCalled;
102    private boolean postCreateTableCalled;
103    private boolean startCalled;
104    private boolean postStartMasterCalled;
105
106    @Override
107    public Optional<MasterObserver> getMasterObserver() {
108      return Optional.of(this);
109    }
110
111    @Override
112    public void postCreateTable(ObserverContext<MasterCoprocessorEnvironment> env,
113      TableDescriptor desc, RegionInfo[] regions) throws IOException {
114      // cause a NullPointerException and don't catch it: this will cause the
115      // master to abort().
116      Integer i;
117      i = null;
118      i = i++;
119    }
120
121    public boolean wasCreateTableCalled() {
122      return preCreateTableCalled && postCreateTableCalled;
123    }
124
125    @Override
126    public void postStartMaster(ObserverContext<MasterCoprocessorEnvironment> ctx)
127      throws IOException {
128      postStartMasterCalled = true;
129    }
130
131    public boolean wasStartMasterCalled() {
132      return postStartMasterCalled;
133    }
134
135    @Override
136    public void start(CoprocessorEnvironment env) throws IOException {
137      startCalled = true;
138    }
139
140    public boolean wasStarted() {
141      return startCalled;
142    }
143  }
144
145  private static HBaseTestingUtil UTIL = new HBaseTestingUtil();
146  private static byte[] TEST_TABLE = Bytes.toBytes("observed_table");
147  private static byte[] TEST_FAMILY = Bytes.toBytes("fam1");
148
149  @BeforeAll
150  public static void setupBeforeClass() throws Exception {
151    Configuration conf = UTIL.getConfiguration();
152    conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, BuggyMasterObserver.class.getName());
153    conf.setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, true);
154    conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2); // Fail fast
155    UTIL.startMiniCluster();
156  }
157
  /**
   * Shuts down the mini cluster that {@link #setupBeforeClass()} started.
   */
  @AfterAll
  public static void teardownAfterClass() throws Exception {
    UTIL.shutdownMiniCluster();
  }
162
163  @Test
164  public void testExceptionFromCoprocessorWhenCreatingTable() throws IOException {
165    SingleProcessHBaseCluster cluster = UTIL.getHBaseCluster();
166
167    HMaster master = cluster.getMaster();
168    MasterCoprocessorHost host = master.getMasterCoprocessorHost();
169    BuggyMasterObserver cp = host.findCoprocessor(BuggyMasterObserver.class);
170    assertFalse(cp.wasCreateTableCalled(), "No table created yet");
171
172    // set a watch on the zookeeper /hbase/master node. If the master dies,
173    // the node will be deleted.
174    ZKWatcher zkw = new ZKWatcher(UTIL.getConfiguration(), "unittest", new Abortable() {
175      @Override
176      public void abort(String why, Throwable e) {
177        throw new RuntimeException("Fatal ZK error: " + why, e);
178      }
179
180      @Override
181      public boolean isAborted() {
182        return false;
183      }
184    });
185
186    MasterTracker masterTracker = new MasterTracker(zkw, "/hbase/master", new Abortable() {
187      @Override
188      public void abort(String why, Throwable e) {
189        throw new RuntimeException("Fatal ZK master tracker error, why=", e);
190      }
191
192      @Override
193      public boolean isAborted() {
194        return false;
195      }
196    });
197
198    masterTracker.start();
199    zkw.registerListener(masterTracker);
200
201    // Test (part of the) output that should have be printed by master when it aborts:
202    // (namely the part that shows the set of loaded coprocessors).
203    // In this test, there is only a single coprocessor (BuggyMasterObserver).
204    assertTrue(HMaster.getLoadedCoprocessors()
205      .contains(TestMasterCoprocessorExceptionWithAbort.BuggyMasterObserver.class.getName()));
206
207    CreateTableThread createTableThread = new CreateTableThread(UTIL);
208
209    // Attempting to create a table (using createTableThread above) triggers an NPE in
210    // BuggyMasterObserver.
211    // Master will then abort and the /hbase/master zk node will be deleted.
212    createTableThread.start();
213
214    // Wait up to 30 seconds for master's /hbase/master zk node to go away after master aborts.
215    for (int i = 0; i < 30; i++) {
216      if (masterTracker.masterZKNodeWasDeleted == true) {
217        break;
218      }
219      try {
220        Thread.sleep(1000);
221      } catch (InterruptedException e) {
222        fail("InterruptedException while waiting for master zk node to " + "be deleted.");
223      }
224    }
225
226    assertTrue(masterTracker.masterZKNodeWasDeleted,
227      "Master aborted on coprocessor exception, as expected.");
228
229    createTableThread.interrupt();
230    try {
231      createTableThread.join(1000);
232    } catch (InterruptedException e) {
233      assertTrue(true,
234        "Ignoring InterruptedException while waiting for " + " createTableThread.join().");
235    }
236  }
237
238}