001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.mttr;
020
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assume.assumeFalse;
023
024import io.opentelemetry.api.trace.Span;
025import io.opentelemetry.context.Scope;
026import java.io.IOException;
027import java.util.ArrayList;
028import java.util.concurrent.Callable;
029import java.util.concurrent.ExecutorService;
030import java.util.concurrent.Executors;
031import java.util.concurrent.Future;
032import java.util.concurrent.TimeUnit;
033import org.apache.commons.lang3.RandomStringUtils;
034import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
035import org.apache.hadoop.hbase.ClusterMetrics;
036import org.apache.hadoop.hbase.IntegrationTestingUtility;
037import org.apache.hadoop.hbase.InvalidFamilyOperationException;
038import org.apache.hadoop.hbase.NamespaceExistException;
039import org.apache.hadoop.hbase.NamespaceNotFoundException;
040import org.apache.hadoop.hbase.TableExistsException;
041import org.apache.hadoop.hbase.TableName;
042import org.apache.hadoop.hbase.TableNotFoundException;
043import org.apache.hadoop.hbase.chaos.actions.Action;
044import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction;
045import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
046import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction;
047import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingTableAction;
048import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
049import org.apache.hadoop.hbase.client.Admin;
050import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
051import org.apache.hadoop.hbase.client.Put;
052import org.apache.hadoop.hbase.client.Result;
053import org.apache.hadoop.hbase.client.ResultScanner;
054import org.apache.hadoop.hbase.client.RetriesExhaustedException;
055import org.apache.hadoop.hbase.client.Scan;
056import org.apache.hadoop.hbase.client.Table;
057import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
058import org.apache.hadoop.hbase.coprocessor.CoprocessorException;
059import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
060import org.apache.hadoop.hbase.ipc.FatalConnectionException;
061import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
062import org.apache.hadoop.hbase.security.AccessDeniedException;
063import org.apache.hadoop.hbase.testclassification.IntegrationTests;
064import org.apache.hadoop.hbase.trace.TraceUtil;
065import org.apache.hadoop.hbase.util.Bytes;
066import org.apache.hadoop.hbase.util.LoadTestTool;
067import org.junit.AfterClass;
068import org.junit.BeforeClass;
069import org.junit.Test;
070import org.junit.experimental.categories.Category;
071import org.slf4j.Logger;
072import org.slf4j.LoggerFactory;
073
074import org.apache.hbase.thirdparty.com.google.common.base.MoreObjects;
075
076/**
077 * Integration test that should benchmark how fast HBase can recover from failures. This test starts
078 * different threads:
079 * <ol>
080 * <li>
081 * Load Test Tool.<br/>
082 * This runs so that all RegionServers will have some load and WALs will be full.
083 * </li>
084 * <li>
085 * Scan thread.<br/>
086 * This thread runs a very short scan over and over again recording how log it takes to respond.
087 * The longest response is assumed to be the time it took to recover.
088 * </li>
089 * <li>
090 * Put thread.<br/>
091 * This thread just like the scan thread except it does a very small put.
092 * </li>
093 * <li>
094 * Admin thread. <br/>
095 * This thread will continually go to the master to try and get the cluster status.  Just like the
096 * put and scan threads, the time to respond is recorded.
097 * </li>
098 * <li>
099 * Chaos Monkey thread.<br/>
100 * This thread runs a ChaosMonkey.Action.
101 * </li>
102 * </ol>
103 * <p/>
104 * The ChaosMonkey actions currently run are:
105 * <ul>
106 * <li>Restart the RegionServer holding meta.</li>
107 * <li>Move the Regions of meta.</li>
108 * <li>Restart the RegionServer holding the table the scan and put threads are targeting.</li>
109 * <li>Move the Regions of the table used by the scan and put threads.</li>
110 * <li>Restart the master.</li>
111 * </ul>
112 * <p/>
113 * At the end of the test a log line is output on the INFO level containing the timing data that was
114 * collected.
115 */
116@Category(IntegrationTests.class)
117public class IntegrationTestMTTR {
118  /**
119   * Constants.
120   */
121  private static final byte[] FAMILY = Bytes.toBytes("d");
122  private static final Logger LOG = LoggerFactory.getLogger(IntegrationTestMTTR.class);
123  private static long sleepTime;
124  private static final String SLEEP_TIME_KEY = "hbase.IntegrationTestMTTR.sleeptime";
125  private static final long SLEEP_TIME_DEFAULT = 60 * 1000l;
126
127  /**
128   * Configurable table names.
129   */
130  private static TableName tableName;
131  private static TableName loadTableName;
132
133  /**
134   * Util to get at the cluster.
135   */
136  private static IntegrationTestingUtility util;
137
138  /**
139   * Executor for test threads.
140   */
141  private static ExecutorService executorService;
142
143  /**
144   * All of the chaos monkey actions used.
145   */
146  private static Action restartRSAction;
147  private static Action restartMetaAction;
148  private static Action moveMetaRegionsAction;
149  private static Action moveRegionAction;
150  private static Action restartMasterAction;
151
152  /**
153   * The load test tool used to create load and make sure that WALs aren't empty.
154   */
155  private static LoadTestTool loadTool;
156
157
158  @BeforeClass
159  public static void setUp() throws Exception {
160    // Set up the integration test util
161    if (util == null) {
162      util = new IntegrationTestingUtility();
163    }
164
165    // Make sure there are three servers.
166    util.initializeCluster(3);
167
168    // Set up the load test tool.
169    loadTool = new LoadTestTool();
170    loadTool.setConf(util.getConfiguration());
171
172    // Create executor with enough threads to restart rs's,
173    // run scans, puts, admin ops and load test tool.
174    executorService = Executors.newFixedThreadPool(8);
175
176    // Set up the tables needed.
177    setupTables();
178
179    // Set up the actions.
180    sleepTime = util.getConfiguration().getLong(SLEEP_TIME_KEY, SLEEP_TIME_DEFAULT);
181    setupActions();
182  }
183
184  private static void setupActions() throws IOException {
185    // allow a little more time for RS restart actions because RS start depends on having a master
186    // to report to and the master is also being monkeyed.
187    util.getConfiguration().setLong(Action.START_RS_TIMEOUT_KEY, 3 * 60 * 1000);
188
189    // Set up the action that will restart a region server holding a region from our table
190    // because this table should only have one region we should be good.
191    restartRSAction = new RestartRsHoldingTableAction(sleepTime,
192        util.getConnection().getRegionLocator(tableName));
193
194    // Set up the action that will kill the region holding meta.
195    restartMetaAction = new RestartRsHoldingMetaAction(sleepTime);
196
197    // Set up the action that will move the regions of meta.
198    moveMetaRegionsAction = new MoveRegionsOfTableAction(sleepTime,
199        MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, TableName.META_TABLE_NAME);
200
201    // Set up the action that will move the regions of our table.
202    moveRegionAction = new MoveRegionsOfTableAction(sleepTime,
203        MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, tableName);
204
205    // Kill the master
206    restartMasterAction = new RestartActiveMasterAction(1000);
207
208    // Give the action the access to the cluster.
209    Action.ActionContext actionContext = new Action.ActionContext(util);
210    restartRSAction.init(actionContext);
211    restartMetaAction.init(actionContext);
212    moveMetaRegionsAction.init(actionContext);
213    moveRegionAction.init(actionContext);
214    restartMasterAction.init(actionContext);
215  }
216
217  private static void setupTables() throws IOException {
218    // Get the table name.
219    tableName = TableName.valueOf(util.getConfiguration()
220        .get("hbase.IntegrationTestMTTR.tableName", "IntegrationTestMTTR"));
221
222    loadTableName = TableName.valueOf(util.getConfiguration()
223        .get("hbase.IntegrationTestMTTR.loadTableName", "IntegrationTestMTTRLoadTestTool"));
224
225    if (util.getAdmin().tableExists(tableName)) {
226      util.deleteTable(tableName);
227    }
228
229    if (util.getAdmin().tableExists(loadTableName)) {
230      util.deleteTable(loadTableName);
231    }
232
233    // Create the table.  If this fails then fail everything.
234    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
235
236    // Make the max file size huge so that splits don't happen during the test.
237    builder.setMaxFileSize(Long.MAX_VALUE);
238
239    ColumnFamilyDescriptorBuilder colDescriptorBldr =
240        ColumnFamilyDescriptorBuilder.newBuilder(FAMILY);
241    colDescriptorBldr.setMaxVersions(1);
242    builder.setColumnFamily(colDescriptorBldr.build());
243    util.getAdmin().createTable(builder.build());
244
245    // Setup the table for LoadTestTool
246    int ret = loadTool.run(new String[]{"-tn", loadTableName.getNameAsString(), "-init_only"});
247    assertEquals("Failed to initialize LoadTestTool", 0, ret);
248  }
249
250  @AfterClass
251  public static void after() throws IOException {
252    // Clean everything up.
253    util.restoreCluster();
254    util = null;
255
256    // Stop the threads so that we know everything is complete.
257    executorService.shutdown();
258    executorService = null;
259
260    // Clean up the actions.
261    moveRegionAction = null;
262    restartMetaAction = null;
263    moveMetaRegionsAction = null;
264    restartRSAction = null;
265    restartMasterAction = null;
266
267    loadTool = null;
268  }
269
270  private static boolean tablesOnMaster() {
271    boolean ret = true;
272    String value = util.getConfiguration().get("hbase.balancer.tablesOnMaster");
273    if( value != null && value.equalsIgnoreCase("none")) {
274      ret = false;
275    }
276    return ret;
277  }
278
279  @Test
280  public void testRestartRsHoldingTable() throws Exception {
281    run(new ActionCallable(restartRSAction), "RestartRsHoldingTableAction");
282  }
283
284  @Test
285  public void testKillRsHoldingMeta() throws Exception {
286    assumeFalse(tablesOnMaster());
287    run(new ActionCallable(restartMetaAction), "KillRsHoldingMeta");
288  }
289
290  @Test
291  public void testMoveMeta() throws Exception {
292    run(new ActionCallable(moveMetaRegionsAction), "MoveMeta");
293  }
294
295  @Test
296  public void testMoveRegion() throws Exception {
297    run(new ActionCallable(moveRegionAction), "MoveRegion");
298  }
299
300  @Test
301  public void testRestartMaster() throws Exception {
302    run(new ActionCallable(restartMasterAction), "RestartMaster");
303  }
304
305  public void run(Callable<Boolean> monkeyCallable, String testName) throws Exception {
306    int maxIters = util.getHBaseClusterInterface().isDistributedCluster() ? 10 : 3;
307    LOG.info("Starting " + testName + " with " + maxIters + " iterations.");
308
309    // Array to keep track of times.
310    ArrayList<TimingResult> resultPuts = new ArrayList<>(maxIters);
311    ArrayList<TimingResult> resultScan = new ArrayList<>(maxIters);
312    ArrayList<TimingResult> resultAdmin = new ArrayList<>(maxIters);
313    long start = System.nanoTime();
314
315    try {
316      // We're going to try this multiple times
317      for (int fullIterations = 0; fullIterations < maxIters; fullIterations++) {
318        // Create and start executing a callable that will kill the servers
319        Future<Boolean> monkeyFuture = executorService.submit(monkeyCallable);
320
321        // Pass that future to the timing Callables.
322        Future<TimingResult> putFuture = executorService.submit(new PutCallable(monkeyFuture));
323        Future<TimingResult> scanFuture = executorService.submit(new ScanCallable(monkeyFuture));
324        Future<TimingResult> adminFuture = executorService.submit(new AdminCallable(monkeyFuture));
325
326        Future<Boolean> loadFuture = executorService.submit(new LoadCallable(monkeyFuture));
327
328        monkeyFuture.get();
329        loadFuture.get();
330
331        // Get the values from the futures.
332        TimingResult putTime = putFuture.get();
333        TimingResult scanTime = scanFuture.get();
334        TimingResult adminTime = adminFuture.get();
335
336        // Store the times to display later.
337        resultPuts.add(putTime);
338        resultScan.add(scanTime);
339        resultAdmin.add(adminTime);
340
341        // Wait some time for everything to settle down.
342        Thread.sleep(5000l);
343      }
344    } catch (Exception e) {
345      long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
346      LOG.info(testName + " failed after " + runtimeMs + "ms.", e);
347      throw e;
348    }
349
350    long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
351
352    MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper("MTTRResults")
353        .add("putResults", resultPuts)
354        .add("scanResults", resultScan)
355        .add("adminResults", resultAdmin)
356        .add("totalRuntimeMs", runtimeMs)
357        .add("name", testName);
358
359    // Log the info
360    LOG.info(helper.toString());
361  }
362
363  /**
364   * Class to store results of TimingCallable.
365   *
366   * Stores times and trace id.
367   */
368  private static class TimingResult {
369    DescriptiveStatistics stats = new DescriptiveStatistics();
370    ArrayList<String> traces = new ArrayList<>(10);
371
372    /**
373     * Add a result to this aggregate result.
374     * @param time Time in nanoseconds
375     * @param span Span.  To be kept if the time taken was over 1 second
376     */
377    public void addResult(long time, Span span) {
378      stats.addValue(TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS));
379      if (TimeUnit.SECONDS.convert(time, TimeUnit.NANOSECONDS) >= 1) {
380        traces.add(span.getSpanContext().getTraceId());
381      }
382    }
383
384    @Override
385    public String toString() {
386      MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this)
387          .add("numResults", stats.getN())
388          .add("minTime", stats.getMin())
389          .add("meanTime", stats.getMean())
390          .add("maxTime", stats.getMax())
391          .add("25th", stats.getPercentile(25))
392          .add("50th", stats.getPercentile(50))
393          .add("75th", stats.getPercentile(75))
394          .add("90th", stats.getPercentile(90))
395          .add("95th", stats.getPercentile(95))
396          .add("99th", stats.getPercentile(99))
397          .add("99.9th", stats.getPercentile(99.9))
398          .add("99.99th", stats.getPercentile(99.99))
399          .add("traces", traces);
400      return helper.toString();
401    }
402  }
403
404  /**
405   * Base class for actions that need to record the time needed to recover from a failure.
406   */
407  static abstract class TimingCallable implements Callable<TimingResult> {
408    protected final Future<?> future;
409
410    public TimingCallable(Future<?> f) {
411      future = f;
412    }
413
414    @Override
415    public TimingResult call() throws Exception {
416      TimingResult result = new TimingResult();
417      final int maxIterations = 10;
418      int numAfterDone = 0;
419      int resetCount = 0;
420      // Keep trying until the rs is back up and we've gotten a put through
421      while (numAfterDone < maxIterations) {
422        long start = System.nanoTime();
423        Span span = TraceUtil.getGlobalTracer().spanBuilder(getSpanName()).startSpan();
424        try (Scope scope = span.makeCurrent()) {
425          boolean actionResult = doAction();
426          if (actionResult && future.isDone()) {
427            numAfterDone++;
428          }
429
430        // the following Exceptions derive from DoNotRetryIOException. They are considered
431        // fatal for the purpose of this test. If we see one of these, it means something is
432        // broken and needs investigation. This is not the case for all children of DNRIOE.
433        // Unfortunately, this is an explicit enumeration and will need periodically refreshed.
434        // See HBASE-9655 for further discussion.
435        } catch (AccessDeniedException e) {
436          throw e;
437        } catch (CoprocessorException e) {
438          throw e;
439        } catch (FatalConnectionException e) {
440          throw e;
441        } catch (InvalidFamilyOperationException e) {
442          throw e;
443        } catch (NamespaceExistException e) {
444          throw e;
445        } catch (NamespaceNotFoundException e) {
446          throw e;
447        } catch (NoSuchColumnFamilyException e) {
448          throw e;
449        } catch (TableExistsException e) {
450          throw e;
451        } catch (TableNotFoundException e) {
452          throw e;
453        } catch (RetriesExhaustedException e){
454          throw e;
455        // Everything else is potentially recoverable on the application side. For instance, a CM
456        // action kills the RS that hosted a scanner the client was using. Continued use of that
457        // scanner should be terminated, but a new scanner can be created and the read attempted
458        // again.
459        } catch (Exception e) {
460          resetCount++;
461          if (resetCount < maxIterations) {
462            LOG.info("Non-fatal exception while running " + this.toString()
463              + ". Resetting loop counter", e);
464            numAfterDone = 0;
465          } else {
466            LOG.info("Too many unexpected Exceptions. Aborting.", e);
467            throw e;
468          }
469        } finally {
470          span.end();
471        }
472        result.addResult(System.nanoTime() - start, span);
473      }
474      return result;
475    }
476
477    protected abstract boolean doAction() throws Exception;
478
479    protected String getSpanName() {
480      return this.getClass().getSimpleName();
481    }
482
483    @Override
484    public String toString() {
485      return this.getSpanName();
486    }
487  }
488
489  /**
490   * Callable that will keep putting small amounts of data into a table
491   * until  the future supplied returns.  It keeps track of the max time.
492   */
493  static class PutCallable extends TimingCallable {
494
495    private final Table table;
496
497    public PutCallable(Future<?> f) throws IOException {
498      super(f);
499      this.table = util.getConnection().getTable(tableName);
500    }
501
502    @Override
503    protected boolean doAction() throws Exception {
504      Put p = new Put(Bytes.toBytes(RandomStringUtils.randomAlphanumeric(5)));
505      p.addColumn(FAMILY, Bytes.toBytes("\0"), Bytes.toBytes(RandomStringUtils.randomAscii(5)));
506      table.put(p);
507      return true;
508    }
509
510    @Override
511    protected String getSpanName() {
512      return "MTTR Put Test";
513    }
514  }
515
516  /**
517   * Callable that will keep scanning for small amounts of data until the
518   * supplied future returns.  Returns the max time taken to scan.
519   */
520  static class ScanCallable extends TimingCallable {
521    private final Table table;
522
523    public ScanCallable(Future<?> f) throws IOException {
524      super(f);
525      this.table = util.getConnection().getTable(tableName);
526    }
527
528    @Override
529    protected boolean doAction() throws Exception {
530      ResultScanner rs = null;
531      try {
532        Scan s = new Scan();
533        s.setBatch(2);
534        s.addFamily(FAMILY);
535        s.setFilter(new KeyOnlyFilter());
536        s.readVersions(1);
537
538        rs = table.getScanner(s);
539        Result result = rs.next();
540        return result != null && result.size() > 0;
541      } finally {
542        if (rs != null) {
543          rs.close();
544        }
545      }
546    }
547    @Override
548    protected String getSpanName() {
549      return "MTTR Scan Test";
550    }
551  }
552
553  /**
554   * Callable that will keep going to the master for cluster status.  Returns the max time taken.
555   */
556  static class AdminCallable extends TimingCallable {
557
558    public AdminCallable(Future<?> f) throws IOException {
559      super(f);
560    }
561
562    @Override
563    protected boolean doAction() throws Exception {
564      Admin admin = null;
565      try {
566        admin = util.getAdmin();
567        ClusterMetrics status = admin.getClusterMetrics();
568        return status != null;
569      } finally {
570        if (admin != null) {
571          admin.close();
572        }
573      }
574    }
575
576    @Override
577    protected String getSpanName() {
578      return "MTTR Admin Test";
579    }
580  }
581
582
583  static class ActionCallable implements Callable<Boolean> {
584    private final Action action;
585
586    public ActionCallable(Action action) {
587      this.action = action;
588    }
589
590    @Override
591    public Boolean call() throws Exception {
592      this.action.perform();
593      return true;
594    }
595  }
596
597  /**
598   * Callable used to make sure the cluster has some load on it.
599   * This callable uses LoadTest tool to
600   */
601  public static class LoadCallable implements Callable<Boolean> {
602
603    private final Future<?> future;
604
605    public LoadCallable(Future<?> f) {
606      future = f;
607    }
608
609    @Override
610    public Boolean call() throws Exception {
611      int colsPerKey = 10;
612      int numServers = util.getHBaseClusterInterface().getInitialClusterMetrics()
613        .getLiveServerMetrics().size();
614      int numKeys = numServers * 5000;
615      int writeThreads = 10;
616
617
618      // Loop until the chaos monkey future is done.
619      // But always go in just in case some action completes quickly
620      do {
621        int ret = loadTool.run(new String[]{
622            "-tn", loadTableName.getNameAsString(),
623            "-write", String.format("%d:%d:%d", colsPerKey, 500, writeThreads),
624            "-num_keys", String.valueOf(numKeys),
625            "-skip_init"
626        });
627        assertEquals("Load failed", 0, ret);
628      } while (!future.isDone());
629
630      return true;
631    }
632  }
633}