001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.mttr;
020
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assume.assumeFalse;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.concurrent.Callable;
027import java.util.concurrent.ExecutorService;
028import java.util.concurrent.Executors;
029import java.util.concurrent.Future;
030import java.util.concurrent.TimeUnit;
031import org.apache.commons.lang3.RandomStringUtils;
032import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
033import org.apache.hadoop.hbase.ClusterMetrics;
034import org.apache.hadoop.hbase.IntegrationTestingUtility;
035import org.apache.hadoop.hbase.InvalidFamilyOperationException;
036import org.apache.hadoop.hbase.NamespaceExistException;
037import org.apache.hadoop.hbase.NamespaceNotFoundException;
038import org.apache.hadoop.hbase.TableExistsException;
039import org.apache.hadoop.hbase.TableName;
040import org.apache.hadoop.hbase.TableNotFoundException;
041import org.apache.hadoop.hbase.chaos.actions.Action;
042import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction;
043import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
044import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction;
045import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingTableAction;
046import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
047import org.apache.hadoop.hbase.client.Admin;
048import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
049import org.apache.hadoop.hbase.client.Put;
050import org.apache.hadoop.hbase.client.Result;
051import org.apache.hadoop.hbase.client.ResultScanner;
052import org.apache.hadoop.hbase.client.RetriesExhaustedException;
053import org.apache.hadoop.hbase.client.Scan;
054import org.apache.hadoop.hbase.client.Table;
055import org.apache.hadoop.hbase.client.TableDescriptor;
056import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
057import org.apache.hadoop.hbase.coprocessor.CoprocessorException;
058import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
059import org.apache.hadoop.hbase.ipc.FatalConnectionException;
060import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
061import org.apache.hadoop.hbase.security.AccessDeniedException;
062import org.apache.hadoop.hbase.testclassification.IntegrationTests;
063import org.apache.hadoop.hbase.trace.TraceUtil;
064import org.apache.hadoop.hbase.util.Bytes;
065import org.apache.hadoop.hbase.util.LoadTestTool;
066import org.apache.htrace.core.AlwaysSampler;
067import org.apache.htrace.core.Span;
068import org.apache.htrace.core.TraceScope;
069import org.junit.AfterClass;
070import org.junit.BeforeClass;
071import org.junit.Test;
072import org.junit.experimental.categories.Category;
073import org.slf4j.Logger;
074import org.slf4j.LoggerFactory;
075
076import org.apache.hbase.thirdparty.com.google.common.base.MoreObjects;
077
078/**
079 * Integration test that should benchmark how fast HBase can recover from failures. This test starts
080 * different threads:
081 * <ol>
082 * <li>
083 * Load Test Tool.<br/>
084 * This runs so that all RegionServers will have some load and WALs will be full.
085 * </li>
086 * <li>
087 * Scan thread.<br/>
088 * This thread runs a very short scan over and over again recording how log it takes to respond.
089 * The longest response is assumed to be the time it took to recover.
090 * </li>
091 * <li>
092 * Put thread.<br/>
093 * This thread just like the scan thread except it does a very small put.
094 * </li>
095 * <li>
096 * Admin thread. <br/>
097 * This thread will continually go to the master to try and get the cluster status.  Just like the
098 * put and scan threads, the time to respond is recorded.
099 * </li>
100 * <li>
101 * Chaos Monkey thread.<br/>
102 * This thread runs a ChaosMonkey.Action.
103 * </li>
104 * </ol>
105 * <p/>
106 * The ChaosMonkey actions currently run are:
107 * <ul>
108 * <li>Restart the RegionServer holding meta.</li>
109 * <li>Move the Regions of meta.</li>
110 * <li>Restart the RegionServer holding the table the scan and put threads are targeting.</li>
111 * <li>Move the Regions of the table used by the scan and put threads.</li>
112 * <li>Restart the master.</li>
113 * </ul>
114 * <p/>
115 * At the end of the test a log line is output on the INFO level containing the timing data that was
116 * collected.
117 */
118@Category(IntegrationTests.class)
119public class IntegrationTestMTTR {
120  /**
121   * Constants.
122   */
123  private static final byte[] FAMILY = Bytes.toBytes("d");
124  private static final Logger LOG = LoggerFactory.getLogger(IntegrationTestMTTR.class);
125  private static long sleepTime;
126  private static final String SLEEP_TIME_KEY = "hbase.IntegrationTestMTTR.sleeptime";
127  private static final long SLEEP_TIME_DEFAULT = 60 * 1000l;
128
129  /**
130   * Configurable table names.
131   */
132  private static TableName tableName;
133  private static TableName loadTableName;
134
135  /**
136   * Util to get at the cluster.
137   */
138  private static IntegrationTestingUtility util;
139
140  /**
141   * Executor for test threads.
142   */
143  private static ExecutorService executorService;
144
145  /**
146   * All of the chaos monkey actions used.
147   */
148  private static Action restartRSAction;
149  private static Action restartMetaAction;
150  private static Action moveMetaRegionsAction;
151  private static Action moveRegionAction;
152  private static Action restartMasterAction;
153
154  /**
155   * The load test tool used to create load and make sure that WALs aren't empty.
156   */
157  private static LoadTestTool loadTool;
158
159
160  @BeforeClass
161  public static void setUp() throws Exception {
162    // Set up the integration test util
163    if (util == null) {
164      util = new IntegrationTestingUtility();
165    }
166
167    // Make sure there are three servers.
168    util.initializeCluster(3);
169
170    // Set up the load test tool.
171    loadTool = new LoadTestTool();
172    loadTool.setConf(util.getConfiguration());
173
174    // Create executor with enough threads to restart rs's,
175    // run scans, puts, admin ops and load test tool.
176    executorService = Executors.newFixedThreadPool(8);
177
178    // Set up the tables needed.
179    setupTables();
180
181    // Set up the actions.
182    sleepTime = util.getConfiguration().getLong(SLEEP_TIME_KEY, SLEEP_TIME_DEFAULT);
183    setupActions();
184  }
185
186  private static void setupActions() throws IOException {
187    // allow a little more time for RS restart actions because RS start depends on having a master
188    // to report to and the master is also being monkeyed.
189    util.getConfiguration().setLong(Action.START_RS_TIMEOUT_KEY, 3 * 60 * 1000);
190
191    // Set up the action that will restart a region server holding a region from our table
192    // because this table should only have one region we should be good.
193    restartRSAction = new RestartRsHoldingTableAction(sleepTime,
194        util.getConnection().getRegionLocator(tableName));
195
196    // Set up the action that will kill the region holding meta.
197    restartMetaAction = new RestartRsHoldingMetaAction(sleepTime);
198
199    // Set up the action that will move the regions of meta.
200    moveMetaRegionsAction = new MoveRegionsOfTableAction(sleepTime,
201        MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, TableName.META_TABLE_NAME);
202
203    // Set up the action that will move the regions of our table.
204    moveRegionAction = new MoveRegionsOfTableAction(sleepTime,
205        MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, tableName);
206
207    // Kill the master
208    restartMasterAction = new RestartActiveMasterAction(1000);
209
210    // Give the action the access to the cluster.
211    Action.ActionContext actionContext = new Action.ActionContext(util);
212    restartRSAction.init(actionContext);
213    restartMetaAction.init(actionContext);
214    moveMetaRegionsAction.init(actionContext);
215    moveRegionAction.init(actionContext);
216    restartMasterAction.init(actionContext);
217  }
218
219  private static void setupTables() throws IOException {
220    // Get the table name.
221    tableName = TableName.valueOf(util.getConfiguration()
222        .get("hbase.IntegrationTestMTTR.tableName", "IntegrationTestMTTR"));
223
224    loadTableName = TableName.valueOf(util.getConfiguration()
225        .get("hbase.IntegrationTestMTTR.loadTableName", "IntegrationTestMTTRLoadTestTool"));
226
227    if (util.getAdmin().tableExists(tableName)) {
228      util.deleteTable(tableName);
229    }
230
231    if (util.getAdmin().tableExists(loadTableName)) {
232      util.deleteTable(loadTableName);
233    }
234
235    // Create the table.  If this fails then fail everything.
236    TableDescriptor tableDescriptor = util.getAdmin().getDescriptor(tableName);
237    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableDescriptor);
238
239    // Make the max file size huge so that splits don't happen during the test.
240    builder.setMaxFileSize(Long.MAX_VALUE);
241
242    ColumnFamilyDescriptorBuilder colDescriptorBldr =
243        ColumnFamilyDescriptorBuilder.newBuilder(FAMILY);
244    colDescriptorBldr.setMaxVersions(1);
245    builder.setColumnFamily(colDescriptorBldr.build());
246    util.getAdmin().createTable(builder.build());
247
248    // Setup the table for LoadTestTool
249    int ret = loadTool.run(new String[]{"-tn", loadTableName.getNameAsString(), "-init_only"});
250    assertEquals("Failed to initialize LoadTestTool", 0, ret);
251  }
252
253  @AfterClass
254  public static void after() throws IOException {
255    // Clean everything up.
256    util.restoreCluster();
257    util = null;
258
259    // Stop the threads so that we know everything is complete.
260    executorService.shutdown();
261    executorService = null;
262
263    // Clean up the actions.
264    moveRegionAction = null;
265    restartMetaAction = null;
266    moveMetaRegionsAction = null;
267    restartRSAction = null;
268    restartMasterAction = null;
269
270    loadTool = null;
271  }
272
273  private static boolean tablesOnMaster() {
274    boolean ret = true;
275    String value = util.getConfiguration().get("hbase.balancer.tablesOnMaster");
276    if( value != null && value.equalsIgnoreCase("none")) {
277      ret = false;
278    }
279    return ret;
280  }
281
282  @Test
283  public void testRestartRsHoldingTable() throws Exception {
284    run(new ActionCallable(restartRSAction), "RestartRsHoldingTableAction");
285  }
286
287  @Test
288  public void testKillRsHoldingMeta() throws Exception {
289    assumeFalse(tablesOnMaster());
290    run(new ActionCallable(restartMetaAction), "KillRsHoldingMeta");
291  }
292
293  @Test
294  public void testMoveMeta() throws Exception {
295    run(new ActionCallable(moveMetaRegionsAction), "MoveMeta");
296  }
297
298  @Test
299  public void testMoveRegion() throws Exception {
300    run(new ActionCallable(moveRegionAction), "MoveRegion");
301  }
302
303  @Test
304  public void testRestartMaster() throws Exception {
305    run(new ActionCallable(restartMasterAction), "RestartMaster");
306  }
307
308  public void run(Callable<Boolean> monkeyCallable, String testName) throws Exception {
309    int maxIters = util.getHBaseClusterInterface().isDistributedCluster() ? 10 : 3;
310    LOG.info("Starting " + testName + " with " + maxIters + " iterations.");
311
312    // Array to keep track of times.
313    ArrayList<TimingResult> resultPuts = new ArrayList<>(maxIters);
314    ArrayList<TimingResult> resultScan = new ArrayList<>(maxIters);
315    ArrayList<TimingResult> resultAdmin = new ArrayList<>(maxIters);
316    long start = System.nanoTime();
317
318    try {
319      // We're going to try this multiple times
320      for (int fullIterations = 0; fullIterations < maxIters; fullIterations++) {
321        // Create and start executing a callable that will kill the servers
322        Future<Boolean> monkeyFuture = executorService.submit(monkeyCallable);
323
324        // Pass that future to the timing Callables.
325        Future<TimingResult> putFuture = executorService.submit(new PutCallable(monkeyFuture));
326        Future<TimingResult> scanFuture = executorService.submit(new ScanCallable(monkeyFuture));
327        Future<TimingResult> adminFuture = executorService.submit(new AdminCallable(monkeyFuture));
328
329        Future<Boolean> loadFuture = executorService.submit(new LoadCallable(monkeyFuture));
330
331        monkeyFuture.get();
332        loadFuture.get();
333
334        // Get the values from the futures.
335        TimingResult putTime = putFuture.get();
336        TimingResult scanTime = scanFuture.get();
337        TimingResult adminTime = adminFuture.get();
338
339        // Store the times to display later.
340        resultPuts.add(putTime);
341        resultScan.add(scanTime);
342        resultAdmin.add(adminTime);
343
344        // Wait some time for everything to settle down.
345        Thread.sleep(5000l);
346      }
347    } catch (Exception e) {
348      long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
349      LOG.info(testName + " failed after " + runtimeMs + "ms.", e);
350      throw e;
351    }
352
353    long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
354
355    MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper("MTTRResults")
356        .add("putResults", resultPuts)
357        .add("scanResults", resultScan)
358        .add("adminResults", resultAdmin)
359        .add("totalRuntimeMs", runtimeMs)
360        .add("name", testName);
361
362    // Log the info
363    LOG.info(helper.toString());
364  }
365
366  /**
367   * Class to store results of TimingCallable.
368   *
369   * Stores times and trace id.
370   */
371  private static class TimingResult {
372    DescriptiveStatistics stats = new DescriptiveStatistics();
373    ArrayList<String> traces = new ArrayList<>(10);
374
375    /**
376     * Add a result to this aggregate result.
377     * @param time Time in nanoseconds
378     * @param span Span.  To be kept if the time taken was over 1 second
379     */
380    public void addResult(long time, Span span) {
381      if (span == null) {
382        return;
383      }
384      stats.addValue(TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS));
385      if (TimeUnit.SECONDS.convert(time, TimeUnit.NANOSECONDS) >= 1) {
386        traces.add(span.getTracerId());
387      }
388    }
389
390    @Override
391    public String toString() {
392      MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this)
393          .add("numResults", stats.getN())
394          .add("minTime", stats.getMin())
395          .add("meanTime", stats.getMean())
396          .add("maxTime", stats.getMax())
397          .add("25th", stats.getPercentile(25))
398          .add("50th", stats.getPercentile(50))
399          .add("75th", stats.getPercentile(75))
400          .add("90th", stats.getPercentile(90))
401          .add("95th", stats.getPercentile(95))
402          .add("99th", stats.getPercentile(99))
403          .add("99.9th", stats.getPercentile(99.9))
404          .add("99.99th", stats.getPercentile(99.99))
405          .add("traces", traces);
406      return helper.toString();
407    }
408  }
409
410  /**
411   * Base class for actions that need to record the time needed to recover from a failure.
412   */
413  static abstract class TimingCallable implements Callable<TimingResult> {
414    protected final Future<?> future;
415
416    public TimingCallable(Future<?> f) {
417      future = f;
418    }
419
420    @Override
421    public TimingResult call() throws Exception {
422      TimingResult result = new TimingResult();
423      final int maxIterations = 10;
424      int numAfterDone = 0;
425      int resetCount = 0;
426      TraceUtil.addSampler(AlwaysSampler.INSTANCE);
427      // Keep trying until the rs is back up and we've gotten a put through
428      while (numAfterDone < maxIterations) {
429        long start = System.nanoTime();
430        Span span = null;
431        try (TraceScope scope = TraceUtil.createTrace(getSpanName())) {
432          if (scope != null) {
433            span = scope.getSpan();
434          }
435          boolean actionResult = doAction();
436          if (actionResult && future.isDone()) {
437            numAfterDone++;
438          }
439
440        // the following Exceptions derive from DoNotRetryIOException. They are considered
441        // fatal for the purpose of this test. If we see one of these, it means something is
442        // broken and needs investigation. This is not the case for all children of DNRIOE.
443        // Unfortunately, this is an explicit enumeration and will need periodically refreshed.
444        // See HBASE-9655 for further discussion.
445        } catch (AccessDeniedException e) {
446          throw e;
447        } catch (CoprocessorException e) {
448          throw e;
449        } catch (FatalConnectionException e) {
450          throw e;
451        } catch (InvalidFamilyOperationException e) {
452          throw e;
453        } catch (NamespaceExistException e) {
454          throw e;
455        } catch (NamespaceNotFoundException e) {
456          throw e;
457        } catch (NoSuchColumnFamilyException e) {
458          throw e;
459        } catch (TableExistsException e) {
460          throw e;
461        } catch (TableNotFoundException e) {
462          throw e;
463        } catch (RetriesExhaustedException e){
464          throw e;
465
466        // Everything else is potentially recoverable on the application side. For instance, a CM
467        // action kills the RS that hosted a scanner the client was using. Continued use of that
468        // scanner should be terminated, but a new scanner can be created and the read attempted
469        // again.
470        } catch (Exception e) {
471          resetCount++;
472          if (resetCount < maxIterations) {
473            LOG.info("Non-fatal exception while running " + this.toString()
474              + ". Resetting loop counter", e);
475            numAfterDone = 0;
476          } else {
477            LOG.info("Too many unexpected Exceptions. Aborting.", e);
478            throw e;
479          }
480        }
481        result.addResult(System.nanoTime() - start, span);
482      }
483      return result;
484    }
485
486    protected abstract boolean doAction() throws Exception;
487
488    protected String getSpanName() {
489      return this.getClass().getSimpleName();
490    }
491
492    @Override
493    public String toString() {
494      return this.getSpanName();
495    }
496  }
497
498  /**
499   * Callable that will keep putting small amounts of data into a table
500   * until  the future supplied returns.  It keeps track of the max time.
501   */
502  static class PutCallable extends TimingCallable {
503
504    private final Table table;
505
506    public PutCallable(Future<?> f) throws IOException {
507      super(f);
508      this.table = util.getConnection().getTable(tableName);
509    }
510
511    @Override
512    protected boolean doAction() throws Exception {
513      Put p = new Put(Bytes.toBytes(RandomStringUtils.randomAlphanumeric(5)));
514      p.addColumn(FAMILY, Bytes.toBytes("\0"), Bytes.toBytes(RandomStringUtils.randomAscii(5)));
515      table.put(p);
516      return true;
517    }
518
519    @Override
520    protected String getSpanName() {
521      return "MTTR Put Test";
522    }
523  }
524
525  /**
526   * Callable that will keep scanning for small amounts of data until the
527   * supplied future returns.  Returns the max time taken to scan.
528   */
529  static class ScanCallable extends TimingCallable {
530    private final Table table;
531
532    public ScanCallable(Future<?> f) throws IOException {
533      super(f);
534      this.table = util.getConnection().getTable(tableName);
535    }
536
537    @Override
538    protected boolean doAction() throws Exception {
539      ResultScanner rs = null;
540      try {
541        Scan s = new Scan();
542        s.setBatch(2);
543        s.addFamily(FAMILY);
544        s.setFilter(new KeyOnlyFilter());
545        s.setMaxVersions(1);
546
547        rs = table.getScanner(s);
548        Result result = rs.next();
549        return result != null && result.size() > 0;
550      } finally {
551        if (rs != null) {
552          rs.close();
553        }
554      }
555    }
556    @Override
557    protected String getSpanName() {
558      return "MTTR Scan Test";
559    }
560  }
561
562  /**
563   * Callable that will keep going to the master for cluster status.  Returns the max time taken.
564   */
565  static class AdminCallable extends TimingCallable {
566
567    public AdminCallable(Future<?> f) throws IOException {
568      super(f);
569    }
570
571    @Override
572    protected boolean doAction() throws Exception {
573      Admin admin = null;
574      try {
575        admin = util.getAdmin();
576        ClusterMetrics status = admin.getClusterMetrics();
577        return status != null;
578      } finally {
579        if (admin != null) {
580          admin.close();
581        }
582      }
583    }
584
585    @Override
586    protected String getSpanName() {
587      return "MTTR Admin Test";
588    }
589  }
590
591
592  static class ActionCallable implements Callable<Boolean> {
593    private final Action action;
594
595    public ActionCallable(Action action) {
596      this.action = action;
597    }
598
599    @Override
600    public Boolean call() throws Exception {
601      this.action.perform();
602      return true;
603    }
604  }
605
606  /**
607   * Callable used to make sure the cluster has some load on it.
608   * This callable uses LoadTest tool to
609   */
610  public static class LoadCallable implements Callable<Boolean> {
611
612    private final Future<?> future;
613
614    public LoadCallable(Future<?> f) {
615      future = f;
616    }
617
618    @Override
619    public Boolean call() throws Exception {
620      int colsPerKey = 10;
621      int numServers = util.getHBaseClusterInterface().getInitialClusterMetrics()
622        .getLiveServerMetrics().size();
623      int numKeys = numServers * 5000;
624      int writeThreads = 10;
625
626
627      // Loop until the chaos monkey future is done.
628      // But always go in just in case some action completes quickly
629      do {
630        int ret = loadTool.run(new String[]{
631            "-tn", loadTableName.getNameAsString(),
632            "-write", String.format("%d:%d:%d", colsPerKey, 500, writeThreads),
633            "-num_keys", String.valueOf(numKeys),
634            "-skip_init"
635        });
636        assertEquals("Load failed", 0, ret);
637      } while (!future.isDone());
638
639      return true;
640    }
641  }
642}