001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import org.apache.hadoop.util.Shell.ExitCodeException;
023import org.apache.hadoop.util.Shell.ShellCommandExecutor;
024import org.slf4j.Logger;
025import org.slf4j.LoggerFactory;
026
027/**
028 * A utility for executing an external script that checks the health of the node. An example script
029 * can be found at <tt>src/main/sh/healthcheck/healthcheck.sh</tt> in the <tt>hbase-examples</tt>
030 * module.
031 */
032class HealthChecker {
033
034  private static final Logger LOG = LoggerFactory.getLogger(HealthChecker.class);
035  private ShellCommandExecutor shexec = null;
036  private String exceptionStackTrace;
037
038  /** Pattern used for searching in the output of the node health script */
039  static private final String ERROR_PATTERN = "ERROR";
040
041  private String healthCheckScript;
042  private long scriptTimeout;
043
044  enum HealthCheckerExitStatus {
045    SUCCESS,
046    TIMED_OUT,
047    FAILED_WITH_EXIT_CODE,
048    FAILED_WITH_EXCEPTION,
049    FAILED
050  }
051
052  /**
053   * Initialize.
054   * @param location the location of the health script
055   * @param timeout  the timeout to be used for the health script
056   */
057  public void init(String location, long timeout) {
058    this.healthCheckScript = location;
059    this.scriptTimeout = timeout;
060    ArrayList<String> execScript = new ArrayList<>();
061    execScript.add(healthCheckScript);
062    this.shexec = new ShellCommandExecutor(execScript.toArray(new String[execScript.size()]), null,
063      null, scriptTimeout);
064    LOG.info("HealthChecker initialized with script at " + this.healthCheckScript + ", timeout="
065      + timeout);
066  }
067
068  public HealthReport checkHealth() {
069    HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS;
070    try {
071      // Calling this execute leaves around running executor threads.
072      shexec.execute();
073    } catch (ExitCodeException e) {
074      // ignore the exit code of the script
075      LOG.warn("Caught exception : " + e + ",exit code:" + e.getExitCode());
076      status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE;
077    } catch (IOException e) {
078      LOG.warn("Caught exception : " + e);
079      status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION;
080      exceptionStackTrace = org.apache.hadoop.util.StringUtils.stringifyException(e);
081    } finally {
082      if (shexec.isTimedOut()) {
083        status = HealthCheckerExitStatus.TIMED_OUT;
084      }
085      if (status == HealthCheckerExitStatus.SUCCESS) {
086        if (hasErrors(shexec.getOutput())) {
087          status = HealthCheckerExitStatus.FAILED;
088        }
089      }
090    }
091    return new HealthReport(status, getHealthReport(status));
092  }
093
094  private boolean hasErrors(String output) {
095    String[] splits = output.split("\n");
096    for (String split : splits) {
097      if (split.startsWith(ERROR_PATTERN)) {
098        return true;
099      }
100    }
101    return false;
102  }
103
104  private String getHealthReport(HealthCheckerExitStatus status) {
105    String healthReport = null;
106    switch (status) {
107      case SUCCESS:
108        healthReport = "Server is healthy.";
109        break;
110      case TIMED_OUT:
111        healthReport = "Health script timed out";
112        break;
113      case FAILED_WITH_EXCEPTION:
114        healthReport = exceptionStackTrace;
115        break;
116      case FAILED_WITH_EXIT_CODE:
117        healthReport = "Health script failed with exit code.";
118        break;
119      case FAILED:
120        healthReport = shexec.getOutput();
121        break;
122    }
123    return healthReport;
124  }
125}