001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import java.io.IOException;
021import java.util.ArrayList;
022
023import org.apache.hadoop.util.Shell.ExitCodeException;
024import org.apache.hadoop.util.Shell.ShellCommandExecutor;
025import org.slf4j.Logger;
026import org.slf4j.LoggerFactory;
027
028/**
029 * A utility for executing an external script that checks the health of
030 * the node. An example script can be found at
031 * <tt>src/main/sh/healthcheck/healthcheck.sh</tt> in the
032 * <tt>hbase-examples</tt> module.
033 */
034class HealthChecker {
035
036  private static final Logger LOG = LoggerFactory.getLogger(HealthChecker.class);
037  private ShellCommandExecutor shexec = null;
038  private String exceptionStackTrace;
039
040  /** Pattern used for searching in the output of the node health script */
041  static private final String ERROR_PATTERN = "ERROR";
042
043  private String healthCheckScript;
044  private long scriptTimeout;
045
046  enum HealthCheckerExitStatus {
047    SUCCESS,
048    TIMED_OUT,
049    FAILED_WITH_EXIT_CODE,
050    FAILED_WITH_EXCEPTION,
051    FAILED
052  }
053
054  /**
055   * Initialize.
056   *
057   * @param location the location of the health script
058   * @param timeout the timeout to be used for the health script
059   */
060  public void init(String location, long timeout) {
061    this.healthCheckScript = location;
062    this.scriptTimeout = timeout;
063    ArrayList<String> execScript = new ArrayList<>();
064    execScript.add(healthCheckScript);
065    this.shexec = new ShellCommandExecutor(execScript.toArray(new String[execScript.size()]), null,
066        null, scriptTimeout);
067    LOG.info("HealthChecker initialized with script at " + this.healthCheckScript +
068      ", timeout=" + timeout);
069  }
070
071  public HealthReport checkHealth() {
072    HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS;
073    try {
074      // Calling this execute leaves around running executor threads.
075      shexec.execute();
076    } catch (ExitCodeException e) {
077      // ignore the exit code of the script
078      LOG.warn("Caught exception : " + e + ",exit code:" + e.getExitCode());
079      status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE;
080    } catch (IOException e) {
081      LOG.warn("Caught exception : " + e);
082      status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION;
083      exceptionStackTrace = org.apache.hadoop.util.StringUtils.stringifyException(e);
084    } finally {
085      if (shexec.isTimedOut()) {
086        status = HealthCheckerExitStatus.TIMED_OUT;
087      }
088      if (status == HealthCheckerExitStatus.SUCCESS) {
089        if (hasErrors(shexec.getOutput())) {
090          status = HealthCheckerExitStatus.FAILED;
091        }
092      }
093    }
094    return new HealthReport(status, getHealthReport(status));
095  }
096
097  private boolean hasErrors(String output) {
098    String[] splits = output.split("\n");
099    for (String split : splits) {
100      if (split.startsWith(ERROR_PATTERN)) {
101        return true;
102      }
103    }
104    return false;
105  }
106
107  private String getHealthReport(HealthCheckerExitStatus status){
108    String healthReport = null;
109    switch (status) {
110    case SUCCESS:
111      healthReport = "Server is healthy.";
112      break;
113    case TIMED_OUT:
114      healthReport = "Health script timed out";
115      break;
116    case FAILED_WITH_EXCEPTION:
117      healthReport = exceptionStackTrace;
118      break;
119    case FAILED_WITH_EXIT_CODE:
120      healthReport = "Health script failed with exit code.";
121      break;
122    case FAILED:
123      healthReport = shexec.getOutput();
124      break;
125    }
126    return healthReport;
127  }
128}