001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import org.apache.hadoop.util.Shell.ExitCodeException; 023import org.apache.hadoop.util.Shell.ShellCommandExecutor; 024import org.slf4j.Logger; 025import org.slf4j.LoggerFactory; 026 027/** 028 * A utility for executing an external script that checks the health of the node. An example script 029 * can be found at <tt>src/main/sh/healthcheck/healthcheck.sh</tt> in the <tt>hbase-examples</tt> 030 * module. 031 */ 032class HealthChecker { 033 034 private static final Logger LOG = LoggerFactory.getLogger(HealthChecker.class); 035 private ShellCommandExecutor shexec = null; 036 private String exceptionStackTrace; 037 038 /** Pattern used for searching in the output of the node health script */ 039 static private final String ERROR_PATTERN = "ERROR"; 040 041 private String healthCheckScript; 042 private long scriptTimeout; 043 044 enum HealthCheckerExitStatus { 045 SUCCESS, 046 TIMED_OUT, 047 FAILED_WITH_EXIT_CODE, 048 FAILED_WITH_EXCEPTION, 049 FAILED 050 } 051 052 /** 053 * Initialize. 054 * @param location the location of the health script 055 * @param timeout the timeout to be used for the health script 056 */ 057 public void init(String location, long timeout) { 058 this.healthCheckScript = location; 059 this.scriptTimeout = timeout; 060 ArrayList<String> execScript = new ArrayList<>(); 061 execScript.add(healthCheckScript); 062 this.shexec = new ShellCommandExecutor(execScript.toArray(new String[execScript.size()]), null, 063 null, scriptTimeout); 064 LOG.info("HealthChecker initialized with script at " + this.healthCheckScript + ", timeout=" 065 + timeout); 066 } 067 068 public HealthReport checkHealth() { 069 HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS; 070 try { 071 // Calling this execute leaves around running executor threads. 072 shexec.execute(); 073 } catch (ExitCodeException e) { 074 // ignore the exit code of the script 075 LOG.warn("Caught exception : " + e + ",exit code:" + e.getExitCode()); 076 status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE; 077 } catch (IOException e) { 078 LOG.warn("Caught exception : " + e); 079 status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION; 080 exceptionStackTrace = org.apache.hadoop.util.StringUtils.stringifyException(e); 081 } finally { 082 if (shexec.isTimedOut()) { 083 status = HealthCheckerExitStatus.TIMED_OUT; 084 } 085 if (status == HealthCheckerExitStatus.SUCCESS) { 086 if (hasErrors(shexec.getOutput())) { 087 status = HealthCheckerExitStatus.FAILED; 088 } 089 } 090 } 091 return new HealthReport(status, getHealthReport(status)); 092 } 093 094 private boolean hasErrors(String output) { 095 String[] splits = output.split("\n"); 096 for (String split : splits) { 097 if (split.startsWith(ERROR_PATTERN)) { 098 return true; 099 } 100 } 101 return false; 102 } 103 104 private String getHealthReport(HealthCheckerExitStatus status) { 105 String healthReport = null; 106 switch (status) { 107 case SUCCESS: 108 healthReport = "Server is healthy."; 109 break; 110 case TIMED_OUT: 111 healthReport = "Health script timed out"; 112 break; 113 case FAILED_WITH_EXCEPTION: 114 healthReport = exceptionStackTrace; 115 break; 116 case FAILED_WITH_EXIT_CODE: 117 healthReport = "Health script failed with exit code."; 118 break; 119 case FAILED: 120 healthReport = shexec.getOutput(); 121 break; 122 } 123 return healthReport; 124 } 125}