/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.util.Shell.ExitCodeException;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A utility for executing an external script that checks the health of
 * the node. An example script can be found at
 * {@code src/main/sh/healthcheck/healthcheck.sh} in the
 * {@code hbase-examples} module.
 *
 * <p>{@link #init(String, long)} must be called before {@link #checkHealth()}: the
 * command executor is {@code null} until {@code init} creates it.
 */
class HealthChecker {

  private static final Logger LOG = LoggerFactory.getLogger(HealthChecker.class);

  /** Pattern used for searching in the output of the node health script */
  private static final String ERROR_PATTERN = "ERROR";

  /** Executor for the health script; created by {@link #init(String, long)}. */
  private ShellCommandExecutor shexec = null;

  /** Stringified stack trace of the last {@link IOException} seen by {@link #checkHealth()}. */
  private String exceptionStackTrace;

  private String healthCheckScript;
  private long scriptTimeout;

  /** Possible outcomes of a single run of the health script. */
  enum HealthCheckerExitStatus {
    SUCCESS,
    TIMED_OUT,
    FAILED_WITH_EXIT_CODE,
    FAILED_WITH_EXCEPTION,
    FAILED
  }

  /**
   * Initialize.
   *
   * @param location the location of the health script
   * @param timeout the timeout to be used for the health script
   */
  public void init(String location, long timeout) {
    this.healthCheckScript = location;
    this.scriptTimeout = timeout;
    ArrayList<String> execScript = new ArrayList<>();
    execScript.add(healthCheckScript);
    this.shexec = new ShellCommandExecutor(execScript.toArray(new String[execScript.size()]), null,
        null, scriptTimeout);
    LOG.info("HealthChecker initialized with script at {}, timeout={}", this.healthCheckScript,
        timeout);
  }

  /**
   * Runs the health script once and translates the outcome into a {@link HealthReport}.
   *
   * <p>The status is determined as follows:
   * <ul>
   *   <li>an {@link ExitCodeException} yields {@code FAILED_WITH_EXIT_CODE};</li>
   *   <li>any other {@link IOException} yields {@code FAILED_WITH_EXCEPTION} and its
   *       stringified stack trace becomes the report text;</li>
   *   <li>if the executor reports a timeout, the status is {@code TIMED_OUT} regardless of
   *       the above;</li>
   *   <li>otherwise, if the status is still {@code SUCCESS} but a line of the script output
   *       starts with {@code ERROR}, the status is {@code FAILED}.</li>
   * </ul>
   *
   * @return the report built from the resulting status and its message
   */
  public HealthReport checkHealth() {
    HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS;
    try {
      // Calling this execute leaves around running executor threads.
      shexec.execute();
    } catch (ExitCodeException e) {
      // The specific exit code is only logged; the report just records that the
      // script failed with an exit code.
      // e.toString() keeps the logged text identical to the former concatenation.
      LOG.warn("Caught exception : {},exit code:{}", e.toString(), e.getExitCode());
      status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE;
    } catch (IOException e) {
      // Pass the text, not the Throwable: a trailing Throwable argument would be
      // treated by SLF4J as the exception and the placeholder left unfilled.
      LOG.warn("Caught exception : {}", e.toString());
      status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION;
      exceptionStackTrace = org.apache.hadoop.util.StringUtils.stringifyException(e);
    } finally {
      // A timeout takes precedence over whatever status the catch blocks chose.
      if (shexec.isTimedOut()) {
        status = HealthCheckerExitStatus.TIMED_OUT;
      }
      // The script ran without exception or timeout, but it may still report
      // a problem through its output.
      if (status == HealthCheckerExitStatus.SUCCESS) {
        if (hasErrors(shexec.getOutput())) {
          status = HealthCheckerExitStatus.FAILED;
        }
      }
    }
    return new HealthReport(status, getHealthReport(status));
  }

  /**
   * Checks whether any line of the script output begins with {@link #ERROR_PATTERN}.
   *
   * @param output the script output, split on {@code "\n"}
   * @return {@code true} if at least one line starts with the error pattern
   */
  private boolean hasErrors(String output) {
    String[] splits = output.split("\n");
    for (String split : splits) {
      if (split.startsWith(ERROR_PATTERN)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Maps an exit status to the message carried by the health report.
   *
   * @param status the status produced by {@link #checkHealth()}
   * @return a fixed message, the stored exception stack trace, or the script output,
   *         depending on {@code status}
   */
  private String getHealthReport(HealthCheckerExitStatus status) {
    String healthReport = null;
    switch (status) {
      case SUCCESS:
        healthReport = "Server is healthy.";
        break;
      case TIMED_OUT:
        healthReport = "Health script timed out";
        break;
      case FAILED_WITH_EXCEPTION:
        healthReport = exceptionStackTrace;
        break;
      case FAILED_WITH_EXIT_CODE:
        healthReport = "Health script failed with exit code.";
        break;
      case FAILED:
        healthReport = shexec.getOutput();
        break;
    }
    return healthReport;
  }
}