1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase;
19
20 import org.apache.commons.logging.Log;
21 import org.apache.commons.logging.LogFactory;
22 import org.apache.hadoop.conf.Configuration;
23 import org.apache.hadoop.hbase.HealthChecker.HealthCheckerExitStatus;
24 import org.apache.hadoop.util.StringUtils;
25
26
27
28
29 public class HealthCheckChore extends ScheduledChore {
30 private static final Log LOG = LogFactory.getLog(HealthCheckChore.class);
31 private HealthChecker healthChecker;
32 private Configuration config;
33 private int threshold;
34 private int numTimesUnhealthy = 0;
35 private long failureWindow;
36 private long startWindow;
37
38 public HealthCheckChore(int sleepTime, Stoppable stopper, Configuration conf) {
39 super("HealthChecker", stopper, sleepTime);
40 LOG.info("Health Check Chore runs every " + StringUtils.formatTime(sleepTime));
41 this.config = conf;
42 String healthCheckScript = this.config.get(HConstants.HEALTH_SCRIPT_LOC);
43 long scriptTimeout = this.config.getLong(HConstants.HEALTH_SCRIPT_TIMEOUT,
44 HConstants.DEFAULT_HEALTH_SCRIPT_TIMEOUT);
45 healthChecker = new HealthChecker();
46 healthChecker.init(healthCheckScript, scriptTimeout);
47 this.threshold = config.getInt(HConstants.HEALTH_FAILURE_THRESHOLD,
48 HConstants.DEFAULT_HEALTH_FAILURE_THRESHOLD);
49 this.failureWindow = (long)this.threshold * (long)sleepTime;
50 }
51
52 @Override
53 protected void chore() {
54 HealthReport report = healthChecker.checkHealth();
55 boolean isHealthy = (report.getStatus() == HealthCheckerExitStatus.SUCCESS);
56 if (!isHealthy) {
57 boolean needToStop = decideToStop();
58 if (needToStop) {
59 getStopper().stop(
60 "The node reported unhealthy " + threshold + " number of times consecutively.");
61 }
62
63 LOG.info("Health status at " + StringUtils.formatTime(System.currentTimeMillis()) + " : "
64 + report.getHealthReport());
65 }
66 }
67
68 private boolean decideToStop() {
69 boolean stop = false;
70 if (numTimesUnhealthy == 0) {
71
72
73 numTimesUnhealthy++;
74 startWindow = System.currentTimeMillis();
75 } else {
76 if ((System.currentTimeMillis() - startWindow) < failureWindow) {
77 numTimesUnhealthy++;
78 if (numTimesUnhealthy == threshold) {
79 stop = true;
80 } else {
81 stop = false;
82 }
83 } else {
84
85 numTimesUnhealthy = 1;
86 startWindow = System.currentTimeMillis();
87 stop = false;
88 }
89 }
90 return stop;
91 }
92
93 }