View Javadoc

1   /**
2    * Copyright 2011 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase;
21  
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  import org.apache.hadoop.conf.Configuration;
25  import org.apache.hadoop.hbase.HealthChecker.HealthCheckerExitStatus;
26  import org.apache.hadoop.util.StringUtils;
27  
28  /**
29   * The Class HealthCheckChore for running health checker regularly.
30   */
31   public class HealthCheckChore extends Chore {
32    private static Log LOG = LogFactory.getLog(HealthCheckChore.class);
33    private HealthChecker healthChecker;
34    private Configuration config;
35    private int threshold;
36    private int numTimesUnhealthy = 0;
37    private long failureWindow;
38    private long startWindow;
39  
40    public HealthCheckChore(int sleepTime, Stoppable stopper, Configuration conf) {
41      super("HealthChecker", sleepTime, stopper);
42      LOG.info("Health Check Chore runs every " + StringUtils.formatTime(sleepTime));
43      this.config = conf;
44      String healthCheckScript = this.config.get(HConstants.HEALTH_SCRIPT_LOC);
45      long scriptTimeout = this.config.getLong(HConstants.HEALTH_SCRIPT_TIMEOUT,
46        HConstants.DEFAULT_HEALTH_SCRIPT_TIMEOUT);
47      healthChecker = new HealthChecker();
48      healthChecker.init(healthCheckScript, scriptTimeout);
49      this.threshold = config.getInt(HConstants.HEALTH_FAILURE_THRESHOLD,
50        HConstants.DEFAULT_HEALTH_FAILURE_THRESHOLD);
51      this.failureWindow = (long)this.threshold * (long)sleepTime;
52    }
53  
54    @Override
55    protected void chore() {
56      HealthReport report = healthChecker.checkHealth();
57      boolean isHealthy = (report.getStatus() == HealthCheckerExitStatus.SUCCESS);
58      if (!isHealthy) {
59        boolean needToStop = decideToStop();
60        if (needToStop) {
61          this.stopper.stop("The  node reported unhealthy " + threshold
62              + " number of times consecutively.");
63        }
64        // Always log health report.
65        LOG.info("Health status at " + StringUtils.formatTime(System.currentTimeMillis()) + " : "
66            + report.getHealthReport());
67      }
68    }
69  
70    private boolean decideToStop() {
71      boolean stop = false;
72      if (numTimesUnhealthy == 0) {
73        // First time we are seeing a failure. No need to stop, just
74        // record the time.
75        numTimesUnhealthy++;
76        startWindow = System.currentTimeMillis();
77      } else {
78        if ((System.currentTimeMillis() - startWindow) < failureWindow) {
79          numTimesUnhealthy++;
80          if (numTimesUnhealthy == threshold) {
81            stop = true;
82          } else {
83            stop = false;
84          }
85        } else {
86          // Outside of failure window, so we reset to 1.
87          numTimesUnhealthy = 1;
88          startWindow = System.currentTimeMillis();
89          stop = false;
90        }
91      }
92      return stop;
93    }
94  
95  }