View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.tool;
21  
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  
25  import org.apache.hadoop.util.Tool;
26  import org.apache.hadoop.util.ToolRunner;
27  
28  import org.apache.hadoop.conf.Configuration;
29  
30  import org.apache.hadoop.hbase.HRegionInfo;
31  import org.apache.hadoop.hbase.HTableDescriptor;
32  import org.apache.hadoop.hbase.HColumnDescriptor;
33  import org.apache.hadoop.hbase.HBaseConfiguration;
34  import org.apache.hadoop.hbase.TableNotFoundException;
35  
36  import org.apache.hadoop.hbase.client.Get;
37  import org.apache.hadoop.hbase.client.HTable;
38  import org.apache.hadoop.hbase.client.HBaseAdmin;
39  
40  /**
41   * HBase Canary Tool, that that can be used to do
42   * "canary monitoring" of a running HBase cluster.
43   *
44   * Foreach region tries to get one row per column family
45   * and outputs some information about failure or latency.
46   */
47  public final class Canary implements Tool {
48    // Sink interface used by the canary to outputs information
49    public interface Sink {
50      public void publishReadFailure(HRegionInfo region);
51      public void publishReadFailure(HRegionInfo region, HColumnDescriptor column);
52      public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
53    }
54  
55    // Simple implementation of canary sink that allows to plot on
56    // file or standard output timings or failures.
57    public static class StdOutSink implements Sink {
58      @Override
59      public void publishReadFailure(HRegionInfo region) {
60        LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()));
61      }
62  
63      @Override
64      public void publishReadFailure(HRegionInfo region, HColumnDescriptor column) {
65        LOG.error(String.format("read from region %s column family %s failed",
66                  region.getRegionNameAsString(), column.getNameAsString()));
67      }
68  
69      @Override
70      public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
71        LOG.info(String.format("read from region %s column family %s in %dms",
72                 region.getRegionNameAsString(), column.getNameAsString(), msTime));
73      }
74    }
75  
76    private static final long DEFAULT_INTERVAL = 6000;
77  
78    private static final Log LOG = LogFactory.getLog(Canary.class);
79  
80    private Configuration conf = null;
81    private HBaseAdmin admin = null;
82    private long interval = 0;
83    private Sink sink = null;
84  
85    public Canary() {
86      this(new StdOutSink());
87    }
88  
89    public Canary(Sink sink) {
90      this.sink = sink;
91    }
92  
93    @Override
94    public Configuration getConf() {
95      return conf;
96    }
97  
98    @Override
99    public void setConf(Configuration conf) {
100     this.conf = conf;
101   }
102 
103   @Override
104   public int run(String[] args) throws Exception {
105     int tables_index = -1;
106 
107     // Process command line args
108     for (int i = 0; i < args.length; i++) {
109       String cmd = args[i];
110 
111       if (cmd.startsWith("-")) {
112         if (tables_index >= 0) {
113           // command line args must be in the form: [opts] [table 1 [table 2 ...]]
114           System.err.println("Invalid command line options");
115           printUsageAndExit();
116         }
117 
118         if (cmd.equals("-help")) {
119           // user asked for help, print the help and quit.
120           printUsageAndExit();
121         } else if (cmd.equals("-daemon") && interval == 0) {
122           // user asked for daemon mode, set a default interval between checks
123           interval = DEFAULT_INTERVAL;
124         } else if (cmd.equals("-interval")) {
125           // user has specified an interval for canary breaths (-interval N)
126           i++;
127 
128           if (i == args.length) {
129             System.err.println("-interval needs a numeric value argument.");
130             printUsageAndExit();
131           }
132 
133           try {
134             interval = Long.parseLong(args[i]) * 1000;
135           } catch (NumberFormatException e) {
136             System.err.println("-interval needs a numeric value argument.");
137             printUsageAndExit();
138           }
139         } else {
140           // no options match
141           System.err.println(cmd + " options is invalid.");
142           printUsageAndExit();
143         }
144       } else if (tables_index < 0) {
145         // keep track of first table name specified by the user
146         tables_index = i;
147       }
148     }
149 
150     // initialize HBase conf and admin
151     if (conf == null) conf = HBaseConfiguration.create();
152     admin = new HBaseAdmin(conf);
153 
154     // lets the canary monitor the cluster
155     do {
156       if (admin.isAborted()) {
157         LOG.error("HBaseAdmin aborted");
158         return(1);
159       }
160 
161       if (tables_index >= 0) {
162         for (int i = tables_index; i < args.length; i++) {
163           sniff(args[i]);
164         }
165       } else {
166         sniff();
167       }
168 
169       Thread.sleep(interval);
170     } while (interval > 0);
171 
172     return(0);
173   }
174 
175   private void printUsageAndExit() {
176     System.err.printf("Usage: bin/hbase %s [opts] [table 1 [table 2...]]\n", getClass().getName());
177     System.err.println(" where [opts] are:");
178     System.err.println("   -help          Show this help and exit.");
179     System.err.println("   -daemon        Continuous check at defined intervals.");
180     System.err.println("   -interval <N>  Interval between checks (sec)");
181     System.exit(1);
182   }
183 
184   /*
185    * canary entry point to monitor all the tables.
186    */
187   private void sniff() throws Exception {
188     for (HTableDescriptor table : admin.listTables()) {
189       sniff(table);
190     }
191   }
192 
193   /*
194    * canary entry point to monitor specified table.
195    */
196   private void sniff(String tableName) throws Exception {
197     if (admin.isTableAvailable(tableName)) {
198       sniff(admin.getTableDescriptor(tableName.getBytes()));
199     } else {
200       LOG.warn(String.format("Table %s is not available", tableName));
201     }
202   }
203 
204   /*
205    * Loops over regions that owns this table,
206    * and output some information abouts the state.
207    */
208   private void sniff(HTableDescriptor tableDesc) throws Exception {
209     HTable table = null;
210 
211     try {
212       table = new HTable(admin.getConfiguration(), tableDesc.getName());
213     } catch (TableNotFoundException e) {
214       return;
215     }
216 
217     for (HRegionInfo region : admin.getTableRegions(tableDesc.getName())) {
218       try {
219         sniffRegion(region, table);
220       } catch (Exception e) {
221         sink.publishReadFailure(region);
222       }
223     }
224   }
225 
226   /*
227    * For each column family of the region tries to get one row
228    * and outputs the latency, or the failure.
229    */
230   private void sniffRegion(HRegionInfo region, HTable table) throws Exception {
231     HTableDescriptor tableDesc = table.getTableDescriptor();
232     for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
233       Get get = new Get(region.getStartKey());
234       get.addFamily(column.getName());
235 
236       try {
237         long startTime = System.currentTimeMillis();
238         table.get(get);
239         long time = System.currentTimeMillis() - startTime;
240 
241         sink.publishReadTiming(region, column, time);
242       } catch (Exception e) {
243         sink.publishReadFailure(region, column);
244       }
245     }
246   }
247 
248   public static void main(String[] args) throws Exception {
249     int exitCode = ToolRunner.run(new Canary(), args);
250     System.exit(exitCode);
251   }
252 }
253