1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.tool;
21
22 import java.io.Closeable;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.HashMap;
27 import java.util.List;
28 import java.util.Map;
29 import java.util.Set;
30 import java.util.TreeSet;
31 import java.util.regex.Matcher;
32 import java.util.regex.Pattern;
33
34 import org.apache.commons.lang.time.StopWatch;
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.hbase.AuthUtil;
39 import org.apache.hadoop.hbase.ChoreService;
40 import org.apache.hadoop.hbase.DoNotRetryIOException;
41 import org.apache.hadoop.hbase.HBaseConfiguration;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.HRegionInfo;
44 import org.apache.hadoop.hbase.HRegionLocation;
45 import org.apache.hadoop.hbase.HTableDescriptor;
46 import org.apache.hadoop.hbase.ScheduledChore;
47 import org.apache.hadoop.hbase.ServerName;
48 import org.apache.hadoop.hbase.TableName;
49 import org.apache.hadoop.hbase.TableNotEnabledException;
50 import org.apache.hadoop.hbase.TableNotFoundException;
51 import org.apache.hadoop.hbase.client.Admin;
52 import org.apache.hadoop.hbase.client.Connection;
53 import org.apache.hadoop.hbase.client.ConnectionFactory;
54 import org.apache.hadoop.hbase.client.Get;
55 import org.apache.hadoop.hbase.client.RegionLocator;
56 import org.apache.hadoop.hbase.client.ResultScanner;
57 import org.apache.hadoop.hbase.client.Scan;
58 import org.apache.hadoop.hbase.client.Table;
59 import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
60 import org.apache.hadoop.util.Tool;
61 import org.apache.hadoop.util.ToolRunner;
62
63
64
65
66
67
68
69
70
71
72
73
74 public final class Canary implements Tool {
75
76 public interface Sink {
77 public void publishReadFailure(HRegionInfo region, Exception e);
78 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
79 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
80 }
81
82
83 public interface ExtendedSink extends Sink {
84 public void publishReadFailure(String table, String server);
85 public void publishReadTiming(String table, String server, long msTime);
86 }
87
88
89
90 public static class StdOutSink implements Sink {
91 @Override
92 public void publishReadFailure(HRegionInfo region, Exception e) {
93 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
94 }
95
96 @Override
97 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
98 LOG.error(String.format("read from region %s column family %s failed",
99 region.getRegionNameAsString(), column.getNameAsString()), e);
100 }
101
102 @Override
103 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
104 LOG.info(String.format("read from region %s column family %s in %dms",
105 region.getRegionNameAsString(), column.getNameAsString(), msTime));
106 }
107 }
108
109 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
110
111 @Override
112 public void publishReadFailure(String table, String server) {
113 LOG.error(String.format("Read from table:%s on region server:%s", table, server));
114 }
115
116 @Override
117 public void publishReadTiming(String table, String server, long msTime) {
118 LOG.info(String.format("Read from table:%s on region server:%s in %dms",
119 table, server, msTime));
120 }
121 }
122
// Exit / error codes used by run() and the monitors.
// Bad command line, or a table name passed while in region server mode.
private static final int USAGE_EXIT_CODE = 1;
// The monitor failed (or timed out) before it marked itself initialized.
private static final int INIT_ERROR_EXIT_CODE = 2;
// An initialized monitor ran longer than the configured timeout.
private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
// A monitor pass failed after the Admin handle was obtained.
private static final int ERROR_EXIT_CODE = 4;

// Pause between passes, in milliseconds, applied by -daemon when no -interval was given.
private static final long DEFAULT_INTERVAL = 6000;

// Default limit, in milliseconds, on how long a single monitor pass may run.
private static final long DEFAULT_TIMEOUT = 600000;

private static final Log LOG = LogFactory.getLog(Canary.class);

private Configuration conf = null;
// Pause between passes in milliseconds; 0 means run a single pass and return.
private long interval = 0;
// Destination for read timings and failures.
private Sink sink = null;

// Set by -e: targets are regular expressions rather than literal names.
private boolean useRegExp;
// Set by -t: limit, in milliseconds, on how long a single monitor pass may run.
private long timeout = DEFAULT_TIMEOUT;
// Set by -f: when true, the process exits as soon as a monitor reports an error.
private boolean failOnError = true;
// Set by -regionserver: targets name region servers instead of tables.
private boolean regionServerMode = false;
142
/**
 * Creates a canary that reports through a {@link RegionServerStdOutSink}.
 */
public Canary() {
  this((Sink) new RegionServerStdOutSink());
}
146
/**
 * Creates a canary that reports through the given sink.
 *
 * @param sink destination for read timings and failures; it is cast to {@link ExtendedSink}
 *          when region server mode is selected
 */
public Canary(Sink sink) {
  super();
  this.sink = sink;
}
150
151 @Override
152 public Configuration getConf() {
153 return conf;
154 }
155
156 @Override
157 public void setConf(Configuration conf) {
158 this.conf = conf;
159 }
160
161 @Override
162 public int run(String[] args) throws Exception {
163 int index = -1;
164 ChoreService choreService = null;
165
166
167 for (int i = 0; i < args.length; i++) {
168 String cmd = args[i];
169
170 if (cmd.startsWith("-")) {
171 if (index >= 0) {
172
173 System.err.println("Invalid command line options");
174 printUsageAndExit();
175 }
176
177 if (cmd.equals("-help")) {
178
179 printUsageAndExit();
180 } else if (cmd.equals("-daemon") && interval == 0) {
181
182 interval = DEFAULT_INTERVAL;
183 } else if (cmd.equals("-interval")) {
184
185 i++;
186
187 if (i == args.length) {
188 System.err.println("-interval needs a numeric value argument.");
189 printUsageAndExit();
190 }
191
192 try {
193 interval = Long.parseLong(args[i]) * 1000;
194 } catch (NumberFormatException e) {
195 System.err.println("-interval needs a numeric value argument.");
196 printUsageAndExit();
197 }
198 } else if(cmd.equals("-regionserver")) {
199 this.regionServerMode = true;
200 } else if (cmd.equals("-e")) {
201 this.useRegExp = true;
202 } else if (cmd.equals("-t")) {
203 i++;
204
205 if (i == args.length) {
206 System.err.println("-t needs a numeric value argument.");
207 printUsageAndExit();
208 }
209
210 try {
211 this.timeout = Long.parseLong(args[i]);
212 } catch (NumberFormatException e) {
213 System.err.println("-t needs a numeric value argument.");
214 printUsageAndExit();
215 }
216
217 } else if (cmd.equals("-f")) {
218 i++;
219
220 if (i == args.length) {
221 System.err
222 .println("-f needs a boolean value argument (true|false).");
223 printUsageAndExit();
224 }
225
226 this.failOnError = Boolean.parseBoolean(args[i]);
227 } else {
228
229 System.err.println(cmd + " options is invalid.");
230 printUsageAndExit();
231 }
232 } else if (index < 0) {
233
234 index = i;
235 }
236 }
237
238
239
240
241 final ScheduledChore authChore = AuthUtil.getAuthChore(conf);
242 if (authChore != null) {
243 choreService = new ChoreService("CANARY_TOOL");
244 choreService.scheduleChore(authChore);
245 }
246
247
248 Monitor monitor = null;
249 Thread monitorThread = null;
250 long startTime = 0;
251 long currentTimeLength = 0;
252
253
254
255 try (Connection connection = ConnectionFactory.createConnection(this.conf)) {
256 do {
257
258 try {
259 monitor = this.newMonitor(connection, index, args);
260 monitorThread = new Thread(monitor);
261 startTime = System.currentTimeMillis();
262 monitorThread.start();
263 while (!monitor.isDone()) {
264
265 Thread.sleep(1000);
266
267 if (this.failOnError && monitor.hasError()) {
268 monitorThread.interrupt();
269 if (monitor.initialized) {
270 System.exit(monitor.errorCode);
271 } else {
272 System.exit(INIT_ERROR_EXIT_CODE);
273 }
274 }
275 currentTimeLength = System.currentTimeMillis() - startTime;
276 if (currentTimeLength > this.timeout) {
277 LOG.error("The monitor is running too long (" + currentTimeLength
278 + ") after timeout limit:" + this.timeout
279 + " will be killed itself !!");
280 if (monitor.initialized) {
281 System.exit(TIMEOUT_ERROR_EXIT_CODE);
282 } else {
283 System.exit(INIT_ERROR_EXIT_CODE);
284 }
285 break;
286 }
287 }
288
289 if (this.failOnError && monitor.hasError()) {
290 monitorThread.interrupt();
291 System.exit(monitor.errorCode);
292 }
293 } finally {
294 if (monitor != null) monitor.close();
295 }
296
297 Thread.sleep(interval);
298 } while (interval > 0);
299 }
300
301 if (choreService != null) {
302 choreService.shutdown();
303 }
304 return(monitor.errorCode);
305 }
306
307 private void printUsageAndExit() {
308 System.err.printf(
309 "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
310 getClass().getName());
311 System.err.println(" where [opts] are:");
312 System.err.println(" -help Show this help and exit.");
313 System.err.println(" -regionserver replace the table argument to regionserver,");
314 System.err.println(" which means to enable regionserver mode");
315 System.err.println(" -daemon Continuous check at defined intervals.");
316 System.err.println(" -interval <N> Interval between checks (sec)");
317 System.err.println(" -e Use region/regionserver as regular expression");
318 System.err.println(" which means the region/regionserver is regular expression pattern");
319 System.err.println(" -f <B> stop whole program if first error occurs," +
320 " default is true");
321 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
322 System.exit(USAGE_EXIT_CODE);
323 }
324
325
326
327
328
329
330
331
332 public Monitor newMonitor(final Connection connection, int index, String[] args) {
333 Monitor monitor = null;
334 String[] monitorTargets = null;
335
336 if(index >= 0) {
337 int length = args.length - index;
338 monitorTargets = new String[length];
339 System.arraycopy(args, index, monitorTargets, 0, length);
340 }
341
342 if(this.regionServerMode) {
343 monitor = new RegionServerMonitor(
344 connection,
345 monitorTargets,
346 this.useRegExp,
347 (ExtendedSink)this.sink);
348 } else {
349 monitor = new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink);
350 }
351 return monitor;
352 }
353
354
355 public static abstract class Monitor implements Runnable, Closeable {
356
357 protected Connection connection;
358 protected Admin admin;
359 protected String[] targets;
360 protected boolean useRegExp;
361 protected boolean initialized = false;
362
363 protected boolean done = false;
364 protected int errorCode = 0;
365 protected Sink sink;
366
367 public boolean isDone() {
368 return done;
369 }
370
371 public boolean hasError() {
372 return errorCode != 0;
373 }
374
375 @Override
376 public void close() throws IOException {
377 if (this.admin != null) this.admin.close();
378 }
379
380 protected Monitor(Connection connection, String[] monitorTargets,
381 boolean useRegExp, Sink sink) {
382 if (null == connection) throw new IllegalArgumentException("connection shall not be null");
383
384 this.connection = connection;
385 this.targets = monitorTargets;
386 this.useRegExp = useRegExp;
387 this.sink = sink;
388 }
389
390 public abstract void run();
391
392 protected boolean initAdmin() {
393 if (null == this.admin) {
394 try {
395 this.admin = this.connection.getAdmin();
396 } catch (Exception e) {
397 LOG.error("Initial HBaseAdmin failed...", e);
398 this.errorCode = INIT_ERROR_EXIT_CODE;
399 }
400 } else if (admin.isAborted()) {
401 LOG.error("HBaseAdmin aborted");
402 this.errorCode = INIT_ERROR_EXIT_CODE;
403 }
404 return !this.hasError();
405 }
406 }
407
408
409 private static class RegionMonitor extends Monitor {
410
411 public RegionMonitor(Connection connection, String[] monitorTargets,
412 boolean useRegExp, Sink sink) {
413 super(connection, monitorTargets, useRegExp, sink);
414 }
415
416 @Override
417 public void run() {
418 if(this.initAdmin()) {
419 try {
420 if (this.targets != null && this.targets.length > 0) {
421 String[] tables = generateMonitorTables(this.targets);
422 this.initialized = true;
423 for (String table : tables) {
424 Canary.sniff(admin, sink, table);
425 }
426 } else {
427 sniff();
428 }
429 } catch (Exception e) {
430 LOG.error("Run regionMonitor failed", e);
431 this.errorCode = ERROR_EXIT_CODE;
432 }
433 }
434 this.done = true;
435 }
436
437 private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
438 String[] returnTables = null;
439
440 if(this.useRegExp) {
441 Pattern pattern = null;
442 HTableDescriptor[] tds = null;
443 Set<String> tmpTables = new TreeSet<String>();
444 try {
445 for (String monitorTarget : monitorTargets) {
446 pattern = Pattern.compile(monitorTarget);
447 tds = this.admin.listTables(pattern);
448 if (tds != null) {
449 for (HTableDescriptor td : tds) {
450 tmpTables.add(td.getNameAsString());
451 }
452 }
453 }
454 } catch(IOException e) {
455 LOG.error("Communicate with admin failed", e);
456 throw e;
457 }
458
459 if(tmpTables.size() > 0) {
460 returnTables = tmpTables.toArray(new String[tmpTables.size()]);
461 } else {
462 String msg = "No HTable found, tablePattern:"
463 + Arrays.toString(monitorTargets);
464 LOG.error(msg);
465 this.errorCode = INIT_ERROR_EXIT_CODE;
466 throw new TableNotFoundException(msg);
467 }
468 } else {
469 returnTables = monitorTargets;
470 }
471
472 return returnTables;
473 }
474
475
476
477
478 private void sniff() throws Exception {
479 for (HTableDescriptor table : admin.listTables()) {
480 Canary.sniff(admin, sink, table);
481 }
482 }
483
484 }
485
486
487
488
489
490 public static void sniff(final Admin admin, TableName tableName) throws Exception {
491 sniff(admin, new StdOutSink(), tableName.getNameAsString());
492 }
493
494
495
496
497
498 private static void sniff(final Admin admin, final Sink sink, String tableName)
499 throws Exception {
500 if (admin.isTableAvailable(TableName.valueOf(tableName))) {
501 sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)));
502 } else {
503 LOG.warn(String.format("Table %s is not available", tableName));
504 }
505 }
506
507
508
509
510 private static void sniff(final Admin admin, final Sink sink, HTableDescriptor tableDesc)
511 throws Exception {
512 Table table = null;
513
514 try {
515 table = admin.getConnection().getTable(tableDesc.getTableName());
516 } catch (TableNotFoundException e) {
517 return;
518 }
519
520 try {
521 for (HRegionInfo region : admin.getTableRegions(tableDesc.getTableName())) {
522 try {
523 sniffRegion(admin, sink, region, table);
524 } catch (Exception e) {
525 sink.publishReadFailure(region, e);
526 LOG.debug("sniffRegion failed", e);
527 }
528 }
529 } finally {
530 table.close();
531 }
532 }
533
534
535
536
537
538 private static void sniffRegion(
539 final Admin admin,
540 final Sink sink,
541 HRegionInfo region,
542 Table table) throws Exception {
543 HTableDescriptor tableDesc = table.getTableDescriptor();
544 byte[] startKey = null;
545 Get get = null;
546 Scan scan = null;
547 ResultScanner rs = null;
548 StopWatch stopWatch = new StopWatch();
549 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
550 stopWatch.reset();
551 startKey = region.getStartKey();
552
553 if (startKey.length > 0) {
554 get = new Get(startKey);
555 get.setCacheBlocks(false);
556 get.setFilter(new FirstKeyOnlyFilter());
557 get.addFamily(column.getName());
558 } else {
559 scan = new Scan();
560 scan.setRaw(true);
561 scan.setCaching(1);
562 scan.setCacheBlocks(false);
563 scan.setFilter(new FirstKeyOnlyFilter());
564 scan.addFamily(column.getName());
565 scan.setMaxResultSize(1L);
566 }
567
568 try {
569 if (startKey.length > 0) {
570 stopWatch.start();
571 table.get(get);
572 stopWatch.stop();
573 sink.publishReadTiming(region, column, stopWatch.getTime());
574 } else {
575 stopWatch.start();
576 rs = table.getScanner(scan);
577 stopWatch.stop();
578 sink.publishReadTiming(region, column, stopWatch.getTime());
579 }
580 } catch (Exception e) {
581 sink.publishReadFailure(region, column, e);
582 } finally {
583 if (rs != null) {
584 rs.close();
585 }
586 scan = null;
587 get = null;
588 startKey = null;
589 }
590 }
591 }
592
593 private static class RegionServerMonitor extends Monitor {
594
595 public RegionServerMonitor(Connection connection, String[] monitorTargets,
596 boolean useRegExp, ExtendedSink sink) {
597 super(connection, monitorTargets, useRegExp, sink);
598 }
599
600 private ExtendedSink getSink() {
601 return (ExtendedSink) this.sink;
602 }
603
604 @Override
605 public void run() {
606 if (this.initAdmin() && this.checkNoTableNames()) {
607 Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
608 this.initialized = true;
609 this.monitorRegionServers(rsAndRMap);
610 }
611 this.done = true;
612 }
613
614 private boolean checkNoTableNames() {
615 List<String> foundTableNames = new ArrayList<String>();
616 TableName[] tableNames = null;
617
618 try {
619 tableNames = this.admin.listTableNames();
620 } catch (IOException e) {
621 LOG.error("Get listTableNames failed", e);
622 this.errorCode = INIT_ERROR_EXIT_CODE;
623 return false;
624 }
625
626 if (this.targets == null || this.targets.length == 0) return true;
627
628 for (String target : this.targets) {
629 for (TableName tableName : tableNames) {
630 if (target.equals(tableName.getNameAsString())) {
631 foundTableNames.add(target);
632 }
633 }
634 }
635
636 if (foundTableNames.size() > 0) {
637 System.err.println("Cannot pass a tablename when using the -regionserver " +
638 "option, tablenames:" + foundTableNames.toString());
639 this.errorCode = USAGE_EXIT_CODE;
640 }
641 return foundTableNames.size() == 0;
642 }
643
644 private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
645 String serverName = null;
646 TableName tableName = null;
647 HRegionInfo region = null;
648 Table table = null;
649 Get get = null;
650 byte[] startKey = null;
651 Scan scan = null;
652 StopWatch stopWatch = new StopWatch();
653
654 for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
655 stopWatch.reset();
656 serverName = entry.getKey();
657
658 region = entry.getValue().get(0);
659 try {
660 tableName = region.getTable();
661 table = admin.getConnection().getTable(tableName);
662 startKey = region.getStartKey();
663
664 if(startKey.length > 0) {
665 get = new Get(startKey);
666 stopWatch.start();
667 table.get(get);
668 stopWatch.stop();
669 } else {
670 scan = new Scan();
671 scan.setCaching(1);
672 scan.setMaxResultSize(1L);
673 stopWatch.start();
674 ResultScanner s = table.getScanner(scan);
675 s.close();
676 stopWatch.stop();
677 }
678 this.getSink().publishReadTiming(tableName.getNameAsString(),
679 serverName, stopWatch.getTime());
680 } catch (TableNotFoundException tnfe) {
681
682 } catch (TableNotEnabledException tnee) {
683
684 LOG.debug("The targeted table was disabled. Assuming success.");
685 } catch (DoNotRetryIOException dnrioe) {
686 this.getSink().publishReadFailure(tableName.getNameAsString(), serverName);
687 LOG.error(dnrioe);
688 } catch (IOException e) {
689 this.getSink().publishReadFailure(tableName.getNameAsString(), serverName);
690 LOG.error(e);
691 this.errorCode = ERROR_EXIT_CODE;
692 } finally {
693 if (table != null) {
694 try {
695 table.close();
696 } catch (IOException e) {
697 }
698 }
699 scan = null;
700 get = null;
701 startKey = null;
702 }
703 }
704 }
705
706 private Map<String, List<HRegionInfo>> filterRegionServerByName() {
707 Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
708 regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
709 return regionServerAndRegionsMap;
710 }
711
712 private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
713 Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
714 Table table = null;
715 RegionLocator regionLocator = null;
716 try {
717 HTableDescriptor[] tableDescs = this.admin.listTables();
718 List<HRegionInfo> regions = null;
719 for (HTableDescriptor tableDesc : tableDescs) {
720 table = this.admin.getConnection().getTable(tableDesc.getTableName());
721 regionLocator = this.admin.getConnection().getRegionLocator(tableDesc.getTableName());
722
723 for (HRegionLocation location: regionLocator.getAllRegionLocations()) {
724 ServerName rs = location.getServerName();
725 String rsName = rs.getHostname();
726 HRegionInfo r = location.getRegionInfo();
727
728 if (rsAndRMap.containsKey(rsName)) {
729 regions = rsAndRMap.get(rsName);
730 } else {
731 regions = new ArrayList<HRegionInfo>();
732 rsAndRMap.put(rsName, regions);
733 }
734 regions.add(r);
735 }
736 table.close();
737 }
738
739 } catch (IOException e) {
740 String msg = "Get HTables info failed";
741 LOG.error(msg, e);
742 this.errorCode = INIT_ERROR_EXIT_CODE;
743 } finally {
744 if (table != null) {
745 try {
746 table.close();
747 } catch (IOException e) {
748 LOG.warn("Close table failed", e);
749 }
750 }
751 }
752
753 return rsAndRMap;
754 }
755
756 private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
757 Map<String, List<HRegionInfo>> fullRsAndRMap) {
758
759 Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
760
761 if (this.targets != null && this.targets.length > 0) {
762 filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
763 Pattern pattern = null;
764 Matcher matcher = null;
765 boolean regExpFound = false;
766 for (String rsName : this.targets) {
767 if (this.useRegExp) {
768 regExpFound = false;
769 pattern = Pattern.compile(rsName);
770 for (Map.Entry<String,List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
771 matcher = pattern.matcher(entry.getKey());
772 if (matcher.matches()) {
773 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
774 regExpFound = true;
775 }
776 }
777 if (!regExpFound) {
778 LOG.info("No RegionServerInfo found, regionServerPattern:" + rsName);
779 }
780 } else {
781 if (fullRsAndRMap.containsKey(rsName)) {
782 filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
783 } else {
784 LOG.info("No RegionServerInfo found, regionServerName:" + rsName);
785 }
786 }
787 }
788 } else {
789 filteredRsAndRMap = fullRsAndRMap;
790 }
791 return filteredRsAndRMap;
792 }
793 }
794
795 public static void main(String[] args) throws Exception {
796 final Configuration conf = HBaseConfiguration.create();
797 int exitCode = ToolRunner.run(conf, new Canary(), args);
798 System.exit(exitCode);
799 }
800 }