1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.util;
19
20 import java.io.IOException;
21 import java.io.PrintWriter;
22 import java.io.StringWriter;
23 import java.net.URI;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Collection;
27 import java.util.Collections;
28 import java.util.Comparator;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.Iterator;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.Map.Entry;
35 import java.util.Set;
36 import java.util.SortedMap;
37 import java.util.SortedSet;
38 import java.util.TreeMap;
39 import java.util.TreeSet;
40 import java.util.concurrent.Callable;
41 import java.util.concurrent.ConcurrentSkipListMap;
42 import java.util.concurrent.ExecutionException;
43 import java.util.concurrent.ExecutorService;
44 import java.util.concurrent.Future;
45 import java.util.concurrent.ScheduledThreadPoolExecutor;
46 import java.util.concurrent.atomic.AtomicInteger;
47
48 import org.apache.commons.logging.Log;
49 import org.apache.commons.logging.LogFactory;
50 import org.apache.hadoop.classification.InterfaceAudience;
51 import org.apache.hadoop.classification.InterfaceStability;
52 import org.apache.hadoop.conf.Configuration;
53 import org.apache.hadoop.conf.Configured;
54 import org.apache.hadoop.fs.FileStatus;
55 import org.apache.hadoop.fs.FileSystem;
56 import org.apache.hadoop.fs.Path;
57 import org.apache.hadoop.fs.permission.FsAction;
58 import org.apache.hadoop.hbase.Abortable;
59 import org.apache.hadoop.hbase.ClusterStatus;
60 import org.apache.hadoop.hbase.HBaseConfiguration;
61 import org.apache.hadoop.hbase.HColumnDescriptor;
62 import org.apache.hadoop.hbase.HConstants;
63 import org.apache.hadoop.hbase.HRegionInfo;
64 import org.apache.hadoop.hbase.HRegionLocation;
65 import org.apache.hadoop.hbase.HTableDescriptor;
66 import org.apache.hadoop.hbase.KeyValue;
67 import org.apache.hadoop.hbase.ServerName;
68 import org.apache.hadoop.hbase.catalog.MetaEditor;
69 import org.apache.hadoop.hbase.client.Delete;
70 import org.apache.hadoop.hbase.client.Get;
71 import org.apache.hadoop.hbase.client.HBaseAdmin;
72 import org.apache.hadoop.hbase.client.HConnectable;
73 import org.apache.hadoop.hbase.client.HConnection;
74 import org.apache.hadoop.hbase.client.HConnectionManager;
75 import org.apache.hadoop.hbase.client.HTable;
76 import org.apache.hadoop.hbase.client.MetaScanner;
77 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
78 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
79 import org.apache.hadoop.hbase.client.Put;
80 import org.apache.hadoop.hbase.client.Result;
81 import org.apache.hadoop.hbase.client.RowMutations;
82 import org.apache.hadoop.hbase.exceptions.MasterNotRunningException;
83 import org.apache.hadoop.hbase.exceptions.ZooKeeperConnectionException;
84 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
85 import org.apache.hadoop.hbase.io.hfile.HFile;
86 import org.apache.hadoop.hbase.master.MasterFileSystem;
87 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
88 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
89 import org.apache.hadoop.hbase.regionserver.HRegion;
90 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
91 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
92 import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
93 import org.apache.hadoop.hbase.security.User;
94 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
95 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
96 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
97 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
98 import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
99 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
100 import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
101 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
102 import org.apache.hadoop.security.AccessControlException;
103 import org.apache.hadoop.security.UserGroupInformation;
104 import org.apache.hadoop.util.ReflectionUtils;
105 import org.apache.hadoop.util.Tool;
106 import org.apache.hadoop.util.ToolRunner;
107 import org.apache.zookeeper.KeeperException;
108
109 import com.google.common.base.Joiner;
110 import com.google.common.base.Preconditions;
111 import com.google.common.collect.Lists;
112 import com.google.common.collect.Multimap;
113 import com.google.common.collect.TreeMultimap;
114 import com.google.protobuf.ServiceException;
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161 @InterfaceAudience.Public
162 @InterfaceStability.Evolving
163 public class HBaseFsck extends Configured implements Tool {
164 public static final long DEFAULT_TIME_LAG = 60000;
165 public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
166 private static final int MAX_NUM_THREADS = 50;
167 private static boolean rsSupportsOffline = true;
168 private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
169 private static final int DEFAULT_MAX_MERGE = 5;
170 private static final String TO_BE_LOADED = "to_be_loaded";
171
172
173
174
175 private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
176 private ClusterStatus status;
177 private HConnection connection;
178 private HBaseAdmin admin;
179 private HTable meta;
180 protected ExecutorService executor;
181 private long startMillis = System.currentTimeMillis();
182 private HFileCorruptionChecker hfcc;
183 private int retcode = 0;
184
185
186
187
188 private static boolean details = false;
189 private long timelag = DEFAULT_TIME_LAG;
190 private boolean fixAssignments = false;
191 private boolean fixMeta = false;
192 private boolean checkHdfs = true;
193 private boolean fixHdfsHoles = false;
194 private boolean fixHdfsOverlaps = false;
195 private boolean fixHdfsOrphans = false;
196 private boolean fixTableOrphans = false;
197 private boolean fixVersionFile = false;
198 private boolean fixSplitParents = false;
199 private boolean fixReferenceFiles = false;
200 private boolean fixEmptyMetaCells = false;
201 private boolean fixTableLocks = false;
202
203
204
205 private Set<String> tablesIncluded = new HashSet<String>();
206 private int maxMerge = DEFAULT_MAX_MERGE;
207 private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
208 private boolean sidelineBigOverlaps = false;
209 private Path sidelineDir = null;
210
211 private boolean rerun = false;
212 private static boolean summary = false;
213 private boolean checkMetaOnly = false;
214 private boolean ignorePreCheckPermission = false;
215
216
217
218
219 final private ErrorReporter errors;
220 int fixes = 0;
221
222
223
224
225
226
227 private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
228 private TreeSet<byte[]> disabledTables =
229 new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
230
231 private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
232
233
234
235
236
237
238
239
240
241
242
243 private SortedMap<String, TableInfo> tablesInfo = new ConcurrentSkipListMap<String,TableInfo>();
244
245
246
247
248 private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
249
250 private Map<String, Set<String>> orphanTableDirs = new HashMap<String, Set<String>>();
251
252
253
254
255
256
257
258
259 public HBaseFsck(Configuration conf) throws MasterNotRunningException,
260 ZooKeeperConnectionException, IOException, ClassNotFoundException {
261 super(conf);
262 errors = getErrorReporter(conf);
263
264 int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
265 executor = new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
266 }
267
268
269
270
271
272
273
274
275
276
277
/**
 * Builds a checker that runs its work items on a caller-supplied executor.
 *
 * @param conf configuration handed to the superclass; the error reporter is obtained from it
 * @param exec executor stored as-is in {@link #executor}
 * @throws MasterNotRunningException declared by the signature; not raised directly in this body
 * @throws ZooKeeperConnectionException declared by the signature; not raised directly in this body
 * @throws IOException declared by the signature; not raised directly in this body
 * @throws ClassNotFoundException declared by the signature; not raised directly in this body
 */
public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
    ZooKeeperConnectionException, IOException, ClassNotFoundException {
  super(conf);
  executor = exec;
  errors = getErrorReporter(getConf());
}
284
285
286
287
288
289 public void connect() throws IOException {
290 admin = new HBaseAdmin(getConf());
291 meta = new HTable(getConf(), HConstants.META_TABLE_NAME);
292 status = admin.getClusterStatus();
293 connection = admin.getConnection();
294 }
295
296
297
298
299 private void loadDeployedRegions() throws IOException, InterruptedException {
300
301 Collection<ServerName> regionServers = status.getServers();
302 errors.print("Number of live region servers: " + regionServers.size());
303 if (details) {
304 for (ServerName rsinfo: regionServers) {
305 errors.print(" " + rsinfo.getServerName());
306 }
307 }
308
309
310 Collection<ServerName> deadRegionServers = status.getDeadServerNames();
311 errors.print("Number of dead region servers: " + deadRegionServers.size());
312 if (details) {
313 for (ServerName name: deadRegionServers) {
314 errors.print(" " + name);
315 }
316 }
317
318
319 errors.print("Master: " + status.getMaster());
320
321
322 Collection<ServerName> backupMasters = status.getBackupMasters();
323 errors.print("Number of backup masters: " + backupMasters.size());
324 if (details) {
325 for (ServerName name: backupMasters) {
326 errors.print(" " + name);
327 }
328 }
329
330
331 processRegionServers(regionServers);
332 }
333
334
335
336
337 private void clearState() {
338
339 fixes = 0;
340 regionInfoMap.clear();
341 emptyRegionInfoQualifiers.clear();
342 disabledTables.clear();
343 errors.clear();
344 tablesInfo.clear();
345 orphanHdfsDirs.clear();
346 }
347
348
349
350
351
352
353 public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
354
355 if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
356 || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
357 LOG.info("Loading regioninfos HDFS");
358
359 int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
360 int curIter = 0;
361 do {
362 clearState();
363
364 restoreHdfsIntegrity();
365 curIter++;
366 } while (fixes > 0 && curIter <= maxIterations);
367
368
369
370 if (curIter > 2) {
371 if (curIter == maxIterations) {
372 LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
373 + "Tables integrity may not be fully repaired!");
374 } else {
375 LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
376 }
377 }
378 }
379 }
380
381
382
383
384
385
386
387
388
389 public int onlineConsistencyRepair() throws IOException, KeeperException,
390 InterruptedException {
391 clearState();
392
393 LOG.info("Loading regionsinfo from the .META. table");
394 boolean success = loadMetaEntries();
395 if (!success) return -1;
396
397
398 if (!checkMetaRegion()) {
399
400 errors.reportError("Encountered fatal error. Exiting...");
401 return -2;
402 }
403
404
405 reportEmptyMetaCells();
406
407
408 if (shouldFixEmptyMetaCells()) {
409 fixEmptyMetaCells();
410 }
411
412
413 if (!checkMetaOnly) {
414 reportTablesInFlux();
415 }
416
417
418 loadDeployedRegions();
419
420
421 if (shouldCheckHdfs()) {
422 loadHdfsRegionDirs();
423 loadHdfsRegionInfos();
424 }
425
426
427 loadDisabledTables();
428
429
430 fixOrphanTables();
431
432
433 checkAndFixConsistency();
434
435
436 checkIntegrity();
437 return errors.getErrorList().size();
438 }
439
440
441
442
443
444 public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException {
445
446 errors.print("Version: " + status.getHBaseVersion());
447 offlineHdfsIntegrityRepair();
448
449
450 boolean oldBalancer = admin.setBalancerRunning(false, true);
451 try {
452 onlineConsistencyRepair();
453 }
454 finally {
455 admin.setBalancerRunning(oldBalancer, false);
456 }
457
458 offlineReferenceFileRepair();
459
460 checkAndFixTableLocks();
461
462
463 printTableSummary(tablesInfo);
464 return errors.summarize();
465 }
466
467
468
469
470 private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
471 for (HbckInfo hi : orphanHdfsDirs) {
472 LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
473 adoptHdfsOrphan(hi);
474 }
475 }
476
477
478
479
480
481
482
483
484
485
486 private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
487 Path p = hi.getHdfsRegionDir();
488 FileSystem fs = p.getFileSystem(getConf());
489 FileStatus[] dirs = fs.listStatus(p);
490 if (dirs == null) {
491 LOG.warn("Attempt to adopt ophan hdfs region skipped becuase no files present in " +
492 p + ". This dir could probably be deleted.");
493 return ;
494 }
495
496 String tableName = Bytes.toString(hi.getTableName());
497 TableInfo tableInfo = tablesInfo.get(tableName);
498 Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
499 HTableDescriptor template = tableInfo.getHTD();
500
501
502 Pair<byte[],byte[]> orphanRegionRange = null;
503 for (FileStatus cf : dirs) {
504 String cfName= cf.getPath().getName();
505
506 if (cfName.startsWith(".") || cfName.equals("splitlog")) continue;
507
508 FileStatus[] hfiles = fs.listStatus(cf.getPath());
509 for (FileStatus hfile : hfiles) {
510 byte[] start, end;
511 HFile.Reader hf = null;
512 try {
513 CacheConfig cacheConf = new CacheConfig(getConf());
514 hf = HFile.createReader(fs, hfile.getPath(), cacheConf);
515 hf.loadFileInfo();
516 KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
517 start = startKv.getRow();
518 KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
519 end = endKv.getRow();
520 } catch (IOException ioe) {
521 LOG.warn("Problem reading orphan file " + hfile + ", skipping");
522 continue;
523 } catch (NullPointerException ioe) {
524 LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
525 continue;
526 } finally {
527 if (hf != null) {
528 hf.close();
529 }
530 }
531
532
533 if (orphanRegionRange == null) {
534
535 orphanRegionRange = new Pair<byte[], byte[]>(start, end);
536 } else {
537
538
539
540 if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
541 orphanRegionRange.setFirst(start);
542 }
543 if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
544 orphanRegionRange.setSecond(end);
545 }
546 }
547 }
548 }
549 if (orphanRegionRange == null) {
550 LOG.warn("No data in dir " + p + ", sidelining data");
551 fixes++;
552 sidelineRegionDir(fs, hi);
553 return;
554 }
555 LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
556 Bytes.toString(orphanRegionRange.getSecond()) + ")");
557
558
559 HRegionInfo hri = new HRegionInfo(template.getName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond());
560 LOG.info("Creating new region : " + hri);
561 HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
562 Path target = region.getRegionFileSystem().getRegionDir();
563
564
565 mergeRegionDirs(target, hi);
566 fixes++;
567 }
568
569
570
571
572
573
574
575
576
577 private int restoreHdfsIntegrity() throws IOException, InterruptedException {
578
579 LOG.info("Loading HBase regioninfo from HDFS...");
580 loadHdfsRegionDirs();
581
582 int errs = errors.getErrorList().size();
583
584 tablesInfo = loadHdfsRegionInfos();
585 checkHdfsIntegrity(false, false);
586
587 if (errors.getErrorList().size() == errs) {
588 LOG.info("No integrity errors. We are done with this phase. Glorious.");
589 return 0;
590 }
591
592 if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
593 adoptHdfsOrphans(orphanHdfsDirs);
594
595 }
596
597
598 if (shouldFixHdfsHoles()) {
599 clearState();
600 loadHdfsRegionDirs();
601 tablesInfo = loadHdfsRegionInfos();
602 tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
603 }
604
605
606 if (shouldFixHdfsOverlaps()) {
607
608 clearState();
609 loadHdfsRegionDirs();
610 tablesInfo = loadHdfsRegionInfos();
611 tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
612 }
613
614 return errors.getErrorList().size();
615 }
616
617
618
619
620
621
622
623
624
625 private void offlineReferenceFileRepair() throws IOException {
626 Configuration conf = getConf();
627 Path hbaseRoot = FSUtils.getRootDir(conf);
628 FileSystem fs = hbaseRoot.getFileSystem(conf);
629 Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
630 for (Path path: allFiles.values()) {
631 boolean isReference = false;
632 try {
633 isReference = StoreFileInfo.isReference(path);
634 } catch (Throwable t) {
635
636
637
638
639 }
640 if (!isReference) continue;
641
642 Path referredToFile = StoreFileInfo.getReferredToFile(path);
643 if (fs.exists(referredToFile)) continue;
644
645
646 errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
647 "Found lingering reference file " + path);
648 if (!shouldFixReferenceFiles()) continue;
649
650
651 boolean success = false;
652 String pathStr = path.toString();
653
654
655
656
657
658 int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
659 for (int i = 0; index > 0 && i < 3; i++) {
660 index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index);
661 }
662 if (index > 0) {
663 Path rootDir = getSidelineDir();
664 Path dst = new Path(rootDir, pathStr.substring(index));
665 fs.mkdirs(dst.getParent());
666 LOG.info("Trying to sildeline reference file"
667 + path + " to " + dst);
668 setShouldRerun();
669
670 success = fs.rename(path, dst);
671 }
672 if (!success) {
673 LOG.error("Failed to sideline reference file " + path);
674 }
675 }
676 }
677
678
679
680
681 private void reportEmptyMetaCells() {
682 errors.print("Number of empty REGIONINFO_QUALIFIER rows in .META.: " +
683 emptyRegionInfoQualifiers.size());
684 if (details) {
685 for (Result r: emptyRegionInfoQualifiers) {
686 errors.print(" " + r);
687 }
688 }
689 }
690
691
692
693
694 private void reportTablesInFlux() {
695 AtomicInteger numSkipped = new AtomicInteger(0);
696 HTableDescriptor[] allTables = getTables(numSkipped);
697 errors.print("Number of Tables: " + allTables.length);
698 if (details) {
699 if (numSkipped.get() > 0) {
700 errors.detail("Number of Tables in flux: " + numSkipped.get());
701 }
702 for (HTableDescriptor td : allTables) {
703 String tableName = td.getNameAsString();
704 errors.detail(" Table: " + tableName + "\t" +
705 (td.isReadOnly() ? "ro" : "rw") + "\t" +
706 (td.isMetaRegion() ? "META" : " ") + "\t" +
707 " families: " + td.getFamilies().size());
708 }
709 }
710 }
711
712 public ErrorReporter getErrors() {
713 return errors;
714 }
715
716
717
718
719
720 private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
721 Path regionDir = hbi.getHdfsRegionDir();
722 if (regionDir == null) {
723 LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
724 return;
725 }
726
727 if (hbi.hdfsEntry.hri != null) {
728
729 return;
730 }
731
732 FileSystem fs = FileSystem.get(getConf());
733 HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
734 LOG.debug("HRegionInfo read: " + hri.toString());
735 hbi.hdfsEntry.hri = hri;
736 }
737
738
739
740
741
742 public static class RegionRepairException extends IOException {
743 private static final long serialVersionUID = 1L;
744 final IOException ioe;
745 public RegionRepairException(String s, IOException ioe) {
746 super(s);
747 this.ioe = ioe;
748 }
749 }
750
751
752
753
754 private SortedMap<String, TableInfo> loadHdfsRegionInfos() throws IOException, InterruptedException {
755 tablesInfo.clear();
756
757 Collection<HbckInfo> hbckInfos = regionInfoMap.values();
758
759
760 List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
761 List<Future<Void>> hbiFutures;
762
763 for (HbckInfo hbi : hbckInfos) {
764 WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
765 hbis.add(work);
766 }
767
768
769 hbiFutures = executor.invokeAll(hbis);
770
771 for(int i=0; i<hbiFutures.size(); i++) {
772 WorkItemHdfsRegionInfo work = hbis.get(i);
773 Future<Void> f = hbiFutures.get(i);
774 try {
775 f.get();
776 } catch(ExecutionException e) {
777 LOG.warn("Failed to read .regioninfo file for region " +
778 work.hbi.getRegionNameAsString(), e.getCause());
779 }
780 }
781
782
783 for (HbckInfo hbi: hbckInfos) {
784
785 if (hbi.getHdfsHRI() == null) {
786
787 continue;
788 }
789
790
791
792 String tableName = Bytes.toString(hbi.getTableName());
793 if (tableName == null) {
794
795 LOG.warn("tableName was null for: " + hbi);
796 continue;
797 }
798
799 TableInfo modTInfo = tablesInfo.get(tableName);
800 if (modTInfo == null) {
801
802 modTInfo = new TableInfo(tableName);
803 Path hbaseRoot = FSUtils.getRootDir(getConf());
804 tablesInfo.put(tableName, modTInfo);
805 try {
806 HTableDescriptor htd =
807 FSTableDescriptors.getTableDescriptor(hbaseRoot.getFileSystem(getConf()),
808 hbaseRoot, tableName);
809 modTInfo.htds.add(htd);
810 } catch (IOException ioe) {
811 if (!orphanTableDirs.containsKey(tableName)) {
812 LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
813
814 errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
815 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
816 Set<String> columns = new HashSet<String>();
817 orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
818 }
819 }
820 }
821 if (!hbi.isSkipChecks()) {
822 modTInfo.addRegionInfo(hbi);
823 }
824 }
825
826 return tablesInfo;
827 }
828
829
830
831
832
833
834
835
836 private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
837 Path regionDir = hbi.getHdfsRegionDir();
838 FileSystem fs = regionDir.getFileSystem(getConf());
839 FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
840 for (FileStatus subdir : subDirs) {
841 String columnfamily = subdir.getPath().getName();
842 columns.add(columnfamily);
843 }
844 return columns;
845 }
846
847
848
849
850
851
852
853
854
855 private boolean fabricateTableInfo(String tableName, Set<String> columns) throws IOException {
856 if (columns ==null || columns.isEmpty()) return false;
857 HTableDescriptor htd = new HTableDescriptor(tableName);
858 for (String columnfamimly : columns) {
859 htd.addFamily(new HColumnDescriptor(columnfamimly));
860 }
861 FSTableDescriptors.createTableDescriptor(htd, getConf(), true);
862 return true;
863 }
864
865
866
867
868
869 public void fixEmptyMetaCells() throws IOException {
870 if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
871 LOG.info("Trying to fix empty REGIONINFO_QUALIFIER .META. rows.");
872 for (Result region : emptyRegionInfoQualifiers) {
873 deleteMetaRegion(region.getRow());
874 errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
875 }
876 emptyRegionInfoQualifiers.clear();
877 }
878 }
879
880
881
882
883
884
885
886
887
888
889 public void fixOrphanTables() throws IOException {
890 if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
891
892 Path hbaseRoot = FSUtils.getRootDir(getConf());
893 List<String> tmpList = new ArrayList<String>();
894 tmpList.addAll(orphanTableDirs.keySet());
895 HTableDescriptor[] htds = getHTableDescriptors(tmpList);
896 Iterator<Entry<String, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
897 int j = 0;
898 int numFailedCase = 0;
899 while (iter.hasNext()) {
900 Entry<String, Set<String>> entry = (Entry<String, Set<String>>) iter.next();
901 String tableName = entry.getKey();
902 LOG.info("Trying to fix orphan table error: " + tableName);
903 if (j < htds.length) {
904 if (tableName.equals(Bytes.toString(htds[j].getName()))) {
905 HTableDescriptor htd = htds[j];
906 LOG.info("fixing orphan table: " + tableName + " from cache");
907 FSTableDescriptors.createTableDescriptor(
908 hbaseRoot.getFileSystem(getConf()), hbaseRoot, htd, true);
909 j++;
910 iter.remove();
911 }
912 } else {
913 if (fabricateTableInfo(tableName, entry.getValue())) {
914 LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
915 LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
916 iter.remove();
917 } else {
918 LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
919 numFailedCase++;
920 }
921 }
922 fixes++;
923 }
924
925 if (orphanTableDirs.isEmpty()) {
926
927
928 setShouldRerun();
929 LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
930 } else if (numFailedCase > 0) {
931 LOG.error("Failed to fix " + numFailedCase
932 + " OrphanTables with default .tableinfo files");
933 }
934
935 }
936
937 orphanTableDirs.clear();
938
939 }
940
941
942
943
944
945
946 private HRegion createNewMeta() throws IOException {
947 Path rootdir = FSUtils.getRootDir(getConf());
948 Configuration c = getConf();
949 HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
950 MasterFileSystem.setInfoFamilyCachingForMeta(false);
951 HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c,
952 HTableDescriptor.META_TABLEDESC);
953 MasterFileSystem.setInfoFamilyCachingForMeta(true);
954 return meta;
955 }
956
957
958
959
960
961
962
963 private ArrayList<Put> generatePuts(SortedMap<String, TableInfo> tablesInfo) throws IOException {
964 ArrayList<Put> puts = new ArrayList<Put>();
965 boolean hasProblems = false;
966 for (Entry<String, TableInfo> e : tablesInfo.entrySet()) {
967 String name = e.getKey();
968
969
970 if (Bytes.compareTo(Bytes.toBytes(name), HConstants.META_TABLE_NAME) == 0) {
971 continue;
972 }
973
974 TableInfo ti = e.getValue();
975 for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
976 .entrySet()) {
977 Collection<HbckInfo> his = spl.getValue();
978 int sz = his.size();
979 if (sz != 1) {
980
981 LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
982 + " had " + sz + " regions instead of exactly 1." );
983 hasProblems = true;
984 continue;
985 }
986
987
988 HbckInfo hi = his.iterator().next();
989 HRegionInfo hri = hi.getHdfsHRI();
990 Put p = MetaEditor.makePutFromRegionInfo(hri);
991 puts.add(p);
992 }
993 }
994 return hasProblems ? null : puts;
995 }
996
997
998
999
1000 private void suggestFixes(SortedMap<String, TableInfo> tablesInfo) throws IOException {
1001 for (TableInfo tInfo : tablesInfo.values()) {
1002 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1003 tInfo.checkRegionChain(handler);
1004 }
1005 }
1006
1007
1008
1009
1010
1011
1012
1013
1014 public boolean rebuildMeta(boolean fix) throws IOException,
1015 InterruptedException {
1016
1017
1018
1019
1020
1021 LOG.info("Loading HBase regioninfo from HDFS...");
1022 loadHdfsRegionDirs();
1023
1024 int errs = errors.getErrorList().size();
1025 tablesInfo = loadHdfsRegionInfos();
1026 checkHdfsIntegrity(false, false);
1027
1028
1029 if (errors.getErrorList().size() != errs) {
1030
1031 while(true) {
1032 fixes = 0;
1033 suggestFixes(tablesInfo);
1034 errors.clear();
1035 loadHdfsRegionInfos();
1036 checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1037
1038 int errCount = errors.getErrorList().size();
1039
1040 if (fixes == 0) {
1041 if (errCount > 0) {
1042 return false;
1043 } else {
1044 break;
1045 }
1046 }
1047 }
1048 }
1049
1050
1051 LOG.info("HDFS regioninfo's seems good. Sidelining old .META.");
1052 Path backupDir = sidelineOldMeta();
1053
1054 LOG.info("Creating new .META.");
1055 HRegion meta = createNewMeta();
1056
1057
1058 List<Put> puts = generatePuts(tablesInfo);
1059 if (puts == null) {
1060 LOG.fatal("Problem encountered when creating new .META. entries. " +
1061 "You may need to restore the previously sidelined .META.");
1062 return false;
1063 }
1064 meta.put(puts.toArray(new Put[0]));
1065 HRegion.closeHRegion(meta);
1066 LOG.info("Success! .META. table rebuilt.");
1067 LOG.info("Old .META. is moved into " + backupDir);
1068 return true;
1069 }
1070
1071 private SortedMap<String, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1072 boolean fixOverlaps) throws IOException {
1073 LOG.info("Checking HBase region split map from HDFS data...");
1074 for (TableInfo tInfo : tablesInfo.values()) {
1075 TableIntegrityErrorHandler handler;
1076 if (fixHoles || fixOverlaps) {
1077 handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1078 fixHoles, fixOverlaps);
1079 } else {
1080 handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1081 }
1082 if (!tInfo.checkRegionChain(handler)) {
1083
1084 errors.report("Found inconsistency in table " + tInfo.getName());
1085 }
1086 }
1087 return tablesInfo;
1088 }
1089
1090 private Path getSidelineDir() throws IOException {
1091 if (sidelineDir == null) {
1092 Path hbaseDir = FSUtils.getRootDir(getConf());
1093 Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1094 sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1095 + startMillis);
1096 }
1097 return sidelineDir;
1098 }
1099
1100
1101
1102
1103 Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1104 return sidelineRegionDir(fs, null, hi);
1105 }
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115 Path sidelineRegionDir(FileSystem fs,
1116 String parentDir, HbckInfo hi) throws IOException {
1117 String tableName = Bytes.toString(hi.getTableName());
1118 Path regionDir = hi.getHdfsRegionDir();
1119
1120 if (!fs.exists(regionDir)) {
1121 LOG.warn("No previous " + regionDir + " exists. Continuing.");
1122 return null;
1123 }
1124
1125 Path rootDir = getSidelineDir();
1126 if (parentDir != null) {
1127 rootDir = new Path(rootDir, parentDir);
1128 }
1129 Path sidelineTableDir= new Path(rootDir, tableName);
1130 Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1131 fs.mkdirs(sidelineRegionDir);
1132 boolean success = false;
1133 FileStatus[] cfs = fs.listStatus(regionDir);
1134 if (cfs == null) {
1135 LOG.info("Region dir is empty: " + regionDir);
1136 } else {
1137 for (FileStatus cf : cfs) {
1138 Path src = cf.getPath();
1139 Path dst = new Path(sidelineRegionDir, src.getName());
1140 if (fs.isFile(src)) {
1141
1142 success = fs.rename(src, dst);
1143 if (!success) {
1144 String msg = "Unable to rename file " + src + " to " + dst;
1145 LOG.error(msg);
1146 throw new IOException(msg);
1147 }
1148 continue;
1149 }
1150
1151
1152 fs.mkdirs(dst);
1153
1154 LOG.info("Sidelining files from " + src + " into containing region " + dst);
1155
1156
1157
1158
1159 FileStatus[] hfiles = fs.listStatus(src);
1160 if (hfiles != null && hfiles.length > 0) {
1161 for (FileStatus hfile : hfiles) {
1162 success = fs.rename(hfile.getPath(), dst);
1163 if (!success) {
1164 String msg = "Unable to rename file " + src + " to " + dst;
1165 LOG.error(msg);
1166 throw new IOException(msg);
1167 }
1168 }
1169 }
1170 LOG.debug("Sideline directory contents:");
1171 debugLsr(sidelineRegionDir);
1172 }
1173 }
1174
1175 LOG.info("Removing old region dir: " + regionDir);
1176 success = fs.delete(regionDir, true);
1177 if (!success) {
1178 String msg = "Unable to delete dir " + regionDir;
1179 LOG.error(msg);
1180 throw new IOException(msg);
1181 }
1182 return sidelineRegionDir;
1183 }
1184
1185
1186
1187
1188 void sidelineTable(FileSystem fs, byte[] table, Path hbaseDir,
1189 Path backupHbaseDir) throws IOException {
1190 String tableName = Bytes.toString(table);
1191 Path tableDir = new Path(hbaseDir, tableName);
1192 if (fs.exists(tableDir)) {
1193 Path backupTableDir= new Path(backupHbaseDir, tableName);
1194 boolean success = fs.rename(tableDir, backupTableDir);
1195 if (!success) {
1196 throw new IOException("Failed to move " + tableName + " from "
1197 + tableDir.getName() + " to " + backupTableDir.getName());
1198 }
1199 } else {
1200 LOG.info("No previous " + tableName + " exists. Continuing.");
1201 }
1202 }
1203
1204
1205
1206
1207 Path sidelineOldMeta() throws IOException {
1208
1209 Path hbaseDir = FSUtils.getRootDir(getConf());
1210 FileSystem fs = hbaseDir.getFileSystem(getConf());
1211 Path backupDir = getSidelineDir();
1212 fs.mkdirs(backupDir);
1213
1214 try {
1215 sidelineTable(fs, HConstants.META_TABLE_NAME, hbaseDir, backupDir);
1216 } catch (IOException e) {
1217 LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore "
1218 + "try to rename .META. in " + backupDir.getName() + " to "
1219 + hbaseDir.getName() + ".", e);
1220 throw e;
1221 }
1222 return backupDir;
1223 }
1224
1225
1226
1227
1228
1229
1230 private void loadDisabledTables()
1231 throws ZooKeeperConnectionException, IOException {
1232 HConnectionManager.execute(new HConnectable<Void>(getConf()) {
1233 @Override
1234 public Void connect(HConnection connection) throws IOException {
1235 ZooKeeperWatcher zkw = createZooKeeperWatcher();
1236 try {
1237 for (String tableName : ZKTableReadOnly.getDisabledOrDisablingTables(zkw)) {
1238 disabledTables.add(Bytes.toBytes(tableName));
1239 }
1240 } catch (KeeperException ke) {
1241 throw new IOException(ke);
1242 } finally {
1243 zkw.close();
1244 }
1245 return null;
1246 }
1247 });
1248 }
1249
1250
1251
1252
1253 private boolean isTableDisabled(HRegionInfo regionInfo) {
1254 return disabledTables.contains(regionInfo.getTableName());
1255 }
1256
1257
1258
1259
1260
/**
 * Scans the entries directly under the HBase root directory. The part documented here
 * collects the table directories to inspect and verifies that the version file exists,
 * optionally recreating it.
 *
 * @throws IOException if a filesystem operation fails
 * @throws InterruptedException declared by the method signature
 */
public void loadHdfsRegionDirs() throws IOException, InterruptedException {
  Path rootDir = FSUtils.getRootDir(getConf());
  FileSystem fs = rootDir.getFileSystem(getConf());

  // Table directories selected for inspection.
  List<FileStatus> tableDirs = Lists.newArrayList();

  boolean foundVersionFile = false;
  FileStatus[] files = fs.listStatus(rootDir);
  for (FileStatus file : files) {
    String dirName = file.getPath().getName();
    if (dirName.equals(HConstants.VERSION_FILE_NAME)) {
      foundVersionFile = true;
    } else {
      // .META. is always selected; other entries only when not in meta-only mode and
      // isTableIncluded() accepts them.
      if ((!checkMetaOnly && isTableIncluded(dirName)) ||
          dirName.equals(".META.")) {
        tableDirs.add(file);
      }
    }
  }

  // Report a missing version file and, if requested, write a new one.
  if (!foundVersionFile) {
    errors.reportError(ERROR_CODE.NO_VERSION_FILE,
        "Version file does not exist in root dir " + rootDir);
    if (shouldFixVersionFile()) {
      LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
          + " file.");
      // A fix was attempted, so flag that hbck should be run again.
      setShouldRerun();
      FSUtils.setVersion(fs, rootDir, getConf().getInt(
          HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
          HConstants.VERSION_FILE_WRITE_ATTEMPTS,
          HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
    }
  }
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321 private boolean recordMetaRegion() throws IOException {
1322 HRegionLocation metaLocation = connection.locateRegion(
1323 HConstants.META_TABLE_NAME, HConstants.EMPTY_START_ROW);
1324
1325
1326 if (metaLocation == null || metaLocation.getRegionInfo() == null ||
1327 metaLocation.getHostname() == null) {
1328 errors.reportError(ERROR_CODE.NULL_META_REGION,
1329 "META region or some of its attributes are null.");
1330 return false;
1331 }
1332 ServerName sn;
1333 try {
1334 sn = getMetaRegionServerName();
1335 } catch (KeeperException e) {
1336 throw new IOException(e);
1337 }
1338 MetaEntry m =
1339 new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
1340 HbckInfo hbInfo = new HbckInfo(m);
1341 regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), hbInfo);
1342 return true;
1343 }
1344
1345 private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1346 return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1347 @Override
1348 public void abort(String why, Throwable e) {
1349 LOG.error(why, e);
1350 System.exit(1);
1351 }
1352
1353 @Override
1354 public boolean isAborted() {
1355 return false;
1356 }
1357
1358 });
1359 }
1360
1361 private ServerName getMetaRegionServerName()
1362 throws IOException, KeeperException {
1363 ZooKeeperWatcher zkw = createZooKeeperWatcher();
1364 ServerName sn = null;
1365 try {
1366 sn = MetaRegionTracker.getMetaRegionLocation(zkw);
1367 } finally {
1368 zkw.close();
1369 }
1370 return sn;
1371 }
1372
1373
1374
1375
1376
1377
1378 void processRegionServers(Collection<ServerName> regionServerList)
1379 throws IOException, InterruptedException {
1380
1381 List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1382 List<Future<Void>> workFutures;
1383
1384
1385 for (ServerName rsinfo: regionServerList) {
1386 workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1387 }
1388
1389 workFutures = executor.invokeAll(workItems);
1390
1391 for(int i=0; i<workFutures.size(); i++) {
1392 WorkItemRegion item = workItems.get(i);
1393 Future<Void> f = workFutures.get(i);
1394 try {
1395 f.get();
1396 } catch(ExecutionException e) {
1397 LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1398 e.getCause());
1399 }
1400 }
1401 }
1402
1403
1404
1405
1406 private void checkAndFixConsistency()
1407 throws IOException, KeeperException, InterruptedException {
1408 for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1409 checkRegionConsistency(e.getKey(), e.getValue());
1410 }
1411 }
1412
1413 private void preCheckPermission() throws IOException, AccessControlException {
1414 if (shouldIgnorePreCheckPermission()) {
1415 return;
1416 }
1417
1418 Path hbaseDir = FSUtils.getRootDir(getConf());
1419 FileSystem fs = hbaseDir.getFileSystem(getConf());
1420 UserGroupInformation ugi = User.getCurrent().getUGI();
1421 FileStatus[] files = fs.listStatus(hbaseDir);
1422 for (FileStatus file : files) {
1423 try {
1424 FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1425 } catch (AccessControlException ace) {
1426 LOG.warn("Got AccessControlException when preCheckPermission ", ace);
1427 errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1428 + " does not have write perms to " + file.getPath()
1429 + ". Please rerun hbck as hdfs user " + file.getOwner());
1430 throw new AccessControlException(ace);
1431 }
1432 }
1433 }
1434
1435
1436
1437
1438 private void deleteMetaRegion(HbckInfo hi) throws IOException {
1439 deleteMetaRegion(hi.metaEntry.getRegionName());
1440 }
1441
1442
1443
1444
1445 private void deleteMetaRegion(byte[] metaKey) throws IOException {
1446 Delete d = new Delete(metaKey);
1447 meta.delete(d);
1448 meta.flushCommits();
1449 LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1450 }
1451
1452
1453
1454
1455 private void resetSplitParent(HbckInfo hi) throws IOException {
1456 RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
1457 Delete d = new Delete(hi.metaEntry.getRegionName());
1458 d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1459 d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1460 mutations.add(d);
1461
1462 HRegionInfo hri = new HRegionInfo(hi.metaEntry);
1463 hri.setOffline(false);
1464 hri.setSplit(false);
1465 Put p = MetaEditor.makePutFromRegionInfo(hri);
1466 mutations.add(p);
1467
1468 meta.mutateRow(mutations);
1469 meta.flushCommits();
1470 LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
1471 }
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481 private void offline(byte[] regionName) throws IOException {
1482 String regionString = Bytes.toStringBinary(regionName);
1483 if (!rsSupportsOffline) {
1484 LOG.warn("Using unassign region " + regionString
1485 + " instead of using offline method, you should"
1486 + " restart HMaster after these repairs");
1487 admin.unassign(regionName, true);
1488 return;
1489 }
1490
1491
1492 try {
1493 LOG.info("Offlining region " + regionString);
1494 admin.offline(regionName);
1495 } catch (IOException ioe) {
1496 String notFoundMsg = "java.lang.NoSuchMethodException: " +
1497 "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1498 if (ioe.getMessage().contains(notFoundMsg)) {
1499 LOG.warn("Using unassign region " + regionString
1500 + " instead of using offline method, you should"
1501 + " restart HMaster after these repairs");
1502 rsSupportsOffline = false;
1503 admin.unassign(regionName, true);
1504 return;
1505 }
1506 throw ioe;
1507 }
1508 }
1509
1510 private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
1511 for (OnlineEntry rse : hi.deployedEntries) {
1512 LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
1513 try {
1514 HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
1515 offline(rse.hri.getRegionName());
1516 } catch (IOException ioe) {
1517 LOG.warn("Got exception when attempting to offline region "
1518 + Bytes.toString(rse.hri.getRegionName()), ioe);
1519 }
1520 }
1521 }
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535 private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
1536 if (hi.metaEntry == null && hi.hdfsEntry == null) {
1537 undeployRegions(hi);
1538 return;
1539 }
1540
1541
1542 Get get = new Get(hi.getRegionName());
1543 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1544 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1545 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1546 Result r = meta.get(get);
1547 ServerName serverName = HRegionInfo.getServerName(r);
1548 if (serverName == null) {
1549 errors.reportError("Unable to close region "
1550 + hi.getRegionNameAsString() + " because meta does not "
1551 + "have handle to reach it.");
1552 return;
1553 }
1554
1555 HRegionInfo hri = HRegionInfo.getHRegionInfo(r);
1556 if (hri == null) {
1557 LOG.warn("Unable to close region " + hi.getRegionNameAsString()
1558 + " because META had invalid or missing "
1559 + HConstants.CATALOG_FAMILY_STR + ":"
1560 + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
1561 + " qualifier value.");
1562 return;
1563 }
1564
1565
1566 HBaseFsckRepair.closeRegionSilentlyAndWait(admin, serverName, hri);
1567 }
1568
1569 private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
1570 KeeperException, InterruptedException {
1571
1572 if (shouldFixAssignments()) {
1573 errors.print(msg);
1574 undeployRegions(hbi);
1575 setShouldRerun();
1576 HRegionInfo hri = hbi.getHdfsHRI();
1577 if (hri == null) {
1578 hri = hbi.metaEntry;
1579 }
1580 HBaseFsckRepair.fixUnassigned(admin, hri);
1581 HBaseFsckRepair.waitUntilAssigned(admin, hri);
1582 }
1583 }
1584
1585
1586
1587
/**
 * Checks one region for consistency between META, HDFS and the region servers it is
 * deployed on, reports any inconsistency found and, where the matching fix option is
 * enabled, attempts a repair.
 *
 * The branches below are evaluated in order and the first matching one wins, so their
 * order is significant.
 *
 * @param key key of the region in {@code regionInfoMap}; only used in one error message
 * @param hbi collected state of the region
 * @throws IOException if a repair action fails with an I/O error
 * @throws KeeperException if a repair action fails on ZooKeeper
 * @throws InterruptedException if interrupted while waiting for a repair
 */
private void checkRegionConsistency(final String key, final HbckInfo hbi)
    throws IOException, KeeperException, InterruptedException {
  String descriptiveName = hbi.toString();

  boolean inMeta = hbi.metaEntry != null;
  // When HDFS checking is turned off the region is treated as present in HDFS.
  boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
  boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
  boolean isDeployed = !hbi.deployedOn.isEmpty();
  boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
  // True only for a single deployment on exactly the server META names.
  boolean deploymentMatchesMeta =
      hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
      hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
  boolean splitParent =
      (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
  boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
  // Modified within the last 'timelag' milliseconds.
  boolean recentlyModified = inHdfs &&
      hbi.getModTime() + timelag > System.currentTimeMillis();

  // ========== First the healthy cases =============
  if (hbi.containsOnlyHdfsEdits()) {
    return;
  }
  if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
    return;
  } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
    LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
        "tabled that is not deployed");
    return;
  } else if (recentlyModified) {
    LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
    return;
  }
  // ========== Cases where the region is not in META =============
  else if (!inMeta && !inHdfs && !isDeployed) {
    // An entry known from no source at all is not expected to exist.
    assert false : "Entry for region with no data";
  } else if (!inMeta && !inHdfs && isDeployed) {
    errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
        + descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
        "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
    if (shouldFixAssignments()) {
      undeployRegions(hbi);
    }

  } else if (!inMeta && inHdfs && !isDeployed) {
    errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
        + descriptiveName + " on HDFS, but not listed in META " +
        "or deployed on any region server");
    // Restore the META entry from the region info stored in HDFS, then assign.
    if (shouldFixMeta()) {
      if (!hbi.isHdfsRegioninfoPresent()) {
        LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
            + " in table integrity repair phase if -fixHdfsOrphans was" +
            " used.");
        return;
      }

      LOG.info("Patching .META. with .regioninfo: " + hbi.getHdfsHRI());
      HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());

      tryAssignmentRepair(hbi, "Trying to reassign region...");
    }

  } else if (!inMeta && inHdfs && isDeployed) {
    errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
        + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
    debugLsr(hbi.getHdfsRegionDir());
    if (shouldFixMeta()) {
      if (!hbi.isHdfsRegioninfoPresent()) {
        LOG.error("This should have been repaired in table integrity repair phase");
        return;
      }

      LOG.info("Patching .META. with with .regioninfo: " + hbi.getHdfsHRI());
      HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());

      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
    }

  // ========== Cases where the region is in META =============
  } else if (inMeta && inHdfs && !isDeployed && splitParent) {
    // A split parent whose two daughters are both known to hbck is left alone and
    // flagged so that later checks skip it.
    if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
      // Look up both daughters by encoded name.
      HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
      HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
      if (infoA != null && infoB != null) {
        // Both daughters exist.
        hbi.setSkipChecks(true);
        return;
      }
    }
    errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
        + descriptiveName + " is a split parent in META, in HDFS, "
        + "and not deployed on any region server. This could be transient.");
    if (shouldFixSplitParents()) {
      setShouldRerun();
      resetSplitParent(hbi);
    }
  } else if (inMeta && !inHdfs && !isDeployed) {
    errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
        + descriptiveName + " found in META, but not in HDFS "
        + "or deployed on any region server.");
    if (shouldFixMeta()) {
      deleteMetaRegion(hbi);
    }
  } else if (inMeta && !inHdfs && isDeployed) {
    errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
        + " found in META, but not in HDFS, " +
        "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
    // The region has no data in HDFS: close it where it is deployed (if assignment
    // fixing is on) and drop its META row (if META fixing is on).
    if (shouldFixAssignments()) {
      errors.print("Trying to fix unassigned region...");
      closeRegion(hbi);
    }
    if (shouldFixMeta()) {
      // Runs after the optional close above.
      deleteMetaRegion(hbi);
    }
  } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
    errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
        + " not deployed on any region server.");
    tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
  } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
    errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
        "Region " + descriptiveName + " should not be deployed according " +
        "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
    if (shouldFixAssignments()) {
      errors.print("Trying to close the region " + descriptiveName);
      setShouldRerun();
      HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
    }
  } else if (inMeta && inHdfs && isMultiplyDeployed) {
    errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
        + " is listed in META on region server " + hbi.metaEntry.regionServer
        + " but is multiply assigned to region servers " +
        Joiner.on(", ").join(hbi.deployedOn));
    // Repair only when assignment fixing is enabled.
    if (shouldFixAssignments()) {
      errors.print("Trying to fix assignment error...");
      setShouldRerun();
      HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
    }
  } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
    errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
        + descriptiveName + " listed in META on region server " +
        hbi.metaEntry.regionServer + " but found on region server " +
        hbi.deployedOn.get(0));
    // Repair only when assignment fixing is enabled.
    if (shouldFixAssignments()) {
      errors.print("Trying to fix assignment error...");
      setShouldRerun();
      HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
      HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
    }
  } else {
    // No branch matched: report the raw flags to help diagnosis.
    errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
        " is in an unforeseen state:" +
        " inMeta=" + inMeta +
        " inHdfs=" + inHdfs +
        " isDeployed=" + isDeployed +
        " isMultiplyDeployed=" + isMultiplyDeployed +
        " deploymentMatchesMeta=" + deploymentMatchesMeta +
        " shouldBeDeployed=" + shouldBeDeployed);
  }
}
1758
1759
1760
1761
1762
1763
1764
1765 SortedMap<String, TableInfo> checkIntegrity() throws IOException {
1766 tablesInfo = new TreeMap<String,TableInfo> ();
1767 List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
1768 LOG.debug("There are " + regionInfoMap.size() + " region info entries");
1769 for (HbckInfo hbi : regionInfoMap.values()) {
1770
1771 if (hbi.metaEntry == null) {
1772
1773 noHDFSRegionInfos.add(hbi);
1774 Path p = hbi.getHdfsRegionDir();
1775 if (p == null) {
1776 errors.report("No regioninfo in Meta or HDFS. " + hbi);
1777 }
1778
1779
1780 continue;
1781 }
1782 if (hbi.metaEntry.regionServer == null) {
1783 errors.detail("Skipping region because no region server: " + hbi);
1784 continue;
1785 }
1786 if (hbi.metaEntry.isOffline()) {
1787 errors.detail("Skipping region because it is offline: " + hbi);
1788 continue;
1789 }
1790 if (hbi.containsOnlyHdfsEdits()) {
1791 errors.detail("Skipping region because it only contains edits" + hbi);
1792 continue;
1793 }
1794
1795
1796
1797
1798
1799
1800 if (hbi.deployedOn.size() == 0) continue;
1801
1802
1803 String tableName = hbi.metaEntry.getTableNameAsString();
1804 TableInfo modTInfo = tablesInfo.get(tableName);
1805 if (modTInfo == null) {
1806 modTInfo = new TableInfo(tableName);
1807 }
1808 for (ServerName server : hbi.deployedOn) {
1809 modTInfo.addServer(server);
1810 }
1811
1812 if (!hbi.isSkipChecks()) {
1813 modTInfo.addRegionInfo(hbi);
1814 }
1815
1816 tablesInfo.put(tableName, modTInfo);
1817 }
1818
1819 for (TableInfo tInfo : tablesInfo.values()) {
1820 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1821 if (!tInfo.checkRegionChain(handler)) {
1822 errors.report("Found inconsistency in table " + tInfo.getName());
1823 }
1824 }
1825 return tablesInfo;
1826 }
1827
1828
1829
1830
1831
1832 public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
1833 int fileMoves = 0;
1834
1835 LOG.debug("Contained region dir after close and pause");
1836 debugLsr(contained.getHdfsRegionDir());
1837
1838
1839 FileSystem fs = targetRegionDir.getFileSystem(getConf());
1840 FileStatus[] dirs = fs.listStatus(contained.getHdfsRegionDir());
1841
1842 if (dirs == null) {
1843 if (!fs.exists(contained.getHdfsRegionDir())) {
1844 LOG.warn("HDFS region dir " + contained.getHdfsRegionDir() + " already sidelined.");
1845 } else {
1846 sidelineRegionDir(fs, contained);
1847 }
1848 return fileMoves;
1849 }
1850
1851 for (FileStatus cf : dirs) {
1852 Path src = cf.getPath();
1853 Path dst = new Path(targetRegionDir, src.getName());
1854
1855 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
1856
1857 continue;
1858 }
1859
1860 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
1861
1862 continue;
1863 }
1864
1865 LOG.info("Moving files from " + src + " into containing region " + dst);
1866
1867
1868
1869
1870 for (FileStatus hfile : fs.listStatus(src)) {
1871 boolean success = fs.rename(hfile.getPath(), dst);
1872 if (success) {
1873 fileMoves++;
1874 }
1875 }
1876 LOG.debug("Sideline directory contents:");
1877 debugLsr(targetRegionDir);
1878 }
1879
1880
1881 sidelineRegionDir(fs, contained);
1882 LOG.info("Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
1883 getSidelineDir());
1884 debugLsr(contained.getHdfsRegionDir());
1885
1886 return fileMoves;
1887 }
1888
1889
1890
1891
1892 public class TableInfo {
// Name of the table, as passed to the constructor.
String tableName;
// Servers recorded through addServer().
TreeSet <ServerName> deployedOn;

// Regions whose start key sorts after their end key; filled by addRegionInfo().
final List<HbckInfo> backwards = new ArrayList<HbckInfo>();

// Regions moved aside by sidelineBigOverlaps(), keyed by their sideline directory.
final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();

// Split calculator that receives every region accepted by addRegionInfo().
final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);

// Table descriptors found for this table; getHTD() only returns one when exactly one
// is present.
final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();

// Overlapping regions, grouped under the split key at which checkRegionChain() first
// encountered the group.
final Multimap<byte[], HbckInfo> overlapGroups =
    TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
1911
/**
 * Creates the bookkeeping entry for one table, with no servers recorded yet.
 *
 * @param name name of the table
 */
TableInfo(String name) {
  this.deployedOn = new TreeSet<ServerName>();
  this.tableName = name;
}
1916
1917
1918
1919
1920 private HTableDescriptor getHTD() {
1921 if (htds.size() == 1) {
1922 return (HTableDescriptor)htds.toArray()[0];
1923 } else {
1924 LOG.error("None/Multiple table descriptors found for table '"
1925 + tableName + "' regions: " + htds);
1926 }
1927 return null;
1928 }
1929
1930 public void addRegionInfo(HbckInfo hir) {
1931 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
1932
1933 sc.add(hir);
1934 return;
1935 }
1936
1937
1938 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
1939 errors.reportError(
1940 ERROR_CODE.REGION_CYCLE,
1941 String.format("The endkey for this region comes before the "
1942 + "startkey, startkey=%s, endkey=%s",
1943 Bytes.toStringBinary(hir.getStartKey()),
1944 Bytes.toStringBinary(hir.getEndKey())), this, hir);
1945 backwards.add(hir);
1946 return;
1947 }
1948
1949
1950 sc.add(hir);
1951 }
1952
1953 public void addServer(ServerName server) {
1954 this.deployedOn.add(server);
1955 }
1956
1957 public String getName() {
1958 return tableName;
1959 }
1960
1961 public int getNumRegions() {
1962 return sc.getStarts().size() + backwards.size();
1963 }
1964
1965 private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
1966 ErrorReporter errors;
1967
1968 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
1969 this.errors = errors;
1970 setTableInfo(ti);
1971 }
1972
1973 @Override
1974 public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
1975 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
1976 "First region should start with an empty key. You need to "
1977 + " create a new region and regioninfo in HDFS to plug the hole.",
1978 getTableInfo(), hi);
1979 }
1980
1981 @Override
1982 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
1983 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
1984 "Last region should end with an empty key. You need to "
1985 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
1986 }
1987
1988 @Override
1989 public void handleDegenerateRegion(HbckInfo hi) throws IOException{
1990 errors.reportError(ERROR_CODE.DEGENERATE_REGION,
1991 "Region has the same start and end key.", getTableInfo(), hi);
1992 }
1993
1994 @Override
1995 public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
1996 byte[] key = r1.getStartKey();
1997
1998 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
1999 "Multiple regions have the same startkey: "
2000 + Bytes.toStringBinary(key), getTableInfo(), r1);
2001 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2002 "Multiple regions have the same startkey: "
2003 + Bytes.toStringBinary(key), getTableInfo(), r2);
2004 }
2005
2006 @Override
2007 public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2008 errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2009 "There is an overlap in the region chain.",
2010 getTableInfo(), hi1, hi2);
2011 }
2012
2013 @Override
2014 public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2015 errors.reportError(
2016 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2017 "There is a hole in the region chain between "
2018 + Bytes.toStringBinary(holeStart) + " and "
2019 + Bytes.toStringBinary(holeStop)
2020 + ". You need to create a new .regioninfo and region "
2021 + "dir in hdfs to plug the hole.");
2022 }
2023 };
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037 private class HDFSIntegrityFixer extends IntegrityFixSuggester {
// Configuration passed to HBaseFsckRepair.createHDFSRegionDir() when regions are created.
Configuration conf;

// When false, handleOverlapGroup() logs a warning and leaves overlaps untouched.
boolean fixOverlaps = true;
2041
/**
 * @param ti table to repair
 * @param errors reporter that receives the problems found
 * @param conf configuration used when creating region directories
 * @param fixHoles accepted for callers; not stored or read by this constructor
 * @param fixOverlaps whether overlap groups should be repaired
 */
HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
    boolean fixHoles, boolean fixOverlaps) {
  super(ti, errors);
  this.fixOverlaps = fixOverlaps;
  this.conf = conf;
}
2049
2050
2051
2052
2053
2054
2055 public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2056 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2057 "First region should start with an empty key. Creating a new " +
2058 "region and regioninfo in HDFS to plug the hole.",
2059 getTableInfo(), next);
2060 HTableDescriptor htd = getTableInfo().getHTD();
2061
2062 HRegionInfo newRegion = new HRegionInfo(htd.getName(),
2063 HConstants.EMPTY_START_ROW, next.getStartKey());
2064
2065
2066 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2067 LOG.info("Table region start key was not empty. Created new empty region: "
2068 + newRegion + " " +region);
2069 fixes++;
2070 }
2071
2072 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2073 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2074 "Last region should end with an empty key. Creating a new "
2075 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2076 HTableDescriptor htd = getTableInfo().getHTD();
2077
2078 HRegionInfo newRegion = new HRegionInfo(htd.getName(), curEndKey,
2079 HConstants.EMPTY_START_ROW);
2080
2081 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2082 LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
2083 + " " + region);
2084 fixes++;
2085 }
2086
2087
2088
2089
2090
2091 public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2092 errors.reportError(
2093 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2094 "There is a hole in the region chain between "
2095 + Bytes.toStringBinary(holeStartKey) + " and "
2096 + Bytes.toStringBinary(holeStopKey)
2097 + ". Creating a new regioninfo and region "
2098 + "dir in hdfs to plug the hole.");
2099 HTableDescriptor htd = getTableInfo().getHTD();
2100 HRegionInfo newRegion = new HRegionInfo(htd.getName(), holeStartKey, holeStopKey);
2101 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2102 LOG.info("Plugged hold by creating new empty region: "+ newRegion + " " +region);
2103 fixes++;
2104 }
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115 @Override
2116 public void handleOverlapGroup(Collection<HbckInfo> overlap)
2117 throws IOException {
2118 Preconditions.checkNotNull(overlap);
2119 Preconditions.checkArgument(overlap.size() >0);
2120
2121 if (!this.fixOverlaps) {
2122 LOG.warn("Not attempting to repair overlaps.");
2123 return;
2124 }
2125
2126 if (overlap.size() > maxMerge) {
2127 LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2128 "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2129 if (sidelineBigOverlaps) {
2130
2131 sidelineBigOverlaps(overlap);
2132 }
2133 return;
2134 }
2135
2136 mergeOverlaps(overlap);
2137 }
2138
2139 void mergeOverlaps(Collection<HbckInfo> overlap)
2140 throws IOException {
2141 LOG.info("== Merging regions into one region: "
2142 + Joiner.on(",").join(overlap));
2143
2144 Pair<byte[], byte[]> range = null;
2145 for (HbckInfo hi : overlap) {
2146 if (range == null) {
2147 range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2148 } else {
2149 if (RegionSplitCalculator.BYTES_COMPARATOR
2150 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2151 range.setFirst(hi.getStartKey());
2152 }
2153 if (RegionSplitCalculator.BYTES_COMPARATOR
2154 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2155 range.setSecond(hi.getEndKey());
2156 }
2157 }
2158
2159 LOG.debug("Closing region before moving data around: " + hi);
2160 LOG.debug("Contained region dir before close");
2161 debugLsr(hi.getHdfsRegionDir());
2162 try {
2163 LOG.info("Closing region: " + hi);
2164 closeRegion(hi);
2165 } catch (IOException ioe) {
2166 LOG.warn("Was unable to close region " + hi
2167 + ". Just continuing... ", ioe);
2168 } catch (InterruptedException e) {
2169 LOG.warn("Was unable to close region " + hi
2170 + ". Just continuing... ", e);
2171 }
2172
2173 try {
2174 LOG.info("Offlining region: " + hi);
2175 offline(hi.getRegionName());
2176 } catch (IOException ioe) {
2177 LOG.warn("Unable to offline region from master: " + hi
2178 + ". Just continuing... ", ioe);
2179 }
2180 }
2181
2182
2183 HTableDescriptor htd = getTableInfo().getHTD();
2184
2185 HRegionInfo newRegion = new HRegionInfo(htd.getName(), range.getFirst(),
2186 range.getSecond());
2187 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2188 LOG.info("Created new empty container region: " +
2189 newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2190 debugLsr(region.getRegionFileSystem().getRegionDir());
2191
2192
2193 boolean didFix= false;
2194 Path target = region.getRegionFileSystem().getRegionDir();
2195 for (HbckInfo contained : overlap) {
2196 LOG.info("Merging " + contained + " into " + target );
2197 int merges = mergeRegionDirs(target, contained);
2198 if (merges > 0) {
2199 didFix = true;
2200 }
2201 }
2202 if (didFix) {
2203 fixes++;
2204 }
2205 }
2206
2207
2208
2209
2210
2211
2212
2213
2214 void sidelineBigOverlaps(
2215 Collection<HbckInfo> bigOverlap) throws IOException {
2216 int overlapsToSideline = bigOverlap.size() - maxMerge;
2217 if (overlapsToSideline > maxOverlapsToSideline) {
2218 overlapsToSideline = maxOverlapsToSideline;
2219 }
2220 List<HbckInfo> regionsToSideline =
2221 RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2222 FileSystem fs = FileSystem.get(conf);
2223 for (HbckInfo regionToSideline: regionsToSideline) {
2224 try {
2225 LOG.info("Closing region: " + regionToSideline);
2226 closeRegion(regionToSideline);
2227 } catch (IOException ioe) {
2228 LOG.warn("Was unable to close region " + regionToSideline
2229 + ". Just continuing... ", ioe);
2230 } catch (InterruptedException e) {
2231 LOG.warn("Was unable to close region " + regionToSideline
2232 + ". Just continuing... ", e);
2233 }
2234
2235 try {
2236 LOG.info("Offlining region: " + regionToSideline);
2237 offline(regionToSideline.getRegionName());
2238 } catch (IOException ioe) {
2239 LOG.warn("Unable to offline region from master: " + regionToSideline
2240 + ". Just continuing... ", ioe);
2241 }
2242
2243 LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2244 Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2245 if (sidelineRegionDir != null) {
2246 sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2247 LOG.info("After sidelined big overlapped region: "
2248 + regionToSideline.getRegionNameAsString()
2249 + " to " + sidelineRegionDir.toString());
2250 fixes++;
2251 }
2252 }
2253 }
2254 }
2255
2256
2257
2258
2259
2260
2261
2262 public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2263
2264
2265
2266 if (disabledTables.contains(this.tableName.getBytes())) {
2267 return true;
2268 }
2269 int originalErrorsCount = errors.getErrorList().size();
2270 Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2271 SortedSet<byte[]> splits = sc.getSplits();
2272
2273 byte[] prevKey = null;
2274 byte[] problemKey = null;
2275 for (byte[] key : splits) {
2276 Collection<HbckInfo> ranges = regions.get(key);
2277 if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2278 for (HbckInfo rng : ranges) {
2279 handler.handleRegionStartKeyNotEmpty(rng);
2280 }
2281 }
2282
2283
2284 for (HbckInfo rng : ranges) {
2285
2286 byte[] endKey = rng.getEndKey();
2287 endKey = (endKey.length == 0) ? null : endKey;
2288 if (Bytes.equals(rng.getStartKey(),endKey)) {
2289 handler.handleDegenerateRegion(rng);
2290 }
2291 }
2292
2293 if (ranges.size() == 1) {
2294
2295 if (problemKey != null) {
2296 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2297 }
2298 problemKey = null;
2299 } else if (ranges.size() > 1) {
2300
2301
2302 if (problemKey == null) {
2303
2304 LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2305 problemKey = key;
2306 }
2307 overlapGroups.putAll(problemKey, ranges);
2308
2309
2310 ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2311
2312 for (HbckInfo r1 : ranges) {
2313 subRange.remove(r1);
2314 for (HbckInfo r2 : subRange) {
2315 if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
2316 handler.handleDuplicateStartKeys(r1,r2);
2317 } else {
2318
2319 handler.handleOverlapInRegionChain(r1, r2);
2320 }
2321 }
2322 }
2323
2324 } else if (ranges.size() == 0) {
2325 if (problemKey != null) {
2326 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2327 }
2328 problemKey = null;
2329
2330 byte[] holeStopKey = sc.getSplits().higher(key);
2331
2332 if (holeStopKey != null) {
2333
2334 handler.handleHoleInRegionChain(key, holeStopKey);
2335 }
2336 }
2337 prevKey = key;
2338 }
2339
2340
2341
2342 if (prevKey != null) {
2343 handler.handleRegionEndKeyNotEmpty(prevKey);
2344 }
2345
2346 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2347 handler.handleOverlapGroup(overlap);
2348 }
2349
2350 if (details) {
2351
2352 errors.print("---- Table '" + this.tableName
2353 + "': region split map");
2354 dump(splits, regions);
2355 errors.print("---- Table '" + this.tableName
2356 + "': overlap groups");
2357 dumpOverlapProblems(overlapGroups);
2358 errors.print("There are " + overlapGroups.keySet().size()
2359 + " overlap groups with " + overlapGroups.size()
2360 + " overlapping regions");
2361 }
2362 if (!sidelinedRegions.isEmpty()) {
2363 LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2364 errors.print("---- Table '" + this.tableName
2365 + "': sidelined big overlapped regions");
2366 dumpSidelinedRegions(sidelinedRegions);
2367 }
2368 return errors.getErrorList().size() == originalErrorsCount;
2369 }
2370
2371
2372
2373
2374
2375
2376
2377 void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2378
2379 StringBuilder sb = new StringBuilder();
2380 for (byte[] k : splits) {
2381 sb.setLength(0);
2382 sb.append(Bytes.toStringBinary(k) + ":\t");
2383 for (HbckInfo r : regions.get(k)) {
2384 sb.append("[ "+ r.toString() + ", "
2385 + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2386 }
2387 errors.print(sb.toString());
2388 }
2389 }
2390 }
2391
2392 public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2393
2394
2395 for (byte[] k : regions.keySet()) {
2396 errors.print(Bytes.toStringBinary(k) + ":");
2397 for (HbckInfo r : regions.get(k)) {
2398 errors.print("[ " + r.toString() + ", "
2399 + Bytes.toStringBinary(r.getEndKey()) + "]");
2400 }
2401 errors.print("----");
2402 }
2403 }
2404
2405 public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2406 for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
2407 String tableName = Bytes.toStringBinary(entry.getValue().getTableName());
2408 Path path = entry.getKey();
2409 errors.print("This sidelined region dir should be bulk loaded: "
2410 + path.toString());
2411 errors.print("Bulk load command looks like: "
2412 + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
2413 + path.toUri().getPath() + " "+ tableName);
2414 }
2415 }
2416
2417 public Multimap<byte[], HbckInfo> getOverlapGroups(
2418 String table) {
2419 TableInfo ti = tablesInfo.get(table);
2420 return ti.overlapGroups;
2421 }
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432 HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2433 List<String> tableNames = new ArrayList<String>();
2434 long now = System.currentTimeMillis();
2435
2436 for (HbckInfo hbi : regionInfoMap.values()) {
2437 MetaEntry info = hbi.metaEntry;
2438
2439
2440
2441 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2442 if (info.modTime + timelag < now) {
2443 tableNames.add(info.getTableNameAsString());
2444 } else {
2445 numSkipped.incrementAndGet();
2446 }
2447 }
2448 }
2449 return getHTableDescriptors(tableNames);
2450 }
2451
2452 HTableDescriptor[] getHTableDescriptors(List<String> tableNames) {
2453 HTableDescriptor[] htd = new HTableDescriptor[0];
2454 try {
2455 LOG.info("getHTableDescriptors == tableNames => " + tableNames);
2456 htd = new HBaseAdmin(getConf()).getTableDescriptors(tableNames);
2457 } catch (IOException e) {
2458 LOG.debug("Exception getting table descriptors", e);
2459 }
2460 return htd;
2461 }
2462
2463
2464
2465
2466
2467
2468
2469 private synchronized HbckInfo getOrCreateInfo(String name) {
2470 HbckInfo hbi = regionInfoMap.get(name);
2471 if (hbi == null) {
2472 hbi = new HbckInfo(null);
2473 regionInfoMap.put(name, hbi);
2474 }
2475 return hbi;
2476 }
2477
2478 private void checkAndFixTableLocks() throws IOException {
2479 TableLockChecker checker = new TableLockChecker(createZooKeeperWatcher(), errors);
2480 checker.checkTableLocks();
2481
2482 if (this.fixTableLocks) {
2483 checker.fixExpiredTableLocks();
2484 }
2485 }
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496 boolean checkMetaRegion()
2497 throws IOException, KeeperException, InterruptedException {
2498 List <HbckInfo> metaRegions = Lists.newArrayList();
2499 for (HbckInfo value : regionInfoMap.values()) {
2500 if (value.metaEntry.isMetaRegion()) {
2501 metaRegions.add(value);
2502 }
2503 }
2504
2505
2506 if (metaRegions.size() != 1) {
2507 HRegionLocation rootLocation = connection.locateRegion(
2508 HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
2509 HbckInfo root =
2510 regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
2511
2512
2513 if (metaRegions.size() == 0) {
2514 errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
2515 if (shouldFixAssignments()) {
2516 errors.print("Trying to fix a problem with .META...");
2517 setShouldRerun();
2518
2519 HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
2520 HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
2521 }
2522 }
2523
2524 else if (metaRegions.size() > 1) {
2525 errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
2526 if (shouldFixAssignments()) {
2527 errors.print("Trying to fix a problem with .META...");
2528 setShouldRerun();
2529
2530 List <ServerName> deployedOn = Lists.newArrayList();
2531 for (HbckInfo mRegion : metaRegions) {
2532 deployedOn.add(mRegion.metaEntry.regionServer);
2533 }
2534 HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
2535 }
2536 }
2537
2538 return false;
2539 }
2540
2541 return true;
2542 }
2543
2544
2545
2546
2547
2548 boolean loadMetaEntries() throws IOException {
2549
2550
2551
2552 if (!recordMetaRegion()) {
2553
2554 errors.reportError("Fatal error: unable to get root region location. Exiting...");
2555 return false;
2556 }
2557
2558 MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
2559 int countRecord = 1;
2560
2561
2562 final Comparator<KeyValue> comp = new Comparator<KeyValue>() {
2563 public int compare(KeyValue k1, KeyValue k2) {
2564 return (int)(k1.getTimestamp() - k2.getTimestamp());
2565 }
2566 };
2567
2568 public boolean processRow(Result result) throws IOException {
2569 try {
2570
2571
2572 long ts = Collections.max(result.list(), comp).getTimestamp();
2573 Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(result);
2574 if (pair == null || pair.getFirst() == null) {
2575 emptyRegionInfoQualifiers.add(result);
2576 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2577 "Empty REGIONINFO_QUALIFIER found in .META.");
2578 return true;
2579 }
2580 ServerName sn = null;
2581 if (pair.getSecond() != null) {
2582 sn = pair.getSecond();
2583 }
2584 HRegionInfo hri = pair.getFirst();
2585 if (!(isTableIncluded(hri.getTableNameAsString())
2586 || hri.isMetaRegion())) {
2587 return true;
2588 }
2589 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
2590 MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
2591 HbckInfo hbInfo = new HbckInfo(m);
2592 HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo);
2593 if (previous != null) {
2594 throw new IOException("Two entries in META are same " + previous);
2595 }
2596
2597
2598 if (countRecord % 100 == 0) {
2599 errors.progress();
2600 }
2601 countRecord++;
2602 return true;
2603 } catch (RuntimeException e) {
2604 LOG.error("Result=" + result);
2605 throw e;
2606 }
2607 }
2608 };
2609 if (!checkMetaOnly) {
2610
2611 MetaScanner.metaScan(getConf(), visitor);
2612 }
2613
2614 errors.print("");
2615 return true;
2616 }
2617
2618
2619
2620
2621 static class MetaEntry extends HRegionInfo {
2622 ServerName regionServer;
2623 long modTime;
2624 HRegionInfo splitA, splitB;
2625
2626 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
2627 this(rinfo, regionServer, modTime, null, null);
2628 }
2629
2630 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
2631 HRegionInfo splitA, HRegionInfo splitB) {
2632 super(rinfo);
2633 this.regionServer = regionServer;
2634 this.modTime = modTime;
2635 this.splitA = splitA;
2636 this.splitB = splitB;
2637 }
2638
2639 public boolean equals(Object o) {
2640 boolean superEq = super.equals(o);
2641 if (!superEq) {
2642 return superEq;
2643 }
2644
2645 MetaEntry me = (MetaEntry) o;
2646 if (!regionServer.equals(me.regionServer)) {
2647 return false;
2648 }
2649 return (modTime == me.modTime);
2650 }
2651
2652 @Override
2653 public int hashCode() {
2654 int hash = Arrays.hashCode(getRegionName());
2655 hash ^= getRegionId();
2656 hash ^= Arrays.hashCode(getStartKey());
2657 hash ^= Arrays.hashCode(getEndKey());
2658 hash ^= Boolean.valueOf(isOffline()).hashCode();
2659 hash ^= Arrays.hashCode(getTableName());
2660 if (regionServer != null) {
2661 hash ^= regionServer.hashCode();
2662 }
2663 hash ^= modTime;
2664 return hash;
2665 }
2666 }
2667
2668
2669
2670
2671 static class HdfsEntry {
2672 HRegionInfo hri;
2673 Path hdfsRegionDir = null;
2674 long hdfsRegionDirModTime = 0;
2675 boolean hdfsRegioninfoFilePresent = false;
2676 boolean hdfsOnlyEdits = false;
2677 }
2678
2679
2680
2681
2682 static class OnlineEntry {
2683 HRegionInfo hri;
2684 ServerName hsa;
2685
2686 public String toString() {
2687 return hsa.toString() + ";" + hri.getRegionNameAsString();
2688 }
2689 }
2690
2691
2692
2693
2694
2695 public static class HbckInfo implements KeyRange {
2696 private MetaEntry metaEntry = null;
2697 private HdfsEntry hdfsEntry = null;
2698 private List<OnlineEntry> deployedEntries = Lists.newArrayList();
2699 private List<ServerName> deployedOn = Lists.newArrayList();
2700 private boolean skipChecks = false;
2701
2702 HbckInfo(MetaEntry metaEntry) {
2703 this.metaEntry = metaEntry;
2704 }
2705
2706 public synchronized void addServer(HRegionInfo hri, ServerName server) {
2707 OnlineEntry rse = new OnlineEntry() ;
2708 rse.hri = hri;
2709 rse.hsa = server;
2710 this.deployedEntries.add(rse);
2711 this.deployedOn.add(server);
2712 }
2713
2714 public synchronized String toString() {
2715 StringBuilder sb = new StringBuilder();
2716 sb.append("{ meta => ");
2717 sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
2718 sb.append( ", hdfs => " + getHdfsRegionDir());
2719 sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
2720 sb.append(" }");
2721 return sb.toString();
2722 }
2723
2724 @Override
2725 public byte[] getStartKey() {
2726 if (this.metaEntry != null) {
2727 return this.metaEntry.getStartKey();
2728 } else if (this.hdfsEntry != null) {
2729 return this.hdfsEntry.hri.getStartKey();
2730 } else {
2731 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2732 return null;
2733 }
2734 }
2735
2736 @Override
2737 public byte[] getEndKey() {
2738 if (this.metaEntry != null) {
2739 return this.metaEntry.getEndKey();
2740 } else if (this.hdfsEntry != null) {
2741 return this.hdfsEntry.hri.getEndKey();
2742 } else {
2743 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2744 return null;
2745 }
2746 }
2747
2748 public byte[] getTableName() {
2749 if (this.metaEntry != null) {
2750 return this.metaEntry.getTableName();
2751 } else if (this.hdfsEntry != null) {
2752
2753
2754 Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
2755 return Bytes.toBytes(tableDir.getName());
2756 } else {
2757
2758
2759 return null;
2760 }
2761 }
2762
2763 public String getRegionNameAsString() {
2764 if (metaEntry != null) {
2765 return metaEntry.getRegionNameAsString();
2766 } else if (hdfsEntry != null) {
2767 if (hdfsEntry.hri != null) {
2768 return hdfsEntry.hri.getRegionNameAsString();
2769 }
2770 }
2771 return null;
2772 }
2773
2774 public byte[] getRegionName() {
2775 if (metaEntry != null) {
2776 return metaEntry.getRegionName();
2777 } else if (hdfsEntry != null) {
2778 return hdfsEntry.hri.getRegionName();
2779 } else {
2780 return null;
2781 }
2782 }
2783
2784 Path getHdfsRegionDir() {
2785 if (hdfsEntry == null) {
2786 return null;
2787 }
2788 return hdfsEntry.hdfsRegionDir;
2789 }
2790
2791 boolean containsOnlyHdfsEdits() {
2792 if (hdfsEntry == null) {
2793 return false;
2794 }
2795 return hdfsEntry.hdfsOnlyEdits;
2796 }
2797
2798 boolean isHdfsRegioninfoPresent() {
2799 if (hdfsEntry == null) {
2800 return false;
2801 }
2802 return hdfsEntry.hdfsRegioninfoFilePresent;
2803 }
2804
2805 long getModTime() {
2806 if (hdfsEntry == null) {
2807 return 0;
2808 }
2809 return hdfsEntry.hdfsRegionDirModTime;
2810 }
2811
2812 HRegionInfo getHdfsHRI() {
2813 if (hdfsEntry == null) {
2814 return null;
2815 }
2816 return hdfsEntry.hri;
2817 }
2818
2819 public void setSkipChecks(boolean skipChecks) {
2820 this.skipChecks = skipChecks;
2821 }
2822
2823 public boolean isSkipChecks() {
2824 return skipChecks;
2825 }
2826 }
2827
2828 final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
2829 @Override
2830 public int compare(HbckInfo l, HbckInfo r) {
2831 if (l == r) {
2832
2833 return 0;
2834 }
2835
2836 int tableCompare = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2837 l.getTableName(), r.getTableName());
2838 if (tableCompare != 0) {
2839 return tableCompare;
2840 }
2841
2842 int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2843 l.getStartKey(), r.getStartKey());
2844 if (startComparison != 0) {
2845 return startComparison;
2846 }
2847
2848
2849 byte[] endKey = r.getEndKey();
2850 endKey = (endKey.length == 0) ? null : endKey;
2851 byte[] endKey2 = l.getEndKey();
2852 endKey2 = (endKey2.length == 0) ? null : endKey2;
2853 int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2854 endKey2, endKey);
2855
2856 if (endComparison != 0) {
2857 return endComparison;
2858 }
2859
2860
2861
2862 if (l.hdfsEntry == null && r.hdfsEntry == null) {
2863 return 0;
2864 }
2865 if (l.hdfsEntry == null && r.hdfsEntry != null) {
2866 return 1;
2867 }
2868
2869 if (r.hdfsEntry == null) {
2870 return -1;
2871 }
2872
2873 return (int) (l.hdfsEntry.hri.getRegionId()- r.hdfsEntry.hri.getRegionId());
2874 }
2875 };
2876
2877
2878
2879
2880 private void printTableSummary(SortedMap<String, TableInfo> tablesInfo) {
2881 StringBuilder sb = new StringBuilder();
2882 errors.print("Summary:");
2883 for (TableInfo tInfo : tablesInfo.values()) {
2884 if (errors.tableHasErrors(tInfo)) {
2885 errors.print("Table " + tInfo.getName() + " is inconsistent.");
2886 } else {
2887 errors.print(" " + tInfo.getName() + " is okay.");
2888 }
2889 errors.print(" Number of regions: " + tInfo.getNumRegions());
2890 sb.setLength(0);
2891 sb.append(" Deployed on: ");
2892 for (ServerName server : tInfo.deployedOn) {
2893 sb.append(" " + server.toString());
2894 }
2895 errors.print(sb.toString());
2896 }
2897 }
2898
2899 static ErrorReporter getErrorReporter(
2900 final Configuration conf) throws ClassNotFoundException {
2901 Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
2902 return (ErrorReporter)ReflectionUtils.newInstance(reporter, conf);
2903 }
2904
2905 public interface ErrorReporter {
2906 public static enum ERROR_CODE {
2907 UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
2908 NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
2909 MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
2910 FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
2911 HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
2912 ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
2913 WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK
2914 }
2915 public void clear();
2916 public void report(String message);
2917 public void reportError(String message);
2918 public void reportError(ERROR_CODE errorCode, String message);
2919 public void reportError(ERROR_CODE errorCode, String message, TableInfo table);
2920 public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
2921 public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info1, HbckInfo info2);
2922 public int summarize();
2923 public void detail(String details);
2924 public ArrayList<ERROR_CODE> getErrorList();
2925 public void progress();
2926 public void print(String message);
2927 public void resetErrors();
2928 public boolean tableHasErrors(TableInfo table);
2929 }
2930
2931 static class PrintingErrorReporter implements ErrorReporter {
2932 public int errorCount = 0;
2933 private int showProgress;
2934
2935 Set<TableInfo> errorTables = new HashSet<TableInfo>();
2936
2937
2938 private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
2939
2940 public void clear() {
2941 errorTables.clear();
2942 errorList.clear();
2943 errorCount = 0;
2944 }
2945
2946 public synchronized void reportError(ERROR_CODE errorCode, String message) {
2947 if (errorCode == ERROR_CODE.WRONG_USAGE) {
2948 System.err.println(message);
2949 return;
2950 }
2951
2952 errorList.add(errorCode);
2953 if (!summary) {
2954 System.out.println("ERROR: " + message);
2955 }
2956 errorCount++;
2957 showProgress = 0;
2958 }
2959
2960 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2961 errorTables.add(table);
2962 reportError(errorCode, message);
2963 }
2964
2965 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
2966 HbckInfo info) {
2967 errorTables.add(table);
2968 String reference = "(region " + info.getRegionNameAsString() + ")";
2969 reportError(errorCode, reference + " " + message);
2970 }
2971
2972 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
2973 HbckInfo info1, HbckInfo info2) {
2974 errorTables.add(table);
2975 String reference = "(regions " + info1.getRegionNameAsString()
2976 + " and " + info2.getRegionNameAsString() + ")";
2977 reportError(errorCode, reference + " " + message);
2978 }
2979
2980 public synchronized void reportError(String message) {
2981 reportError(ERROR_CODE.UNKNOWN, message);
2982 }
2983
2984
2985
2986
2987
2988
2989 public synchronized void report(String message) {
2990 if (! summary) {
2991 System.out.println("ERROR: " + message);
2992 }
2993 showProgress = 0;
2994 }
2995
2996 public synchronized int summarize() {
2997 System.out.println(Integer.toString(errorCount) +
2998 " inconsistencies detected.");
2999 if (errorCount == 0) {
3000 System.out.println("Status: OK");
3001 return 0;
3002 } else {
3003 System.out.println("Status: INCONSISTENT");
3004 return -1;
3005 }
3006 }
3007
3008 public ArrayList<ERROR_CODE> getErrorList() {
3009 return errorList;
3010 }
3011
3012 public synchronized void print(String message) {
3013 if (!summary) {
3014 System.out.println(message);
3015 }
3016 }
3017
3018 @Override
3019 public boolean tableHasErrors(TableInfo table) {
3020 return errorTables.contains(table);
3021 }
3022
3023 @Override
3024 public void resetErrors() {
3025 errorCount = 0;
3026 }
3027
3028 public synchronized void detail(String message) {
3029 if (details) {
3030 System.out.println(message);
3031 }
3032 showProgress = 0;
3033 }
3034
3035 public synchronized void progress() {
3036 if (showProgress++ == 10) {
3037 if (!summary) {
3038 System.out.print(".");
3039 }
3040 showProgress = 0;
3041 }
3042 }
3043 }
3044
3045
3046
3047
3048 static class WorkItemRegion implements Callable<Void> {
3049 private HBaseFsck hbck;
3050 private ServerName rsinfo;
3051 private ErrorReporter errors;
3052 private HConnection connection;
3053
3054 WorkItemRegion(HBaseFsck hbck, ServerName info,
3055 ErrorReporter errors, HConnection connection) {
3056 this.hbck = hbck;
3057 this.rsinfo = info;
3058 this.errors = errors;
3059 this.connection = connection;
3060 }
3061
3062 @Override
3063 public synchronized Void call() throws IOException {
3064 errors.progress();
3065 try {
3066 BlockingInterface server = connection.getAdmin(rsinfo);
3067
3068
3069 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3070 regions = filterRegions(regions);
3071
3072 if (details) {
3073 errors.detail("RegionServer: " + rsinfo.getServerName() +
3074 " number of regions: " + regions.size());
3075 for (HRegionInfo rinfo: regions) {
3076 errors.detail(" " + rinfo.getRegionNameAsString() +
3077 " id: " + rinfo.getRegionId() +
3078 " encoded_name: " + rinfo.getEncodedName() +
3079 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3080 " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3081 }
3082 }
3083
3084
3085 for (HRegionInfo r:regions) {
3086 HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3087 hbi.addServer(r, rsinfo);
3088 }
3089 } catch (IOException e) {
3090 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3091 " Unable to fetch region information. " + e);
3092 throw e;
3093 }
3094 return null;
3095 }
3096
3097 private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3098 List<HRegionInfo> ret = Lists.newArrayList();
3099 for (HRegionInfo hri : regions) {
3100 if (hri.isMetaTable() || (!hbck.checkMetaOnly
3101 && hbck.isTableIncluded(hri.getTableNameAsString()))) {
3102 ret.add(hri);
3103 }
3104 }
3105 return ret;
3106 }
3107 }
3108
3109
3110
3111
3112
3113 static class WorkItemHdfsDir implements Callable<Void> {
3114 private HBaseFsck hbck;
3115 private FileStatus tableDir;
3116 private ErrorReporter errors;
3117 private FileSystem fs;
3118
3119 WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3120 FileStatus status) {
3121 this.hbck = hbck;
3122 this.fs = fs;
3123 this.tableDir = status;
3124 this.errors = errors;
3125 }
3126
3127 @Override
3128 public synchronized Void call() throws IOException {
3129 try {
3130 String tableName = tableDir.getPath().getName();
3131
3132 if (tableName.startsWith(".") &&
3133 !tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME))) {
3134 return null;
3135 }
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190 static class WorkItemHdfsRegionInfo implements Callable<Void> {
3191 private HbckInfo hbi;
3192 private HBaseFsck hbck;
3193 private ErrorReporter errors;
3194
3195 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3196 this.hbi = hbi;
3197 this.hbck = hbck;
3198 this.errors = errors;
3199 }
3200
3201 @Override
3202 public synchronized Void call() throws IOException {
3203
3204 if (hbi.getHdfsHRI() == null) {
3205 try {
3206 hbck.loadHdfsRegioninfo(hbi);
3207 } catch (IOException ioe) {
3208 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3209 + Bytes.toString(hbi.getTableName()) + " in hdfs dir "
3210 + hbi.getHdfsRegionDir()
3211 + "! It may be an invalid format or version file. Treating as "
3212 + "an orphaned regiondir.";
3213 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3214 try {
3215 hbck.debugLsr(hbi.getHdfsRegionDir());
3216 } catch (IOException ioe2) {
3217 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3218 throw ioe2;
3219 }
3220 hbck.orphanHdfsDirs.add(hbi);
3221 throw ioe;
3222 }
3223 }
3224 return null;
3225 }
3226 };
3227
3228
3229
3230
3231
3232 public void setDisplayFullReport() {
3233 details = true;
3234 }
3235
3236
3237
3238
3239
3240 void setSummary() {
3241 summary = true;
3242 }
3243
3244
3245
3246
3247
3248 void setCheckMetaOnly() {
3249 checkMetaOnly = true;
3250 }
3251
3252
3253
3254
3255
3256 public void setFixTableLocks(boolean shouldFix) {
3257 fixTableLocks = shouldFix;
3258 }
3259
3260
3261
3262
3263
3264
3265
3266 void setShouldRerun() {
3267 rerun = true;
3268 }
3269
3270 boolean shouldRerun() {
3271 return rerun;
3272 }
3273
3274
3275
3276
3277
3278 public void setFixAssignments(boolean shouldFix) {
3279 fixAssignments = shouldFix;
3280 }
3281
3282 boolean shouldFixAssignments() {
3283 return fixAssignments;
3284 }
3285
3286 public void setFixMeta(boolean shouldFix) {
3287 fixMeta = shouldFix;
3288 }
3289
3290 boolean shouldFixMeta() {
3291 return fixMeta;
3292 }
3293
3294 public void setFixEmptyMetaCells(boolean shouldFix) {
3295 fixEmptyMetaCells = shouldFix;
3296 }
3297
3298 boolean shouldFixEmptyMetaCells() {
3299 return fixEmptyMetaCells;
3300 }
3301
3302 public void setCheckHdfs(boolean checking) {
3303 checkHdfs = checking;
3304 }
3305
3306 boolean shouldCheckHdfs() {
3307 return checkHdfs;
3308 }
3309
3310 public void setFixHdfsHoles(boolean shouldFix) {
3311 fixHdfsHoles = shouldFix;
3312 }
3313
3314 boolean shouldFixHdfsHoles() {
3315 return fixHdfsHoles;
3316 }
3317
3318 public void setFixTableOrphans(boolean shouldFix) {
3319 fixTableOrphans = shouldFix;
3320 }
3321
3322 boolean shouldFixTableOrphans() {
3323 return fixTableOrphans;
3324 }
3325
3326 public void setFixHdfsOverlaps(boolean shouldFix) {
3327 fixHdfsOverlaps = shouldFix;
3328 }
3329
3330 boolean shouldFixHdfsOverlaps() {
3331 return fixHdfsOverlaps;
3332 }
3333
3334 public void setFixHdfsOrphans(boolean shouldFix) {
3335 fixHdfsOrphans = shouldFix;
3336 }
3337
3338 boolean shouldFixHdfsOrphans() {
3339 return fixHdfsOrphans;
3340 }
3341
3342 public void setFixVersionFile(boolean shouldFix) {
3343 fixVersionFile = shouldFix;
3344 }
3345
3346 public boolean shouldFixVersionFile() {
3347 return fixVersionFile;
3348 }
3349
3350 public void setSidelineBigOverlaps(boolean sbo) {
3351 this.sidelineBigOverlaps = sbo;
3352 }
3353
3354 public boolean shouldSidelineBigOverlaps() {
3355 return sidelineBigOverlaps;
3356 }
3357
3358 public void setFixSplitParents(boolean shouldFix) {
3359 fixSplitParents = shouldFix;
3360 }
3361
3362 boolean shouldFixSplitParents() {
3363 return fixSplitParents;
3364 }
3365
3366 public void setFixReferenceFiles(boolean shouldFix) {
3367 fixReferenceFiles = shouldFix;
3368 }
3369
3370 boolean shouldFixReferenceFiles() {
3371 return fixReferenceFiles;
3372 }
3373
3374 public boolean shouldIgnorePreCheckPermission() {
3375 return ignorePreCheckPermission;
3376 }
3377
3378 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3379 this.ignorePreCheckPermission = ignorePreCheckPermission;
3380 }
3381
3382
3383
3384
3385 public void setMaxMerge(int mm) {
3386 this.maxMerge = mm;
3387 }
3388
3389 public int getMaxMerge() {
3390 return maxMerge;
3391 }
3392
3393 public void setMaxOverlapsToSideline(int mo) {
3394 this.maxOverlapsToSideline = mo;
3395 }
3396
3397 public int getMaxOverlapsToSideline() {
3398 return maxOverlapsToSideline;
3399 }
3400
3401
3402
3403
3404
3405 boolean isTableIncluded(String table) {
3406 return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
3407 }
3408
3409 public void includeTable(String table) {
3410 tablesIncluded.add(table);
3411 }
3412
3413 Set<String> getIncludedTables() {
3414 return new HashSet<String>(tablesIncluded);
3415 }
3416
3417
3418
3419
3420
3421
3422 public void setTimeLag(long seconds) {
3423 timelag = seconds * 1000;
3424 }
3425
3426
3427
3428
3429
3430 public void setSidelineDir(String sidelineDir) {
3431 this.sidelineDir = new Path(sidelineDir);
3432 }
3433
3434 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
3435 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3436 }
3437
3438 public HFileCorruptionChecker getHFilecorruptionChecker() {
3439 return hfcc;
3440 }
3441
3442 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3443 this.hfcc = hfcc;
3444 }
3445
3446 public void setRetCode(int code) {
3447 this.retcode = code;
3448 }
3449
3450 public int getRetCode() {
3451 return retcode;
3452 }
3453
3454 protected HBaseFsck printUsageAndExit() {
3455 StringWriter sw = new StringWriter(2048);
3456 PrintWriter out = new PrintWriter(sw);
3457 out.println("Usage: fsck [opts] {only tables}");
3458 out.println(" where [opts] are:");
3459 out.println(" -help Display help options (this)");
3460 out.println(" -details Display full report of all regions.");
3461 out.println(" -timelag <timeInSeconds> Process only regions that " +
3462 " have not experienced any metadata updates in the last " +
3463 " <timeInSeconds> seconds.");
3464 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
3465 " before checking if the fix worked if run with -fix");
3466 out.println(" -summary Print only summary of the tables and status.");
3467 out.println(" -metaonly Only check the state of the .META. table.");
3468 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
3469
3470 out.println("");
3471 out.println(" Metadata Repair options: (expert features, use with caution!)");
3472 out.println(" -fix Try to fix region assignments. This is for backwards compatiblity");
3473 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
3474 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
3475 out.println(" -noHdfsChecking Don't load/check region info from HDFS."
3476 + " Assumes META region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3477 out.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
3478 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
3479 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3480 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
3481 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
3482 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
3483 out.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
3484 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
3485 out.println(" -fixSplitParents Try to force offline split parents to be online.");
3486 out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
3487 out.println(" -fixReferenceFiles Try to offline lingering reference store files");
3488 out.println(" -fixEmptyMetaCells Try to fix .META. entries not referencing any region"
3489 + " (empty REGIONINFO_QUALIFIER rows)");
3490
3491 out.println("");
3492 out.println(" Datafile Repair options: (expert features, use with caution!)");
3493 out.println(" -checkCorruptHFiles Check all Hfiles by opening them to make sure they are valid");
3494 out.println(" -sidelineCorruptHfiles Quarantine corrupted HFiles. implies -checkCorruptHfiles");
3495
3496 out.println("");
3497 out.println(" Metadata Repair shortcuts");
3498 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
3499 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks");
3500 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3501
3502 out.println("");
3503 out.println(" Table lock options");
3504 out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
3505
3506 out.flush();
3507 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3508
3509 setRetCode(-2);
3510 return this;
3511 }
3512
3513
3514
3515
3516
3517
3518
3519 public static void main(String[] args) throws Exception {
3520
3521 Configuration conf = HBaseConfiguration.create();
3522 Path hbasedir = FSUtils.getRootDir(conf);
3523 URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3524 FSUtils.setFsDefault(conf, new Path(defaultFs));
3525
3526 int ret = ToolRunner.run(new HBaseFsck(conf), args);
3527 System.exit(ret);
3528 }
3529
3530 @Override
3531 public int run(String[] args) throws Exception {
3532 exec(executor, args);
3533 return getRetCode();
3534 }
3535
3536 public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
3537 ServiceException, InterruptedException {
3538 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3539
3540 boolean checkCorruptHFiles = false;
3541 boolean sidelineCorruptHFiles = false;
3542
3543
3544 for (int i = 0; i < args.length; i++) {
3545 String cmd = args[i];
3546 if (cmd.equals("-help") || cmd.equals("-h")) {
3547 return printUsageAndExit();
3548 } else if (cmd.equals("-details")) {
3549 setDisplayFullReport();
3550 } else if (cmd.equals("-timelag")) {
3551 if (i == args.length - 1) {
3552 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3553 return printUsageAndExit();
3554 }
3555 try {
3556 long timelag = Long.parseLong(args[i+1]);
3557 setTimeLag(timelag);
3558 } catch (NumberFormatException e) {
3559 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3560 return printUsageAndExit();
3561 }
3562 i++;
3563 } else if (cmd.equals("-sleepBeforeRerun")) {
3564 if (i == args.length - 1) {
3565 errors.reportError(ERROR_CODE.WRONG_USAGE,
3566 "HBaseFsck: -sleepBeforeRerun needs a value.");
3567 return printUsageAndExit();
3568 }
3569 try {
3570 sleepBeforeRerun = Long.parseLong(args[i+1]);
3571 } catch (NumberFormatException e) {
3572 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3573 return printUsageAndExit();
3574 }
3575 i++;
3576 } else if (cmd.equals("-sidelineDir")) {
3577 if (i == args.length - 1) {
3578 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3579 return printUsageAndExit();
3580 }
3581 i++;
3582 setSidelineDir(args[i]);
3583 } else if (cmd.equals("-fix")) {
3584 errors.reportError(ERROR_CODE.WRONG_USAGE,
3585 "This option is deprecated, please use -fixAssignments instead.");
3586 setFixAssignments(true);
3587 } else if (cmd.equals("-fixAssignments")) {
3588 setFixAssignments(true);
3589 } else if (cmd.equals("-fixMeta")) {
3590 setFixMeta(true);
3591 } else if (cmd.equals("-noHdfsChecking")) {
3592 setCheckHdfs(false);
3593 } else if (cmd.equals("-fixHdfsHoles")) {
3594 setFixHdfsHoles(true);
3595 } else if (cmd.equals("-fixHdfsOrphans")) {
3596 setFixHdfsOrphans(true);
3597 } else if (cmd.equals("-fixTableOrphans")) {
3598 setFixTableOrphans(true);
3599 } else if (cmd.equals("-fixHdfsOverlaps")) {
3600 setFixHdfsOverlaps(true);
3601 } else if (cmd.equals("-fixVersionFile")) {
3602 setFixVersionFile(true);
3603 } else if (cmd.equals("-sidelineBigOverlaps")) {
3604 setSidelineBigOverlaps(true);
3605 } else if (cmd.equals("-fixSplitParents")) {
3606 setFixSplitParents(true);
3607 } else if (cmd.equals("-ignorePreCheckPermission")) {
3608 setIgnorePreCheckPermission(true);
3609 } else if (cmd.equals("-checkCorruptHFiles")) {
3610 checkCorruptHFiles = true;
3611 } else if (cmd.equals("-sidelineCorruptHFiles")) {
3612 sidelineCorruptHFiles = true;
3613 } else if (cmd.equals("-fixReferenceFiles")) {
3614 setFixReferenceFiles(true);
3615 } else if (cmd.equals("-fixEmptyMetaCells")) {
3616 setFixEmptyMetaCells(true);
3617 } else if (cmd.equals("-repair")) {
3618
3619
3620 setFixHdfsHoles(true);
3621 setFixHdfsOrphans(true);
3622 setFixMeta(true);
3623 setFixAssignments(true);
3624 setFixHdfsOverlaps(true);
3625 setFixVersionFile(true);
3626 setSidelineBigOverlaps(true);
3627 setFixSplitParents(false);
3628 setCheckHdfs(true);
3629 setFixReferenceFiles(true);
3630 setFixTableLocks(true);
3631 } else if (cmd.equals("-repairHoles")) {
3632
3633 setFixHdfsHoles(true);
3634 setFixHdfsOrphans(false);
3635 setFixMeta(true);
3636 setFixAssignments(true);
3637 setFixHdfsOverlaps(false);
3638 setSidelineBigOverlaps(false);
3639 setFixSplitParents(false);
3640 setCheckHdfs(true);
3641 } else if (cmd.equals("-maxOverlapsToSideline")) {
3642 if (i == args.length - 1) {
3643 errors.reportError(ERROR_CODE.WRONG_USAGE,
3644 "-maxOverlapsToSideline needs a numeric value argument.");
3645 return printUsageAndExit();
3646 }
3647 try {
3648 int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
3649 setMaxOverlapsToSideline(maxOverlapsToSideline);
3650 } catch (NumberFormatException e) {
3651 errors.reportError(ERROR_CODE.WRONG_USAGE,
3652 "-maxOverlapsToSideline needs a numeric value argument.");
3653 return printUsageAndExit();
3654 }
3655 i++;
3656 } else if (cmd.equals("-maxMerge")) {
3657 if (i == args.length - 1) {
3658 errors.reportError(ERROR_CODE.WRONG_USAGE,
3659 "-maxMerge needs a numeric value argument.");
3660 return printUsageAndExit();
3661 }
3662 try {
3663 int maxMerge = Integer.parseInt(args[i+1]);
3664 setMaxMerge(maxMerge);
3665 } catch (NumberFormatException e) {
3666 errors.reportError(ERROR_CODE.WRONG_USAGE,
3667 "-maxMerge needs a numeric value argument.");
3668 return printUsageAndExit();
3669 }
3670 i++;
3671 } else if (cmd.equals("-summary")) {
3672 setSummary();
3673 } else if (cmd.equals("-metaonly")) {
3674 setCheckMetaOnly();
3675 } else if (cmd.equals("-fixTableLocks")) {
3676 setFixTableLocks(true);
3677 } else if (cmd.startsWith("-")) {
3678 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
3679 return printUsageAndExit();
3680 } else {
3681 includeTable(cmd);
3682 errors.print("Allow checking/fixes for table: " + cmd);
3683 }
3684 }
3685
3686
3687 try {
3688 preCheckPermission();
3689 } catch (AccessControlException ace) {
3690 Runtime.getRuntime().exit(-1);
3691 } catch (IOException ioe) {
3692 Runtime.getRuntime().exit(-1);
3693 }
3694
3695
3696 connect();
3697
3698
3699 if (checkCorruptHFiles || sidelineCorruptHFiles) {
3700 LOG.info("Checking all hfiles for corruption");
3701 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3702 setHFileCorruptionChecker(hfcc);
3703 Collection<String> tables = getIncludedTables();
3704 Collection<Path> tableDirs = new ArrayList<Path>();
3705 Path rootdir = FSUtils.getRootDir(getConf());
3706 if (tables.size() > 0) {
3707 for (String t : tables) {
3708 tableDirs.add(FSUtils.getTablePath(rootdir, t));
3709 }
3710 } else {
3711 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
3712 }
3713 hfcc.checkTables(tableDirs);
3714 hfcc.report(errors);
3715 }
3716
3717
3718 int code = onlineHbck();
3719 setRetCode(code);
3720
3721
3722
3723
3724 if (shouldRerun()) {
3725 try {
3726 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3727 Thread.sleep(sleepBeforeRerun);
3728 } catch (InterruptedException ie) {
3729 return this;
3730 }
3731
3732 setFixAssignments(false);
3733 setFixMeta(false);
3734 setFixHdfsHoles(false);
3735 setFixHdfsOverlaps(false);
3736 setFixVersionFile(false);
3737 setFixTableOrphans(false);
3738 errors.resetErrors();
3739 code = onlineHbck();
3740 setRetCode(code);
3741 }
3742 return this;
3743 }
3744
3745
3746
3747
3748 void debugLsr(Path p) throws IOException {
3749 debugLsr(getConf(), p, errors);
3750 }
3751
3752
3753
3754
3755 public static void debugLsr(Configuration conf,
3756 Path p) throws IOException {
3757 debugLsr(conf, p, new PrintingErrorReporter());
3758 }
3759
3760
3761
3762
3763 public static void debugLsr(Configuration conf,
3764 Path p, ErrorReporter errors) throws IOException {
3765 if (!LOG.isDebugEnabled() || p == null) {
3766 return;
3767 }
3768 FileSystem fs = p.getFileSystem(conf);
3769
3770 if (!fs.exists(p)) {
3771
3772 return;
3773 }
3774 errors.print(p.toString());
3775
3776 if (fs.isFile(p)) {
3777 return;
3778 }
3779
3780 if (fs.getFileStatus(p).isDir()) {
3781 FileStatus[] fss= fs.listStatus(p);
3782 for (FileStatus status : fss) {
3783 debugLsr(conf, status.getPath(), errors);
3784 }
3785 }
3786 }
3787 }