001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import java.io.Closeable; 021import java.io.FileNotFoundException; 022import java.io.IOException; 023import java.io.InterruptedIOException; 024import java.io.PrintWriter; 025import java.io.StringWriter; 026import java.net.InetAddress; 027import java.net.URI; 028import java.util.ArrayList; 029import java.util.Collection; 030import java.util.Collections; 031import java.util.Comparator; 032import java.util.EnumSet; 033import java.util.HashMap; 034import java.util.HashSet; 035import java.util.Iterator; 036import java.util.List; 037import java.util.Locale; 038import java.util.Map; 039import java.util.Map.Entry; 040import java.util.Objects; 041import java.util.Optional; 042import java.util.Set; 043import java.util.SortedMap; 044import java.util.TreeMap; 045import java.util.Vector; 046import java.util.concurrent.Callable; 047import java.util.concurrent.ConcurrentSkipListMap; 048import java.util.concurrent.ExecutionException; 049import java.util.concurrent.ExecutorService; 050import java.util.concurrent.Executors; 051import java.util.concurrent.Future; 052import java.util.concurrent.FutureTask; 053import java.util.concurrent.ScheduledThreadPoolExecutor; 054import java.util.concurrent.TimeUnit; 055import java.util.concurrent.TimeoutException; 056import java.util.concurrent.atomic.AtomicBoolean; 057import java.util.concurrent.atomic.AtomicInteger; 058 059import org.apache.commons.io.IOUtils; 060import org.apache.commons.lang3.StringUtils; 061import org.apache.hadoop.conf.Configuration; 062import org.apache.hadoop.conf.Configured; 063import org.apache.hadoop.fs.FSDataOutputStream; 064import org.apache.hadoop.fs.FileStatus; 065import org.apache.hadoop.fs.FileSystem; 066import org.apache.hadoop.fs.Path; 067import org.apache.hadoop.fs.permission.FsAction; 068import org.apache.hadoop.fs.permission.FsPermission; 069import org.apache.hadoop.hbase.Abortable; 070import org.apache.hadoop.hbase.Cell; 071import org.apache.hadoop.hbase.CellUtil; 072import org.apache.hadoop.hbase.ClusterMetrics; 073import org.apache.hadoop.hbase.ClusterMetrics.Option; 074import org.apache.hadoop.hbase.HBaseConfiguration; 075import org.apache.hadoop.hbase.HBaseInterfaceAudience; 076import org.apache.hadoop.hbase.HConstants; 077import org.apache.hadoop.hbase.HRegionLocation; 078import org.apache.hadoop.hbase.KeyValue; 079import org.apache.hadoop.hbase.MasterNotRunningException; 080import org.apache.hadoop.hbase.MetaTableAccessor; 081import org.apache.hadoop.hbase.RegionLocations; 082import org.apache.hadoop.hbase.ServerName; 083import org.apache.hadoop.hbase.TableName; 084import org.apache.hadoop.hbase.ZooKeeperConnectionException; 085import 
org.apache.hadoop.hbase.client.Admin; 086import org.apache.hadoop.hbase.client.ClusterConnection; 087import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 088import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 089import org.apache.hadoop.hbase.client.Connection; 090import org.apache.hadoop.hbase.client.ConnectionFactory; 091import org.apache.hadoop.hbase.client.Delete; 092import org.apache.hadoop.hbase.client.Get; 093import org.apache.hadoop.hbase.client.Put; 094import org.apache.hadoop.hbase.client.RegionInfo; 095import org.apache.hadoop.hbase.client.RegionInfoBuilder; 096import org.apache.hadoop.hbase.client.RegionReplicaUtil; 097import org.apache.hadoop.hbase.client.Result; 098import org.apache.hadoop.hbase.client.RowMutations; 099import org.apache.hadoop.hbase.client.Table; 100import org.apache.hadoop.hbase.client.TableDescriptor; 101import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 102import org.apache.hadoop.hbase.client.TableState; 103import org.apache.hadoop.hbase.io.FileLink; 104import org.apache.hadoop.hbase.io.HFileLink; 105import org.apache.hadoop.hbase.io.hfile.CacheConfig; 106import org.apache.hadoop.hbase.io.hfile.HFile; 107import org.apache.hadoop.hbase.master.RegionState; 108import org.apache.hadoop.hbase.regionserver.HRegion; 109import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 110import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 111import org.apache.hadoop.hbase.security.AccessDeniedException; 112import org.apache.hadoop.hbase.security.UserProvider; 113import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator; 114import org.apache.hadoop.hbase.util.HbckErrorReporter.ERROR_CODE; 115import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker; 116import org.apache.hadoop.hbase.util.hbck.ReplicationChecker; 117import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler; 118import org.apache.hadoop.hbase.wal.WALSplitter; 119import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 120import org.apache.hadoop.hbase.zookeeper.ZKUtil; 121import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 122import org.apache.hadoop.hbase.zookeeper.ZNodePaths; 123import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; 124import org.apache.hadoop.ipc.RemoteException; 125import org.apache.hadoop.security.UserGroupInformation; 126import org.apache.hadoop.util.ReflectionUtils; 127import org.apache.hadoop.util.Tool; 128import org.apache.hadoop.util.ToolRunner; 129import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 130import org.apache.hbase.thirdparty.com.google.common.collect.Sets; 131import org.apache.yetus.audience.InterfaceAudience; 132import org.apache.yetus.audience.InterfaceStability; 133import org.apache.zookeeper.KeeperException; 134import org.slf4j.Logger; 135import org.slf4j.LoggerFactory; 136 137import org.apache.hbase.thirdparty.com.google.common.base.Joiner; 138import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 139import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 140 141import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 142import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService.BlockingInterface; 143 144/** 145 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and 146 * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. It does not 147 * work with hbase-2.x; it can read state but is not allowed to change state; i.e. effect 'repair'. 
148 * See hbck2 (HBASE-19121) for a hbck tool for hbase2. 149 * 150 * <p> 151 * Region consistency checks verify that hbase:meta, region deployment on region 152 * servers and the state of data in HDFS (.regioninfo files) all are in 153 * accordance. 154 * <p> 155 * Table integrity checks verify that all possible row keys resolve to exactly 156 * one region of a table. This means there are no individual degenerate 157 * or backwards regions; no holes between regions; and that there are no 158 * overlapping regions. 159 * <p> 160 * The general repair strategy works in two phases: 161 * <ol> 162 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions) 163 * <li> Repair Region Consistency with hbase:meta and assignments 164 * </ol> 165 * <p> 166 * For table integrity repairs, the tables' region directories are scanned 167 * for .regioninfo files. Each table's integrity is then verified. If there 168 * are any orphan regions (regions with no .regioninfo files) or holes, new 169 * regions are fabricated. Backwards regions are sidelined as well as empty 170 * degenerate (endkey==startkey) regions. If there are any overlapping regions, 171 * a new region is created and all data is merged into the new region. 172 * <p> 173 * Table integrity repairs deal solely with HDFS and could potentially be done 174 * offline -- the hbase region servers or master do not need to be running. 175 * This phase can eventually be used to completely reconstruct the hbase:meta table in 176 * an offline fashion. 177 * <p> 178 * Region consistency requires three conditions -- 1) valid .regioninfo file 179 * present in an HDFS region dir, 2) valid row with .regioninfo data in META, 180 * and 3) a region is deployed only at the regionserver that was assigned to 181 * with proper state in the master. 182 * <p> 183 * Region consistency repairs require hbase to be online so that hbck can 184 * contact the HBase master and region servers. The hbck#connect() method must 185 * first be called successfully. Much of the region consistency information 186 * is transient and less risky to repair. 187 * <p> 188 * If hbck is run from the command line, there are a handful of arguments that 189 * can be used to limit the kinds of repairs hbck will do. See the code in 190 * {@link #printUsageAndExit()} for more details. 191 */ 192@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) 193@InterfaceStability.Evolving 194public class HBaseFsck extends Configured implements Closeable { 195 public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute 196 public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000; 197 private static final int MAX_NUM_THREADS = 50; // #threads to contact regions 198 private static boolean rsSupportsOffline = true; 199 private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2; 200 private static final int DEFAULT_MAX_MERGE = 5; 201 202 /** 203 * Here is where hbase-1.x used to default the lock for hbck1. 204 * It puts in place a lock when it goes to write/make changes. 205 */ 206 @VisibleForTesting 207 public static final String HBCK_LOCK_FILE = "hbase-hbck.lock"; 208 private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5; 209 private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds 210 private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds 211 // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD. 
  // In Hadoop 2.6 and later, the NameNode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which implies a timeout on these operations of up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds

  /**********************
   * Internal resources
   **********************/
  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
  private ClusterMetrics status;
  private ClusterConnection connection;
  private Admin admin;
  private Table meta;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
  protected ExecutorService executor;
  private long startMillis = EnvironmentEdgeManager.currentTime();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;
  private Path HBCK_LOCK_PATH;
  private FSDataOutputStream hbckOutFd;
  // This lock is to prevent cleanup of balancer resources twice between
  // ShutdownHook and the main code. We cleanup only if the connect() is
  // successful
  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);

  // Unsupported options in HBase 2.0+
  private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
      "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
      "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
      "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");

  /***********
   * Options
   ***********/
  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // check only tables whose modtime is older than this lag
  private static boolean forceExclusive = false; // only this hbck can modify HBase
  private boolean fixAssignments = false; // fix assignment errors?
  private boolean fixMeta = false; // fix meta errors?
  private boolean checkHdfs = true; // load and check fs consistency?
  private boolean fixHdfsHoles = false; // fix fs holes?
  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
  private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
  private boolean fixSplitParents = false; // fix lingering split parents
  private boolean removeParents = false; // remove split parents
  private boolean fixReferenceFiles = false; // fix lingering reference store files
  private boolean fixHFileLinks = false; // fix lingering HFileLinks
  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
  private boolean fixAny = false; // Set to true if any fix option above is enabled.
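  // Note on hbase-2.x: the write-enabling flags above correspond to the command-line options
  // collected in unsupportedOptionsInV2 (e.g. "-fix", "-fixMeta", "-repair"), which this hbck1
  // tool treats as unsupported against an hbase-2.x cluster, leaving only read-only checks;
  // see the class comment and HBASE-19121 (hbck2) for the hbase-2.x repair tool.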
267 268 // limit checking/fixes to listed tables, if empty attempt to check/fix all 269 // hbase:meta are always checked 270 private Set<TableName> tablesIncluded = new HashSet<>(); 271 private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge 272 // maximum number of overlapping regions to sideline 273 private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; 274 private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions 275 private Path sidelineDir = null; 276 277 private boolean rerun = false; // if we tried to fix something, rerun hbck 278 private static boolean summary = false; // if we want to print less output 279 private boolean checkMetaOnly = false; 280 private boolean checkRegionBoundaries = false; 281 private boolean ignorePreCheckPermission = false; // if pre-check permission 282 283 /********* 284 * State 285 *********/ 286 final private HbckErrorReporter errors; 287 int fixes = 0; 288 289 /** 290 * This map contains the state of all hbck items. It maps from encoded region 291 * name to HbckRegionInfo structure. The information contained in HbckRegionInfo is used 292 * to detect and correct consistency (hdfs/meta/deployment) problems. 293 */ 294 private TreeMap<String, HbckRegionInfo> regionInfoMap = new TreeMap<>(); 295 // Empty regioninfo qualifiers in hbase:meta 296 private Set<Result> emptyRegionInfoQualifiers = new HashSet<>(); 297 298 /** 299 * This map from Tablename -> TableInfo contains the structures necessary to 300 * detect table consistency problems (holes, dupes, overlaps). It is sorted 301 * to prevent dupes. 302 * 303 * If tablesIncluded is empty, this map contains all tables. 304 * Otherwise, it contains only meta tables and tables in tablesIncluded, 305 * unless checkMetaOnly is specified, in which case, it contains only 306 * the meta table 307 */ 308 private SortedMap<TableName, HbckTableInfo> tablesInfo = new ConcurrentSkipListMap<>(); 309 310 /** 311 * When initially looking at HDFS, we attempt to find any orphaned data. 
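   * ("Orphaned" here means a region directory on HDFS with no .regioninfo file; such
   * directories are later adopted or sidelined, see adoptHdfsOrphan().)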
312 */ 313 private List<HbckRegionInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<>()); 314 315 private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>(); 316 private Map<TableName, TableState> tableStates = new HashMap<>(); 317 private final RetryCounterFactory lockFileRetryCounterFactory; 318 private final RetryCounterFactory createZNodeRetryCounterFactory; 319 320 private Map<TableName, Set<String>> skippedRegions = new HashMap<>(); 321 322 private ZKWatcher zkw = null; 323 private String hbckEphemeralNodePath = null; 324 private boolean hbckZodeCreated = false; 325 326 /** 327 * Constructor 328 * 329 * @param conf Configuration object 330 * @throws MasterNotRunningException if the master is not running 331 * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper 332 */ 333 public HBaseFsck(Configuration conf) throws MasterNotRunningException, 334 ZooKeeperConnectionException, IOException, ClassNotFoundException { 335 this(conf, createThreadPool(conf)); 336 } 337 338 private static ExecutorService createThreadPool(Configuration conf) { 339 int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS); 340 return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck")); 341 } 342 343 /** 344 * Constructor 345 * 346 * @param conf 347 * Configuration object 348 * @throws MasterNotRunningException 349 * if the master is not running 350 * @throws ZooKeeperConnectionException 351 * if unable to connect to ZooKeeper 352 */ 353 public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException, 354 ZooKeeperConnectionException, IOException, ClassNotFoundException { 355 super(conf); 356 errors = getErrorReporter(getConf()); 357 this.executor = exec; 358 lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf()); 359 createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf()); 360 zkw = createZooKeeperWatcher(); 361 } 362 363 /** 364 * @return A retry counter factory configured for retrying lock file creation. 365 */ 366 public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) { 367 return new RetryCounterFactory( 368 conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS), 369 conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval", 370 DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL), 371 conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime", 372 DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME)); 373 } 374 375 /** 376 * @return A retry counter factory configured for retrying znode creation. 377 */ 378 private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) { 379 return new RetryCounterFactory( 380 conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS), 381 conf.getInt("hbase.hbck.createznode.attempt.sleep.interval", 382 DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL), 383 conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime", 384 DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME)); 385 } 386 387 /** 388 * @return Return the tmp dir this tool writes too. 
389 */ 390 @VisibleForTesting 391 public static Path getTmpDir(Configuration conf) throws IOException { 392 return new Path(FSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY); 393 } 394 395 private static class FileLockCallable implements Callable<FSDataOutputStream> { 396 RetryCounter retryCounter; 397 private final Configuration conf; 398 private Path hbckLockPath = null; 399 400 public FileLockCallable(Configuration conf, RetryCounter retryCounter) { 401 this.retryCounter = retryCounter; 402 this.conf = conf; 403 } 404 405 /** 406 * @return Will be <code>null</code> unless you call {@link #call()} 407 */ 408 Path getHbckLockPath() { 409 return this.hbckLockPath; 410 } 411 412 @Override 413 public FSDataOutputStream call() throws IOException { 414 try { 415 FileSystem fs = FSUtils.getCurrentFileSystem(this.conf); 416 FsPermission defaultPerms = FSUtils.getFilePermissions(fs, this.conf, 417 HConstants.DATA_FILE_UMASK_KEY); 418 Path tmpDir = getTmpDir(conf); 419 this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE); 420 fs.mkdirs(tmpDir); 421 final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms); 422 out.writeBytes(InetAddress.getLocalHost().toString()); 423 // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file. 424 out.writeBytes(" Written by an hbase-2.x Master to block an " + 425 "attempt by an hbase-1.x HBCK tool making modification to state. " + 426 "See 'HBCK must match HBase server version' in the hbase refguide."); 427 out.flush(); 428 return out; 429 } catch(RemoteException e) { 430 if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){ 431 return null; 432 } else { 433 throw e; 434 } 435 } 436 } 437 438 private FSDataOutputStream createFileWithRetries(final FileSystem fs, 439 final Path hbckLockFilePath, final FsPermission defaultPerms) 440 throws IOException { 441 IOException exception = null; 442 do { 443 try { 444 return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false); 445 } catch (IOException ioe) { 446 LOG.info("Failed to create lock file " + hbckLockFilePath.getName() 447 + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of " 448 + retryCounter.getMaxAttempts()); 449 LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(), 450 ioe); 451 try { 452 exception = ioe; 453 retryCounter.sleepUntilNextRetry(); 454 } catch (InterruptedException ie) { 455 throw (InterruptedIOException) new InterruptedIOException( 456 "Can't create lock file " + hbckLockFilePath.getName()) 457 .initCause(ie); 458 } 459 } 460 } while (retryCounter.shouldRetry()); 461 462 throw exception; 463 } 464 } 465 466 /** 467 * This method maintains a lock using a file. 
If the creation fails we return null 468 * 469 * @return FSDataOutputStream object corresponding to the newly opened lock file 470 * @throws IOException if IO failure occurs 471 */ 472 public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf, 473 RetryCounter retryCounter) throws IOException { 474 FileLockCallable callable = new FileLockCallable(conf, retryCounter); 475 ExecutorService executor = Executors.newFixedThreadPool(1); 476 FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable); 477 executor.execute(futureTask); 478 final int timeoutInSeconds = conf.getInt( 479 "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT); 480 FSDataOutputStream stream = null; 481 try { 482 stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS); 483 } catch (ExecutionException ee) { 484 LOG.warn("Encountered exception when opening lock file", ee); 485 } catch (InterruptedException ie) { 486 LOG.warn("Interrupted when opening lock file", ie); 487 Thread.currentThread().interrupt(); 488 } catch (TimeoutException exception) { 489 // took too long to obtain lock 490 LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock"); 491 futureTask.cancel(true); 492 } finally { 493 executor.shutdownNow(); 494 } 495 return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream); 496 } 497 498 private void unlockHbck() { 499 if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) { 500 RetryCounter retryCounter = lockFileRetryCounterFactory.create(); 501 do { 502 try { 503 IOUtils.closeQuietly(hbckOutFd); 504 FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true); 505 LOG.info("Finishing hbck"); 506 return; 507 } catch (IOException ioe) { 508 LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try=" 509 + (retryCounter.getAttemptTimes() + 1) + " of " 510 + retryCounter.getMaxAttempts()); 511 LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe); 512 try { 513 retryCounter.sleepUntilNextRetry(); 514 } catch (InterruptedException ie) { 515 Thread.currentThread().interrupt(); 516 LOG.warn("Interrupted while deleting lock file" + 517 HBCK_LOCK_PATH); 518 return; 519 } 520 } 521 } while (retryCounter.shouldRetry()); 522 } 523 } 524 525 /** 526 * To repair region consistency, one must call connect() in order to repair 527 * online state. 528 */ 529 public void connect() throws IOException { 530 531 if (isExclusive()) { 532 // Grab the lock 533 Pair<Path, FSDataOutputStream> pair = 534 checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create()); 535 HBCK_LOCK_PATH = pair.getFirst(); 536 this.hbckOutFd = pair.getSecond(); 537 if (hbckOutFd == null) { 538 setRetCode(-1); 539 LOG.error("Another instance of hbck is fixing HBase, exiting this instance. 
" + 540 "[If you are sure no other instance is running, delete the lock file " + 541 HBCK_LOCK_PATH + " and rerun the tool]"); 542 throw new IOException("Duplicate hbck - Abort"); 543 } 544 545 // Make sure to cleanup the lock 546 hbckLockCleanup.set(true); 547 } 548 549 550 // Add a shutdown hook to this thread, in case user tries to 551 // kill the hbck with a ctrl-c, we want to cleanup the lock so that 552 // it is available for further calls 553 Runtime.getRuntime().addShutdownHook(new Thread() { 554 @Override 555 public void run() { 556 IOUtils.closeQuietly(HBaseFsck.this); 557 cleanupHbckZnode(); 558 unlockHbck(); 559 } 560 }); 561 562 LOG.info("Launching hbck"); 563 564 connection = (ClusterConnection)ConnectionFactory.createConnection(getConf()); 565 admin = connection.getAdmin(); 566 meta = connection.getTable(TableName.META_TABLE_NAME); 567 status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS, 568 Option.DEAD_SERVERS, Option.MASTER, Option.BACKUP_MASTERS, 569 Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION)); 570 } 571 572 /** 573 * Get deployed regions according to the region servers. 574 */ 575 private void loadDeployedRegions() throws IOException, InterruptedException { 576 // From the master, get a list of all known live region servers 577 Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet(); 578 errors.print("Number of live region servers: " + regionServers.size()); 579 if (details) { 580 for (ServerName rsinfo: regionServers) { 581 errors.print(" " + rsinfo.getServerName()); 582 } 583 } 584 585 // From the master, get a list of all dead region servers 586 Collection<ServerName> deadRegionServers = status.getDeadServerNames(); 587 errors.print("Number of dead region servers: " + deadRegionServers.size()); 588 if (details) { 589 for (ServerName name: deadRegionServers) { 590 errors.print(" " + name); 591 } 592 } 593 594 // Print the current master name and state 595 errors.print("Master: " + status.getMasterName()); 596 597 // Print the list of all backup masters 598 Collection<ServerName> backupMasters = status.getBackupMasterNames(); 599 errors.print("Number of backup masters: " + backupMasters.size()); 600 if (details) { 601 for (ServerName name: backupMasters) { 602 errors.print(" " + name); 603 } 604 } 605 606 errors.print("Average load: " + status.getAverageLoad()); 607 errors.print("Number of requests: " + status.getRequestCount()); 608 errors.print("Number of regions: " + status.getRegionCount()); 609 610 List<RegionState> rits = status.getRegionStatesInTransition(); 611 errors.print("Number of regions in transition: " + rits.size()); 612 if (details) { 613 for (RegionState state: rits) { 614 errors.print(" " + state.toDescriptiveString()); 615 } 616 } 617 618 // Determine what's deployed 619 processRegionServers(regionServers); 620 } 621 622 /** 623 * Clear the current state of hbck. 624 */ 625 private void clearState() { 626 // Make sure regionInfo is empty before starting 627 fixes = 0; 628 regionInfoMap.clear(); 629 emptyRegionInfoQualifiers.clear(); 630 tableStates.clear(); 631 errors.clear(); 632 tablesInfo.clear(); 633 orphanHdfsDirs.clear(); 634 skippedRegions.clear(); 635 } 636 637 /** 638 * This repair method analyzes hbase data in hdfs and repairs it to satisfy 639 * the table integrity rules. HBase doesn't need to be online for this 640 * operation to work. 641 */ 642 public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException { 643 // Initial pass to fix orphans. 
    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
      // if nothing is happening this should always complete in two iterations.
      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
      int curIter = 0;
      do {
        clearState(); // clears hbck state and resets fixes to 0.
        // repair what's on HDFS
        restoreHdfsIntegrity();
        curIter++; // limit the number of iterations.
      } while (fixes > 0 && curIter <= maxIterations);

      // Repairs should be done in the first iteration and verification in the second.
      // If there are more than 2 passes, something funny has happened.
      if (curIter > 2) {
        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
        } else {
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
        }
      }
    }
  }

  /**
   * This repair method requires the cluster to be online since it contacts
   * region servers and the master. It makes each region's state in HDFS, in
   * hbase:meta, and its deployment consistent.
   *
   * @return If > 0, the number of errors detected; if < 0, there was an unrecoverable
   *     error. If 0, we have a clean hbase.
   */
  public int onlineConsistencyRepair() throws IOException, KeeperException,
      InterruptedException {

    // get regions according to what is online on each RegionServer
    loadDeployedRegions();
    // check whether hbase:meta is deployed and online
    recordMetaRegion();
    // Check if hbase:meta is found only once and in the right place
    if (!checkMetaRegion()) {
      String errorMsg = "hbase:meta table is not consistent. ";
      if (shouldFixAssignments()) {
        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
      } else {
        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
      }
      errors.reportError(errorMsg + " Exiting...");
      return -2;
    }
    // Do not go on to further consistency checks for tables when hbase:meta itself is not consistent.
    LOG.info("Loading regioninfos from the hbase:meta table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    // Empty cells in hbase:meta?
    reportEmptyMetaCells();

    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    // get a list of all tables that have not changed recently.
    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    // Get disabled tables' states
    loadTableStates();

    // load regiondirs and regioninfos from HDFS
    if (shouldCheckHdfs()) {
      LOG.info("Loading region directories from HDFS");
      loadHdfsRegionDirs();
      LOG.info("Loading region information from HDFS");
      loadHdfsRegionInfos();
    }

    // fix the orphan tables
    fixOrphanTables();

    LOG.info("Checking and fixing region consistency");
    // Check and fix consistency
    checkAndFixConsistency();

    // Check integrity (does not fix)
    checkIntegrity();
    return errors.getErrorList().size();
  }

  /**
   * This method maintains an ephemeral znode.
If the creation fails we return false or throw 739 * exception 740 * 741 * @return true if creating znode succeeds; false otherwise 742 * @throws IOException if IO failure occurs 743 */ 744 private boolean setMasterInMaintenanceMode() throws IOException { 745 RetryCounter retryCounter = createZNodeRetryCounterFactory.create(); 746 hbckEphemeralNodePath = ZNodePaths.joinZNode( 747 zkw.znodePaths.masterMaintZNode, 748 "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime())); 749 do { 750 try { 751 hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null); 752 if (hbckZodeCreated) { 753 break; 754 } 755 } catch (KeeperException e) { 756 if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) { 757 throw new IOException("Can't create znode " + hbckEphemeralNodePath, e); 758 } 759 // fall through and retry 760 } 761 762 LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try=" + 763 (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts()); 764 765 try { 766 retryCounter.sleepUntilNextRetry(); 767 } catch (InterruptedException ie) { 768 throw (InterruptedIOException) new InterruptedIOException( 769 "Can't create znode " + hbckEphemeralNodePath).initCause(ie); 770 } 771 } while (retryCounter.shouldRetry()); 772 return hbckZodeCreated; 773 } 774 775 private void cleanupHbckZnode() { 776 try { 777 if (zkw != null && hbckZodeCreated) { 778 ZKUtil.deleteNode(zkw, hbckEphemeralNodePath); 779 hbckZodeCreated = false; 780 } 781 } catch (KeeperException e) { 782 // Ignore 783 if (!e.code().equals(KeeperException.Code.NONODE)) { 784 LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e); 785 } 786 } 787 } 788 789 /** 790 * Contacts the master and prints out cluster-wide information 791 * @return 0 on success, non-zero on failure 792 */ 793 public int onlineHbck() 794 throws IOException, KeeperException, InterruptedException { 795 // print hbase server version 796 errors.print("Version: " + status.getHBaseVersion()); 797 798 // Clean start 799 clearState(); 800 // Do offline check and repair first 801 offlineHdfsIntegrityRepair(); 802 offlineReferenceFileRepair(); 803 offlineHLinkFileRepair(); 804 // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online 805 // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it 806 // is better to set Master into maintenance mode during online hbck. 807 // 808 if (!setMasterInMaintenanceMode()) { 809 LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient " 810 + "error. 
Please run HBCK multiple times to reduce the chance of transient error."); 811 } 812 813 onlineConsistencyRepair(); 814 815 if (checkRegionBoundaries) { 816 checkRegionBoundaries(); 817 } 818 819 checkAndFixReplication(); 820 821 // Remove the hbck znode 822 cleanupHbckZnode(); 823 824 // Remove the hbck lock 825 unlockHbck(); 826 827 // Print table summary 828 printTableSummary(tablesInfo); 829 return errors.summarize(); 830 } 831 832 public static byte[] keyOnly(byte[] b) { 833 if (b == null) 834 return b; 835 int rowlength = Bytes.toShort(b, 0); 836 byte[] result = new byte[rowlength]; 837 System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength); 838 return result; 839 } 840 841 @Override 842 public void close() throws IOException { 843 try { 844 cleanupHbckZnode(); 845 unlockHbck(); 846 } catch (Exception io) { 847 LOG.warn(io.toString(), io); 848 } finally { 849 if (zkw != null) { 850 zkw.close(); 851 zkw = null; 852 } 853 IOUtils.closeQuietly(admin); 854 IOUtils.closeQuietly(meta); 855 IOUtils.closeQuietly(connection); 856 } 857 } 858 859 private static class RegionBoundariesInformation { 860 public byte [] regionName; 861 public byte [] metaFirstKey; 862 public byte [] metaLastKey; 863 public byte [] storesFirstKey; 864 public byte [] storesLastKey; 865 @Override 866 public String toString () { 867 return "regionName=" + Bytes.toStringBinary(regionName) + 868 "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) + 869 "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) + 870 "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) + 871 "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey); 872 } 873 } 874 875 public void checkRegionBoundaries() { 876 try { 877 ByteArrayComparator comparator = new ByteArrayComparator(); 878 List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true); 879 final RegionBoundariesInformation currentRegionBoundariesInformation = 880 new RegionBoundariesInformation(); 881 Path hbaseRoot = FSUtils.getRootDir(getConf()); 882 for (RegionInfo regionInfo : regions) { 883 Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable()); 884 currentRegionBoundariesInformation.regionName = regionInfo.getRegionName(); 885 // For each region, get the start and stop key from the META and compare them to the 886 // same information from the Stores. 887 Path path = new Path(tableDir, regionInfo.getEncodedName()); 888 FileSystem fs = path.getFileSystem(getConf()); 889 FileStatus[] files = fs.listStatus(path); 890 // For all the column families in this region... 891 byte[] storeFirstKey = null; 892 byte[] storeLastKey = null; 893 for (FileStatus file : files) { 894 String fileName = file.getPath().toString(); 895 fileName = fileName.substring(fileName.lastIndexOf("/") + 1); 896 if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) { 897 FileStatus[] storeFiles = fs.listStatus(file.getPath()); 898 // For all the stores in this column family. 
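            // Track the smallest first key and the largest last key seen across all store
            // files of all column families; these are compared against this region's META
            // start/end keys further below.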
899 for (FileStatus storeFile : storeFiles) { 900 HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), 901 CacheConfig.DISABLED, true, getConf()); 902 if ((reader.getFirstKey() != null) 903 && ((storeFirstKey == null) || (comparator.compare(storeFirstKey, 904 ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) { 905 storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey().get()).getKey(); 906 } 907 if ((reader.getLastKey() != null) 908 && ((storeLastKey == null) || (comparator.compare(storeLastKey, 909 ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey())) < 0)) { 910 storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey(); 911 } 912 reader.close(); 913 } 914 } 915 } 916 currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey(); 917 currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey(); 918 currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey); 919 currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey); 920 if (currentRegionBoundariesInformation.metaFirstKey.length == 0) 921 currentRegionBoundariesInformation.metaFirstKey = null; 922 if (currentRegionBoundariesInformation.metaLastKey.length == 0) 923 currentRegionBoundariesInformation.metaLastKey = null; 924 925 // For a region to be correct, we need the META start key to be smaller or equal to the 926 // smallest start key from all the stores, and the start key from the next META entry to 927 // be bigger than the last key from all the current stores. First region start key is null; 928 // Last region end key is null; some regions can be empty and not have any store. 929 930 boolean valid = true; 931 // Checking start key. 932 if ((currentRegionBoundariesInformation.storesFirstKey != null) 933 && (currentRegionBoundariesInformation.metaFirstKey != null)) { 934 valid = valid 935 && comparator.compare(currentRegionBoundariesInformation.storesFirstKey, 936 currentRegionBoundariesInformation.metaFirstKey) >= 0; 937 } 938 // Checking stop key. 939 if ((currentRegionBoundariesInformation.storesLastKey != null) 940 && (currentRegionBoundariesInformation.metaLastKey != null)) { 941 valid = valid 942 && comparator.compare(currentRegionBoundariesInformation.storesLastKey, 943 currentRegionBoundariesInformation.metaLastKey) < 0; 944 } 945 if (!valid) { 946 errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries", 947 tablesInfo.get(regionInfo.getTable())); 948 LOG.warn("Region's boundaries not aligned between stores and META for:"); 949 LOG.warn(Objects.toString(currentRegionBoundariesInformation)); 950 } 951 } 952 } catch (IOException e) { 953 LOG.error(e.toString(), e); 954 } 955 } 956 957 /** 958 * Iterates through the list of all orphan/invalid regiondirs. 959 */ 960 private void adoptHdfsOrphans(Collection<HbckRegionInfo> orphanHdfsDirs) throws IOException { 961 for (HbckRegionInfo hi : orphanHdfsDirs) { 962 LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir()); 963 adoptHdfsOrphan(hi); 964 } 965 } 966 967 /** 968 * Orphaned regions are regions without a .regioninfo file in them. We "adopt" 969 * these orphans by creating a new region, and moving the column families, 970 * recovered edits, WALs, into the new region dir. We determine the region 971 * startkey and endkeys by looking at all of the hfiles inside the column 972 * families to identify the min and max keys. 
The resulting region will 973 * likely violate table integrity but will be dealt with by merging 974 * overlapping regions. 975 */ 976 @SuppressWarnings("deprecation") 977 private void adoptHdfsOrphan(HbckRegionInfo hi) throws IOException { 978 Path p = hi.getHdfsRegionDir(); 979 FileSystem fs = p.getFileSystem(getConf()); 980 FileStatus[] dirs = fs.listStatus(p); 981 if (dirs == null) { 982 LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " + 983 p + ". This dir could probably be deleted."); 984 return ; 985 } 986 987 TableName tableName = hi.getTableName(); 988 HbckTableInfo tableInfo = tablesInfo.get(tableName); 989 Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!"); 990 TableDescriptor template = tableInfo.getTableDescriptor(); 991 992 // find min and max key values 993 Pair<byte[],byte[]> orphanRegionRange = null; 994 for (FileStatus cf : dirs) { 995 String cfName= cf.getPath().getName(); 996 // TODO Figure out what the special dirs are 997 if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue; 998 999 FileStatus[] hfiles = fs.listStatus(cf.getPath()); 1000 for (FileStatus hfile : hfiles) { 1001 byte[] start, end; 1002 HFile.Reader hf = null; 1003 try { 1004 hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf()); 1005 hf.loadFileInfo(); 1006 Optional<Cell> startKv = hf.getFirstKey(); 1007 start = CellUtil.cloneRow(startKv.get()); 1008 Optional<Cell> endKv = hf.getLastKey(); 1009 end = CellUtil.cloneRow(endKv.get()); 1010 } catch (IOException ioe) { 1011 LOG.warn("Problem reading orphan file " + hfile + ", skipping"); 1012 continue; 1013 } catch (NullPointerException ioe) { 1014 LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping"); 1015 continue; 1016 } finally { 1017 if (hf != null) { 1018 hf.close(); 1019 } 1020 } 1021 1022 // expand the range to include the range of all hfiles 1023 if (orphanRegionRange == null) { 1024 // first range 1025 orphanRegionRange = new Pair<>(start, end); 1026 } else { 1027 // TODO add test 1028 1029 // expand range only if the hfile is wider. 1030 if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) { 1031 orphanRegionRange.setFirst(start); 1032 } 1033 if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) { 1034 orphanRegionRange.setSecond(end); 1035 } 1036 } 1037 } 1038 } 1039 if (orphanRegionRange == null) { 1040 LOG.warn("No data in dir " + p + ", sidelining data"); 1041 fixes++; 1042 sidelineRegionDir(fs, hi); 1043 return; 1044 } 1045 LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " + 1046 Bytes.toString(orphanRegionRange.getSecond()) + ")"); 1047 1048 // create new region on hdfs. move data into place. 1049 RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName()) 1050 .setStartKey(orphanRegionRange.getFirst()) 1051 .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1])) 1052 .build(); 1053 LOG.info("Creating new region : " + regionInfo); 1054 HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template); 1055 Path target = region.getRegionFileSystem().getRegionDir(); 1056 1057 // rename all the data to new region 1058 mergeRegionDirs(target, hi); 1059 fixes++; 1060 } 1061 1062 /** 1063 * This method determines if there are table integrity errors in HDFS. 
   * If there are errors and the appropriate "fix" options are enabled, the method
   * will first correct orphan regions, making them into legitimate region dirs, and
   * then reload to merge potentially overlapping regions.
   *
   * @return number of table integrity errors found
   */
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
    // Determine what's on HDFS
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs(); // populating regioninfo table.

    int errs = errors.getErrorList().size();
    // First time just get suggestions.
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() == errs) {
      LOG.info("No integrity errors. We are done with this phase. Glorious.");
      return 0;
    }

    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
      adoptHdfsOrphans(orphanHdfsDirs);
      // TODO optimize by incrementally adding instead of reloading.
    }

    // Make sure there are no holes now.
    if (shouldFixHdfsHoles()) {
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
    }

    // Now we fix overlaps
    if (shouldFixHdfsOverlaps()) {
      // second pass we fix overlaps.
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
    }

    return errors.getErrorList().size();
  }

  /**
   * Scan all the store file names to find any lingering reference files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering reference file found will be sidelined.
   * <p>
   * A lingering reference file prevents a region from opening. It has to
   * be fixed before a cluster can start properly.
   */
  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
    clearState();
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    LOG.info("Computing mapping of all store files");
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
        new FSUtils.ReferenceFileFilter(fs), executor, errors);
    errors.print("");
    LOG.info("Validating mapping using HDFS state");
    for (Path path: allFiles.values()) {
      Path referredToFile = StoreFileInfo.getReferredToFile(path);
      if (fs.exists(referredToFile)) continue; // good, expected

      // Found a lingering reference file
      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
          "Found lingering reference file " + path);
      if (!shouldFixReferenceFiles()) continue;

      // Now, trying to fix it since requested
      boolean success = false;
      String pathStr = path.toString();

      // A reference file path should be like
      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
      // Up 5 directories to get the root folder.
      // So the file will be sidelined to a similar folder structure.
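      // Illustrative example (hypothetical names): for a lingering reference file at
      //   ${hbase.rootdir}/data/default/t1/1588230740/f1/abc123.1588230741
      // stepping up 5 separators leaves the relative path
      //   data/default/t1/1588230740/f1/abc123.1588230741
      // which is recreated under getSidelineDir(), i.e. the file is renamed (moved), not deleted.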
      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 5; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }
      if (index > 0) {
        Path rootDir = getSidelineDir();
        Path dst = new Path(rootDir, pathStr.substring(index + 1));
        fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
            + path + " to " + dst);
        setShouldRerun();

        success = fs.rename(path, dst);
        debugLsr(dst);

      }
      if (!success) {
        LOG.error("Failed to sideline reference file " + path);
      }
    }
  }

  /**
   * Scan all the store file names to find any lingering HFileLink files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering HFileLink file found will be sidelined.
   */
  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    LOG.info("Computing mapping of all link files");
    Map<String, Path> allFiles = FSUtils
        .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
    errors.print("");

    LOG.info("Validating mapping using HDFS state");
    for (Path path : allFiles.values()) {
      // building HFileLink object to gather locations
      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
      if (actualLink.exists(fs)) continue; // good, expected

      // Found a lingering HFileLink
      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
      if (!shouldFixHFileLinks()) continue;

      // Now, trying to fix it since requested
      setShouldRerun();

      // An HFileLink path should be like
      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
      boolean success = sidelineFile(fs, hbaseRoot, path);

      if (!success) {
        LOG.error("Failed to sideline HFileLink file " + path);
      }

      // An HFileLink backreference path should be like
      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1206 Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil 1207 .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()), 1208 HFileLink.getReferencedRegionName(path.getName().toString()), 1209 path.getParent().getName()), 1210 HFileLink.getReferencedHFileName(path.getName().toString())); 1211 success = sidelineFile(fs, hbaseRoot, backRefPath); 1212 1213 if (!success) { 1214 LOG.error("Failed to sideline HFileLink backreference file " + path); 1215 } 1216 } 1217 } 1218 1219 private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException { 1220 URI uri = hbaseRoot.toUri().relativize(path.toUri()); 1221 if (uri.isAbsolute()) return false; 1222 String relativePath = uri.getPath(); 1223 Path rootDir = getSidelineDir(); 1224 Path dst = new Path(rootDir, relativePath); 1225 boolean pathCreated = fs.mkdirs(dst.getParent()); 1226 if (!pathCreated) { 1227 LOG.error("Failed to create path: " + dst.getParent()); 1228 return false; 1229 } 1230 LOG.info("Trying to sideline file " + path + " to " + dst); 1231 return fs.rename(path, dst); 1232 } 1233 1234 /** 1235 * TODO -- need to add tests for this. 1236 */ 1237 private void reportEmptyMetaCells() { 1238 errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " + 1239 emptyRegionInfoQualifiers.size()); 1240 if (details) { 1241 for (Result r: emptyRegionInfoQualifiers) { 1242 errors.print(" " + r); 1243 } 1244 } 1245 } 1246 1247 /** 1248 * TODO -- need to add tests for this. 1249 */ 1250 private void reportTablesInFlux() { 1251 AtomicInteger numSkipped = new AtomicInteger(0); 1252 TableDescriptor[] allTables = getTables(numSkipped); 1253 errors.print("Number of Tables: " + allTables.length); 1254 if (details) { 1255 if (numSkipped.get() > 0) { 1256 errors.detail("Number of Tables in flux: " + numSkipped.get()); 1257 } 1258 for (TableDescriptor td : allTables) { 1259 errors.detail(" Table: " + td.getTableName() + "\t" + 1260 (td.isReadOnly() ? "ro" : "rw") + "\t" + 1261 (td.isMetaRegion() ? "META" : " ") + "\t" + 1262 " families: " + td.getColumnFamilyCount()); 1263 } 1264 } 1265 } 1266 1267 public HbckErrorReporter getErrors() { 1268 return errors; 1269 } 1270 1271 /** 1272 * Populate hbi's from regionInfos loaded from file system. 1273 */ 1274 private SortedMap<TableName, HbckTableInfo> loadHdfsRegionInfos() 1275 throws IOException, InterruptedException { 1276 tablesInfo.clear(); // regenerating the data 1277 // generate region split structure 1278 Collection<HbckRegionInfo> hbckRegionInfos = regionInfoMap.values(); 1279 1280 // Parallelized read of .regioninfo files. 1281 List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckRegionInfos.size()); 1282 List<Future<Void>> hbiFutures; 1283 1284 for (HbckRegionInfo hbi : hbckRegionInfos) { 1285 WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors); 1286 hbis.add(work); 1287 } 1288 1289 // Submit and wait for completion 1290 hbiFutures = executor.invokeAll(hbis); 1291 1292 for(int i=0; i<hbiFutures.size(); i++) { 1293 WorkItemHdfsRegionInfo work = hbis.get(i); 1294 Future<Void> f = hbiFutures.get(i); 1295 try { 1296 f.get(); 1297 } catch(ExecutionException e) { 1298 LOG.warn("Failed to read .regioninfo file for region " + 1299 work.hbi.getRegionNameAsString(), e.getCause()); 1300 } 1301 } 1302 1303 Path hbaseRoot = FSUtils.getRootDir(getConf()); 1304 FileSystem fs = hbaseRoot.getFileSystem(getConf()); 1305 // serialized table info gathering. 
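    // For each region found on HDFS, group it under its table's HbckTableInfo, reading the
    // table's descriptor from its .tableinfo file the first time the table is seen; tables whose
    // .tableinfo cannot be read are remembered in orphanTableDirs for later repair.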
    for (HbckRegionInfo hbi : hbckRegionInfos) {

      if (hbi.getHdfsHRI() == null) {
        // was an orphan
        continue;
      }


      // get table name from hdfs, populate various HBaseFsck tables.
      TableName tableName = hbi.getTableName();
      if (tableName == null) {
        // There was an entry in hbase:meta that is not in HDFS?
        LOG.warn("tableName was null for: " + hbi);
        continue;
      }

      HbckTableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        // only executed once per table.
        modTInfo = new HbckTableInfo(tableName, this);
        tablesInfo.put(tableName, modTInfo);
        try {
          TableDescriptor htd =
              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
          modTInfo.htds.add(htd);
        } catch (IOException ioe) {
          if (!orphanTableDirs.containsKey(tableName)) {
            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
            // should only report once for each table
            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
            Set<String> columns = new HashSet<>();
            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
          }
        }
      }
      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }
    }

    loadTableInfosForTablesWithNoRegion();
    errors.print("");

    return tablesInfo;
  }

  /**
   * To get the column family list according to the column family dirs.
   * @param columns set that the column family names found will be added to
   * @param hbi the region whose family directories are listed
   * @return a set of column families
   * @throws IOException
   */
  private Set<String> getColumnFamilyList(Set<String> columns, HbckRegionInfo hbi)
      throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
    for (FileStatus subdir : subDirs) {
      String columnfamily = subdir.getPath().getName();
      columns.add(columnfamily);
    }
    return columns;
  }

  /**
   * To fabricate a .tableinfo file with the following contents:<br>
   * 1. the correct table name<br>
   * 2. the correct column family list<br>
   * 3. the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
   * @throws IOException
   */
  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
    for (String columnFamily : columns) {
      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnFamily));
    }
    fstd.createTableDescriptor(builder.build(), true);
    return true;
  }

  /**
   * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta.<br>
   * @throws IOException
   */
  public void fixEmptyMetaCells() throws IOException {
    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
      for (Result region : emptyRegionInfoQualifiers) {
        deleteMetaRegion(region.getRow());
        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
      }
      emptyRegionInfoQualifiers.clear();
    }
  }

  /**
   * To fix orphan tables by creating a .tableinfo file under tableDir:<br>
   * 1.
if TableInfo is cached, to recover the .tableinfo accordingly <br> 1408 * 2. else create a default .tableinfo file with following items<br> 1409 * 2.1 the correct tablename <br> 1410 * 2.2 the correct colfamily list<br> 1411 * 2.3 the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br> 1412 * @throws IOException 1413 */ 1414 public void fixOrphanTables() throws IOException { 1415 if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) { 1416 1417 List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size()); 1418 tmpList.addAll(orphanTableDirs.keySet()); 1419 TableDescriptor[] htds = getTableDescriptors(tmpList); 1420 Iterator<Entry<TableName, Set<String>>> iter = 1421 orphanTableDirs.entrySet().iterator(); 1422 int j = 0; 1423 int numFailedCase = 0; 1424 FSTableDescriptors fstd = new FSTableDescriptors(getConf()); 1425 while (iter.hasNext()) { 1426 Entry<TableName, Set<String>> entry = 1427 iter.next(); 1428 TableName tableName = entry.getKey(); 1429 LOG.info("Trying to fix orphan table error: " + tableName); 1430 if (j < htds.length) { 1431 if (tableName.equals(htds[j].getTableName())) { 1432 TableDescriptor htd = htds[j]; 1433 LOG.info("fixing orphan table: " + tableName + " from cache"); 1434 fstd.createTableDescriptor(htd, true); 1435 j++; 1436 iter.remove(); 1437 } 1438 } else { 1439 if (fabricateTableInfo(fstd, tableName, entry.getValue())) { 1440 LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file"); 1441 LOG.warn("Strongly recommend to modify the TableDescriptor if necessary for: " + tableName); 1442 iter.remove(); 1443 } else { 1444 LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information"); 1445 numFailedCase++; 1446 } 1447 } 1448 fixes++; 1449 } 1450 1451 if (orphanTableDirs.isEmpty()) { 1452 // all orphanTableDirs are luckily recovered 1453 // re-run doFsck after recovering the .tableinfo file 1454 setShouldRerun(); 1455 LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed"); 1456 } else if (numFailedCase > 0) { 1457 LOG.error("Failed to fix " + numFailedCase 1458 + " OrphanTables with default .tableinfo files"); 1459 } 1460 1461 } 1462 //cleanup the list 1463 orphanTableDirs.clear(); 1464 1465 } 1466 1467 /** 1468 * Log an appropriate message about whether or not overlapping merges are computed in parallel. 1469 */ 1470 private void logParallelMerge() { 1471 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) { 1472 LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" + 1473 " false to run serially."); 1474 } else { 1475 LOG.info("Handling overlap merges serially. set hbasefsck.overlap.merge.parallel to" + 1476 " true to run in parallel."); 1477 } 1478 } 1479 1480 private SortedMap<TableName, HbckTableInfo> checkHdfsIntegrity(boolean fixHoles, 1481 boolean fixOverlaps) throws IOException { 1482 LOG.info("Checking HBase region split map from HDFS data..."); 1483 logParallelMerge(); 1484 for (HbckTableInfo tInfo : tablesInfo.values()) { 1485 TableIntegrityErrorHandler handler; 1486 if (fixHoles || fixOverlaps) { 1487 handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(), 1488 fixHoles, fixOverlaps); 1489 } else { 1490 handler = tInfo.new IntegrityFixSuggester(tInfo, errors); 1491 } 1492 if (!tInfo.checkRegionChain(handler)) { 1493 // should dump info as well. 
1494 errors.report("Found inconsistency in table " + tInfo.getName()); 1495 } 1496 } 1497 return tablesInfo; 1498 } 1499 1500 Path getSidelineDir() throws IOException { 1501 if (sidelineDir == null) { 1502 Path hbaseDir = FSUtils.getRootDir(getConf()); 1503 Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME); 1504 sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-" 1505 + startMillis); 1506 } 1507 return sidelineDir; 1508 } 1509 1510 /** 1511 * Sideline a region dir (instead of deleting it) 1512 */ 1513 Path sidelineRegionDir(FileSystem fs, HbckRegionInfo hi) throws IOException { 1514 return sidelineRegionDir(fs, null, hi); 1515 } 1516 1517 /** 1518 * Sideline a region dir (instead of deleting it) 1519 * 1520 * @param parentDir if specified, the region will be sidelined to folder like 1521 * {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together 1522 * similar regions sidelined, for example, those regions should be bulk loaded back later 1523 * on. If NULL, it is ignored. 1524 */ 1525 Path sidelineRegionDir(FileSystem fs, 1526 String parentDir, HbckRegionInfo hi) throws IOException { 1527 TableName tableName = hi.getTableName(); 1528 Path regionDir = hi.getHdfsRegionDir(); 1529 1530 if (!fs.exists(regionDir)) { 1531 LOG.warn("No previous " + regionDir + " exists. Continuing."); 1532 return null; 1533 } 1534 1535 Path rootDir = getSidelineDir(); 1536 if (parentDir != null) { 1537 rootDir = new Path(rootDir, parentDir); 1538 } 1539 Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName); 1540 Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName()); 1541 fs.mkdirs(sidelineRegionDir); 1542 boolean success = false; 1543 FileStatus[] cfs = fs.listStatus(regionDir); 1544 if (cfs == null) { 1545 LOG.info("Region dir is empty: " + regionDir); 1546 } else { 1547 for (FileStatus cf : cfs) { 1548 Path src = cf.getPath(); 1549 Path dst = new Path(sidelineRegionDir, src.getName()); 1550 if (fs.isFile(src)) { 1551 // simple file 1552 success = fs.rename(src, dst); 1553 if (!success) { 1554 String msg = "Unable to rename file " + src + " to " + dst; 1555 LOG.error(msg); 1556 throw new IOException(msg); 1557 } 1558 continue; 1559 } 1560 1561 // is a directory. 1562 fs.mkdirs(dst); 1563 1564 LOG.info("Sidelining files from " + src + " into containing region " + dst); 1565 // FileSystem.rename is inconsistent with directories -- if the 1566 // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir, 1567 // it moves the src into the dst dir resulting in (foo/a/b). If 1568 // the dst does not exist, and the src a dir, src becomes dst. (foo/b) 1569 FileStatus[] hfiles = fs.listStatus(src); 1570 if (hfiles != null && hfiles.length > 0) { 1571 for (FileStatus hfile : hfiles) { 1572 success = fs.rename(hfile.getPath(), dst); 1573 if (!success) { 1574 String msg = "Unable to rename file " + src + " to " + dst; 1575 LOG.error(msg); 1576 throw new IOException(msg); 1577 } 1578 } 1579 } 1580 LOG.debug("Sideline directory contents:"); 1581 debugLsr(sidelineRegionDir); 1582 } 1583 } 1584 1585 LOG.info("Removing old region dir: " + regionDir); 1586 success = fs.delete(regionDir, true); 1587 if (!success) { 1588 String msg = "Unable to delete dir " + regionDir; 1589 LOG.error(msg); 1590 throw new IOException(msg); 1591 } 1592 return sidelineRegionDir; 1593 } 1594 1595 /** 1596 * Load the list of disabled tables in ZK into local set. 
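 * (In hbase-2 the states are read from hbase:meta rather than ZooKeeper; hbase:meta itself is
 * added to the map as ENABLED since it carries no state entry of its own.)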
1597 * @throws ZooKeeperConnectionException 1598 * @throws IOException 1599 */ 1600 private void loadTableStates() 1601 throws IOException { 1602 tableStates = MetaTableAccessor.getTableStates(connection); 1603 // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it 1604 // has no entry in the table states. HBCK doesn't work right w/ hbase2 but just do this in 1605 // meantime. 1606 this.tableStates.put(TableName.META_TABLE_NAME, 1607 new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED)); 1608 } 1609 1610 /** 1611 * Check if the specified region's table is disabled. 1612 * @param tableName table to check status of 1613 */ 1614 boolean isTableDisabled(TableName tableName) { 1615 return tableStates.containsKey(tableName) 1616 && tableStates.get(tableName) 1617 .inStates(TableState.State.DISABLED, TableState.State.DISABLING); 1618 } 1619 1620 /** 1621 * Scan HDFS for all regions, recording their information into 1622 * regionInfoMap 1623 */ 1624 public void loadHdfsRegionDirs() throws IOException, InterruptedException { 1625 Path rootDir = FSUtils.getRootDir(getConf()); 1626 FileSystem fs = rootDir.getFileSystem(getConf()); 1627 1628 // list all tables from HDFS 1629 List<FileStatus> tableDirs = Lists.newArrayList(); 1630 1631 boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME)); 1632 1633 List<Path> paths = FSUtils.getTableDirs(fs, rootDir); 1634 for (Path path : paths) { 1635 TableName tableName = FSUtils.getTableName(path); 1636 if ((!checkMetaOnly && 1637 isTableIncluded(tableName)) || 1638 tableName.equals(TableName.META_TABLE_NAME)) { 1639 tableDirs.add(fs.getFileStatus(path)); 1640 } 1641 } 1642 1643 // verify that version file exists 1644 if (!foundVersionFile) { 1645 errors.reportError(ERROR_CODE.NO_VERSION_FILE, 1646 "Version file does not exist in root dir " + rootDir); 1647 if (shouldFixVersionFile()) { 1648 LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME 1649 + " file."); 1650 setShouldRerun(); 1651 FSUtils.setVersion(fs, rootDir, getConf().getInt( 1652 HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt( 1653 HConstants.VERSION_FILE_WRITE_ATTEMPTS, 1654 HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS)); 1655 } 1656 } 1657 1658 // Avoid multithreading at table-level because already multithreaded internally at 1659 // region-level. Additionally multithreading at table-level can lead to deadlock 1660 // if there are many tables in the cluster. Since there are a limited # of threads 1661 // in the executor's thread pool and if we multithread at the table-level by putting 1662 // WorkItemHdfsDir callables into the executor, then we will have some threads in the 1663 // executor tied up solely in waiting for the tables' region-level calls to complete. 1664 // If there are enough tables then there will be no actual threads in the pool left 1665 // for the region-level callables to be serviced. 1666 for (FileStatus tableDir : tableDirs) { 1667 LOG.debug("Loading region dirs from " +tableDir.getPath()); 1668 WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir); 1669 try { 1670 item.call(); 1671 } catch (ExecutionException e) { 1672 LOG.warn("Could not completely load table dir " + 1673 tableDir.getPath(), e.getCause()); 1674 } 1675 } 1676 errors.print(""); 1677 } 1678 1679 /** 1680 * Record the location of the hbase:meta region as found in ZooKeeper. 
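 * @return true if the hbase:meta location (and the locations of its replicas) was recorded in
 *         regionInfoMap; false if the location could not be resolved.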
1681 */ 1682 private boolean recordMetaRegion() throws IOException { 1683 RegionLocations rl = connection.locateRegion(TableName.META_TABLE_NAME, 1684 HConstants.EMPTY_START_ROW, false, false); 1685 if (rl == null) { 1686 errors.reportError(ERROR_CODE.NULL_META_REGION, 1687 "META region was not found in ZooKeeper"); 1688 return false; 1689 } 1690 for (HRegionLocation metaLocation : rl.getRegionLocations()) { 1691 // Check if Meta region is valid and existing 1692 if (metaLocation == null ) { 1693 errors.reportError(ERROR_CODE.NULL_META_REGION, 1694 "META region location is null"); 1695 return false; 1696 } 1697 if (metaLocation.getRegionInfo() == null) { 1698 errors.reportError(ERROR_CODE.NULL_META_REGION, 1699 "META location regionInfo is null"); 1700 return false; 1701 } 1702 if (metaLocation.getHostname() == null) { 1703 errors.reportError(ERROR_CODE.NULL_META_REGION, 1704 "META location hostName is null"); 1705 return false; 1706 } 1707 ServerName sn = metaLocation.getServerName(); 1708 HbckRegionInfo.MetaEntry m = new HbckRegionInfo.MetaEntry(metaLocation.getRegion(), sn, 1709 EnvironmentEdgeManager.currentTime()); 1710 HbckRegionInfo hbckRegionInfo = regionInfoMap.get(metaLocation.getRegion().getEncodedName()); 1711 if (hbckRegionInfo == null) { 1712 regionInfoMap.put(metaLocation.getRegion().getEncodedName(), new HbckRegionInfo(m)); 1713 } else { 1714 hbckRegionInfo.setMetaEntry(m); 1715 } 1716 } 1717 return true; 1718 } 1719 1720 private ZKWatcher createZooKeeperWatcher() throws IOException { 1721 return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() { 1722 @Override 1723 public void abort(String why, Throwable e) { 1724 LOG.error(why, e); 1725 System.exit(1); 1726 } 1727 1728 @Override 1729 public boolean isAborted() { 1730 return false; 1731 } 1732 1733 }); 1734 } 1735 1736 private ServerName getMetaRegionServerName(int replicaId) 1737 throws IOException, KeeperException { 1738 return new MetaTableLocator().getMetaRegionLocation(zkw, replicaId); 1739 } 1740 1741 /** 1742 * Contacts each regionserver and fetches metadata about regions. 1743 * @param regionServerList - the list of region servers to connect to 1744 * @throws IOException if a remote or network exception occurs 1745 */ 1746 void processRegionServers(Collection<ServerName> regionServerList) 1747 throws IOException, InterruptedException { 1748 1749 List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size()); 1750 List<Future<Void>> workFutures; 1751 1752 // loop to contact each region server in parallel 1753 for (ServerName rsinfo: regionServerList) { 1754 workItems.add(new WorkItemRegion(this, rsinfo, errors, connection)); 1755 } 1756 1757 workFutures = executor.invokeAll(workItems); 1758 1759 for(int i=0; i<workFutures.size(); i++) { 1760 WorkItemRegion item = workItems.get(i); 1761 Future<Void> f = workFutures.get(i); 1762 try { 1763 f.get(); 1764 } catch(ExecutionException e) { 1765 LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(), 1766 e.getCause()); 1767 } 1768 } 1769 } 1770 1771 /** 1772 * Check consistency of all regions that have been found in previous phases. 1773 */ 1774 private void checkAndFixConsistency() 1775 throws IOException, KeeperException, InterruptedException { 1776 // Divide the checks in two phases. One for default/primary replicas and another 1777 // for the non-primary ones. Keeps code cleaner this way. 
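    // The primary-replica pass runs to completion first so that any hbase:meta fixes made for
    // primaries are already in place when the replicas are examined; HDFS checking is switched
    // off for the replica pass because replica regions keep no data of their own on HDFS.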
1778 1779 List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size()); 1780 for (java.util.Map.Entry<String, HbckRegionInfo> e: regionInfoMap.entrySet()) { 1781 if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 1782 workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue())); 1783 } 1784 } 1785 checkRegionConsistencyConcurrently(workItems); 1786 1787 boolean prevHdfsCheck = shouldCheckHdfs(); 1788 setCheckHdfs(false); //replicas don't have any hdfs data 1789 // Run a pass over the replicas and fix any assignment issues that exist on the currently 1790 // deployed/undeployed replicas. 1791 List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size()); 1792 for (java.util.Map.Entry<String, HbckRegionInfo> e: regionInfoMap.entrySet()) { 1793 if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 1794 replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue())); 1795 } 1796 } 1797 checkRegionConsistencyConcurrently(replicaWorkItems); 1798 setCheckHdfs(prevHdfsCheck); 1799 1800 // If some regions is skipped during checkRegionConsistencyConcurrently() phase, we might 1801 // not get accurate state of the hbase if continuing. The config here allows users to tune 1802 // the tolerance of number of skipped region. 1803 // TODO: evaluate the consequence to continue the hbck operation without config. 1804 int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0); 1805 int numOfSkippedRegions = skippedRegions.size(); 1806 if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) { 1807 throw new IOException(numOfSkippedRegions 1808 + " region(s) could not be checked or repaired. See logs for detail."); 1809 } 1810 1811 if (shouldCheckHdfs()) { 1812 checkAndFixTableStates(); 1813 } 1814 } 1815 1816 /** 1817 * Check consistency of all regions using mulitple threads concurrently. 1818 */ 1819 private void checkRegionConsistencyConcurrently( 1820 final List<CheckRegionConsistencyWorkItem> workItems) 1821 throws IOException, KeeperException, InterruptedException { 1822 if (workItems.isEmpty()) { 1823 return; // nothing to check 1824 } 1825 1826 List<Future<Void>> workFutures = executor.invokeAll(workItems); 1827 for(Future<Void> f: workFutures) { 1828 try { 1829 f.get(); 1830 } catch(ExecutionException e1) { 1831 LOG.warn("Could not check region consistency " , e1.getCause()); 1832 if (e1.getCause() instanceof IOException) { 1833 throw (IOException)e1.getCause(); 1834 } else if (e1.getCause() instanceof KeeperException) { 1835 throw (KeeperException)e1.getCause(); 1836 } else if (e1.getCause() instanceof InterruptedException) { 1837 throw (InterruptedException)e1.getCause(); 1838 } else { 1839 throw new IOException(e1.getCause()); 1840 } 1841 } 1842 } 1843 } 1844 1845 class CheckRegionConsistencyWorkItem implements Callable<Void> { 1846 private final String key; 1847 private final HbckRegionInfo hbi; 1848 1849 CheckRegionConsistencyWorkItem(String key, HbckRegionInfo hbi) { 1850 this.key = key; 1851 this.hbi = hbi; 1852 } 1853 1854 @Override 1855 public synchronized Void call() throws Exception { 1856 try { 1857 checkRegionConsistency(key, hbi); 1858 } catch (Exception e) { 1859 // If the region is non-META region, skip this region and send warning/error message; if 1860 // the region is META region, we should not continue. 
1861 LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString() 1862 + "'.", e); 1863 if (hbi.getHdfsHRI().isMetaRegion()) { 1864 throw e; 1865 } 1866 LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'"); 1867 addSkippedRegion(hbi); 1868 } 1869 return null; 1870 } 1871 } 1872 1873 private void addSkippedRegion(final HbckRegionInfo hbi) { 1874 Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName()); 1875 if (skippedRegionNames == null) { 1876 skippedRegionNames = new HashSet<>(); 1877 } 1878 skippedRegionNames.add(hbi.getRegionNameAsString()); 1879 skippedRegions.put(hbi.getTableName(), skippedRegionNames); 1880 } 1881 1882 /** 1883 * Check and fix table states, assumes full info available: 1884 * - tableInfos 1885 * - empty tables loaded 1886 */ 1887 private void checkAndFixTableStates() throws IOException { 1888 // first check dangling states 1889 for (Entry<TableName, TableState> entry : tableStates.entrySet()) { 1890 TableName tableName = entry.getKey(); 1891 TableState tableState = entry.getValue(); 1892 HbckTableInfo tableInfo = tablesInfo.get(tableName); 1893 if (isTableIncluded(tableName) 1894 && !tableName.isSystemTable() 1895 && tableInfo == null) { 1896 if (fixMeta) { 1897 MetaTableAccessor.deleteTableState(connection, tableName); 1898 TableState state = MetaTableAccessor.getTableState(connection, tableName); 1899 if (state != null) { 1900 errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE, 1901 tableName + " unable to delete dangling table state " + tableState); 1902 } 1903 } else if (!checkMetaOnly) { 1904 // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is 1905 // true, tableInfo will be null as tablesInfo are not polulated for all tables from hdfs 1906 errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE, 1907 tableName + " has dangling table state " + tableState); 1908 } 1909 } 1910 } 1911 // check that all tables have states 1912 for (TableName tableName : tablesInfo.keySet()) { 1913 if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) { 1914 if (fixMeta) { 1915 MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED); 1916 TableState newState = MetaTableAccessor.getTableState(connection, tableName); 1917 if (newState == null) { 1918 errors.reportError(ERROR_CODE.NO_TABLE_STATE, 1919 "Unable to change state for table " + tableName + " in meta "); 1920 } 1921 } else { 1922 errors.reportError(ERROR_CODE.NO_TABLE_STATE, 1923 tableName + " has no state in meta "); 1924 } 1925 } 1926 } 1927 } 1928 1929 private void preCheckPermission() throws IOException, AccessDeniedException { 1930 if (shouldIgnorePreCheckPermission()) { 1931 return; 1932 } 1933 1934 Path hbaseDir = FSUtils.getRootDir(getConf()); 1935 FileSystem fs = hbaseDir.getFileSystem(getConf()); 1936 UserProvider userProvider = UserProvider.instantiate(getConf()); 1937 UserGroupInformation ugi = userProvider.getCurrent().getUGI(); 1938 FileStatus[] files = fs.listStatus(hbaseDir); 1939 for (FileStatus file : files) { 1940 try { 1941 FSUtils.checkAccess(ugi, file, FsAction.WRITE); 1942 } catch (AccessDeniedException ace) { 1943 LOG.warn("Got AccessDeniedException when preCheckPermission ", ace); 1944 errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName() 1945 + " does not have write perms to " + file.getPath() 1946 + ". 
Please rerun hbck as hdfs user " + file.getOwner()); 1947 throw ace; 1948 } 1949 } 1950 } 1951 1952 /** 1953 * Deletes region from meta table 1954 */ 1955 private void deleteMetaRegion(HbckRegionInfo hi) throws IOException { 1956 deleteMetaRegion(hi.getMetaEntry().getRegionName()); 1957 } 1958 1959 /** 1960 * Deletes region from meta table 1961 */ 1962 private void deleteMetaRegion(byte[] metaKey) throws IOException { 1963 Delete d = new Delete(metaKey); 1964 meta.delete(d); 1965 LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" ); 1966 } 1967 1968 /** 1969 * Reset the split parent region info in meta table 1970 */ 1971 private void resetSplitParent(HbckRegionInfo hi) throws IOException { 1972 RowMutations mutations = new RowMutations(hi.getMetaEntry().getRegionName()); 1973 Delete d = new Delete(hi.getMetaEntry().getRegionName()); 1974 d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER); 1975 d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER); 1976 mutations.add(d); 1977 1978 RegionInfo hri = RegionInfoBuilder.newBuilder(hi.getMetaEntry()) 1979 .setOffline(false) 1980 .setSplit(false) 1981 .build(); 1982 Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime()); 1983 mutations.add(p); 1984 1985 meta.mutateRow(mutations); 1986 LOG.info("Reset split parent " + hi.getMetaEntry().getRegionNameAsString() + " in META"); 1987 } 1988 1989 /** 1990 * This backwards-compatibility wrapper for permanently offlining a region 1991 * that should not be alive. If the region server does not support the 1992 * "offline" method, it will use the closest unassign method instead. This 1993 * will basically work until one attempts to disable or delete the affected 1994 * table. The problem has to do with in-memory only master state, so 1995 * restarting the HMaster or failing over to another should fix this. 1996 */ 1997 void offline(byte[] regionName) throws IOException { 1998 String regionString = Bytes.toStringBinary(regionName); 1999 if (!rsSupportsOffline) { 2000 LOG.warn( 2001 "Using unassign region " + regionString + " instead of using offline method, you should" + 2002 " restart HMaster after these repairs"); 2003 admin.unassign(regionName, true); 2004 return; 2005 } 2006 2007 // first time we assume the rs's supports #offline. 2008 try { 2009 LOG.info("Offlining region " + regionString); 2010 admin.offline(regionName); 2011 } catch (IOException ioe) { 2012 String notFoundMsg = "java.lang.NoSuchMethodException: " + 2013 "org.apache.hadoop.hbase.master.HMaster.offline([B)"; 2014 if (ioe.getMessage().contains(notFoundMsg)) { 2015 LOG.warn("Using unassign region " + regionString + 2016 " instead of using offline method, you should" + 2017 " restart HMaster after these repairs"); 2018 rsSupportsOffline = false; // in the future just use unassign 2019 admin.unassign(regionName, true); 2020 return; 2021 } 2022 throw ioe; 2023 } 2024 } 2025 2026 /** 2027 * Attempts to undeploy a region from a region server based in information in 2028 * META. Any operations that modify the file system should make sure that 2029 * its corresponding region is not deployed to prevent data races. 2030 * 2031 * A separate call is required to update the master in-memory region state 2032 * kept in the AssignementManager. Because disable uses this state instead of 2033 * that found in META, we can't seem to cleanly disable/delete tables that 2034 * have been hbck fixed. 
When used on a version of HBase that does not have 2035 * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master 2036 * restart or failover may be required. 2037 */ 2038 void closeRegion(HbckRegionInfo hi) throws IOException, InterruptedException { 2039 if (hi.getMetaEntry() == null && hi.getHdfsEntry() == null) { 2040 undeployRegions(hi); 2041 return; 2042 } 2043 2044 // get assignment info and hregioninfo from meta. 2045 Get get = new Get(hi.getRegionName()); 2046 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); 2047 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); 2048 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER); 2049 // also get the locations of the replicas to close if the primary region is being closed 2050 if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 2051 int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication(); 2052 for (int i = 0; i < numReplicas; i++) { 2053 get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i)); 2054 get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i)); 2055 } 2056 } 2057 Result r = meta.get(get); 2058 RegionLocations rl = MetaTableAccessor.getRegionLocations(r); 2059 if (rl == null) { 2060 LOG.warn("Unable to close region " + hi.getRegionNameAsString() + 2061 " since meta does not have handle to reach it"); 2062 return; 2063 } 2064 for (HRegionLocation h : rl.getRegionLocations()) { 2065 ServerName serverName = h.getServerName(); 2066 if (serverName == null) { 2067 errors.reportError("Unable to close region " 2068 + hi.getRegionNameAsString() + " because meta does not " 2069 + "have handle to reach it."); 2070 continue; 2071 } 2072 RegionInfo hri = h.getRegionInfo(); 2073 if (hri == null) { 2074 LOG.warn("Unable to close region " + hi.getRegionNameAsString() 2075 + " because hbase:meta had invalid or missing " 2076 + HConstants.CATALOG_FAMILY_STR + ":" 2077 + Bytes.toString(HConstants.REGIONINFO_QUALIFIER) 2078 + " qualifier value."); 2079 continue; 2080 } 2081 // close the region -- close files and remove assignment 2082 HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri); 2083 } 2084 } 2085 2086 private void undeployRegions(HbckRegionInfo hi) throws IOException, InterruptedException { 2087 undeployRegionsForHbi(hi); 2088 // undeploy replicas of the region (but only if the method is invoked for the primary) 2089 if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 2090 return; 2091 } 2092 int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication(); 2093 for (int i = 1; i < numReplicas; i++) { 2094 if (hi.getPrimaryHRIForDeployedReplica() == null) continue; 2095 RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica( 2096 hi.getPrimaryHRIForDeployedReplica(), i); 2097 HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName()); 2098 if (h != null) { 2099 undeployRegionsForHbi(h); 2100 //set skip checks; we undeployed it, and we don't want to evaluate this anymore 2101 //in consistency checks 2102 h.setSkipChecks(true); 2103 } 2104 } 2105 } 2106 2107 private void undeployRegionsForHbi(HbckRegionInfo hi) throws IOException, InterruptedException { 2108 for (HbckRegionInfo.OnlineEntry rse : hi.getOnlineEntries()) { 2109 LOG.debug("Undeploy region " + rse.getRegionInfo() + " from " + rse.getServerName()); 2110 try { 2111 HBaseFsckRepair 2112 .closeRegionSilentlyAndWait(connection, rse.getServerName(), rse.getRegionInfo()); 2113 
offline(rse.getRegionInfo().getRegionName()); 2114 } catch (IOException ioe) { 2115 LOG.warn("Got exception when attempting to offline region " 2116 + Bytes.toString(rse.getRegionInfo().getRegionName()), ioe); 2117 } 2118 } 2119 } 2120 2121 private void tryAssignmentRepair(HbckRegionInfo hbi, String msg) throws IOException, 2122 KeeperException, InterruptedException { 2123 // If we are trying to fix the errors 2124 if (shouldFixAssignments()) { 2125 errors.print(msg); 2126 undeployRegions(hbi); 2127 setShouldRerun(); 2128 RegionInfo hri = hbi.getHdfsHRI(); 2129 if (hri == null) { 2130 hri = hbi.getMetaEntry(); 2131 } 2132 HBaseFsckRepair.fixUnassigned(admin, hri); 2133 HBaseFsckRepair.waitUntilAssigned(admin, hri); 2134 2135 // also assign replicas if needed (do it only when this call operates on a primary replica) 2136 if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return; 2137 int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication(); 2138 for (int i = 1; i < replicationCount; i++) { 2139 hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i); 2140 HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName()); 2141 if (h != null) { 2142 undeployRegions(h); 2143 //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore 2144 //in consistency checks 2145 h.setSkipChecks(true); 2146 } 2147 HBaseFsckRepair.fixUnassigned(admin, hri); 2148 HBaseFsckRepair.waitUntilAssigned(admin, hri); 2149 } 2150 2151 } 2152 } 2153 2154 /** 2155 * Check a single region for consistency and correct deployment. 2156 */ 2157 private void checkRegionConsistency(final String key, final HbckRegionInfo hbi) 2158 throws IOException, KeeperException, InterruptedException { 2159 2160 if (hbi.isSkipChecks()) return; 2161 String descriptiveName = hbi.toString(); 2162 boolean inMeta = hbi.getMetaEntry() != null; 2163 // In case not checking HDFS, assume the region is on HDFS 2164 boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null; 2165 boolean hasMetaAssignment = inMeta && hbi.getMetaEntry().regionServer != null; 2166 boolean isDeployed = !hbi.getDeployedOn().isEmpty(); 2167 boolean isMultiplyDeployed = hbi.getDeployedOn().size() > 1; 2168 boolean deploymentMatchesMeta = 2169 hasMetaAssignment && isDeployed && !isMultiplyDeployed && 2170 hbi.getMetaEntry().regionServer.equals(hbi.getDeployedOn().get(0)); 2171 boolean splitParent = 2172 inMeta && hbi.getMetaEntry().isSplit() && hbi.getMetaEntry().isOffline(); 2173 boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.getMetaEntry().getTable()); 2174 boolean recentlyModified = inHdfs && 2175 hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime(); 2176 2177 // ========== First the healthy cases ============= 2178 if (hbi.containsOnlyHdfsEdits()) { 2179 return; 2180 } 2181 if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) { 2182 return; 2183 } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) { 2184 LOG.info("Region " + descriptiveName + " is in META, and in a disabled " + 2185 "tabled that is not deployed"); 2186 return; 2187 } else if (recentlyModified) { 2188 LOG.warn("Region " + descriptiveName + " was recently modified -- skipping"); 2189 return; 2190 } 2191 // ========== Cases where the region is not in hbase:meta ============= 2192 else if (!inMeta && !inHdfs && !isDeployed) { 2193 // We shouldn't have record of this region at all then! 
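      // (An HbckRegionInfo entry is only ever created when a region is seen in hbase:meta, in an
      // HDFS region dir, or deployed on a region server, so this combination should be impossible.)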
2194 assert false : "Entry for region with no data"; 2195 } else if (!inMeta && !inHdfs && isDeployed) { 2196 errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region " 2197 + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " + 2198 "deployed on " + Joiner.on(", ").join(hbi.getDeployedOn())); 2199 if (shouldFixAssignments()) { 2200 undeployRegions(hbi); 2201 } 2202 2203 } else if (!inMeta && inHdfs && !isDeployed) { 2204 if (hbi.isMerged()) { 2205 // This region has already been merged, the remaining hdfs file will be 2206 // cleaned by CatalogJanitor later 2207 hbi.setSkipChecks(true); 2208 LOG.info("Region " + descriptiveName 2209 + " got merge recently, its file(s) will be cleaned by CatalogJanitor later"); 2210 return; 2211 } 2212 errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region " 2213 + descriptiveName + " on HDFS, but not listed in hbase:meta " + 2214 "or deployed on any region server"); 2215 // restore region consistency of an adopted orphan 2216 if (shouldFixMeta()) { 2217 if (!hbi.isHdfsRegioninfoPresent()) { 2218 LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired" 2219 + " in table integrity repair phase if -fixHdfsOrphans was" + 2220 " used."); 2221 return; 2222 } 2223 2224 RegionInfo hri = hbi.getHdfsHRI(); 2225 HbckTableInfo tableInfo = tablesInfo.get(hri.getTable()); 2226 2227 for (RegionInfo region : tableInfo.getRegionsFromMeta(this.regionInfoMap)) { 2228 if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0 2229 && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(), 2230 hri.getEndKey()) >= 0) 2231 && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) { 2232 if(region.isSplit() || region.isOffline()) continue; 2233 Path regionDir = hbi.getHdfsRegionDir(); 2234 FileSystem fs = regionDir.getFileSystem(getConf()); 2235 List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir); 2236 for (Path familyDir : familyDirs) { 2237 List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir); 2238 for (Path referenceFilePath : referenceFilePaths) { 2239 Path parentRegionDir = 2240 StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent(); 2241 if (parentRegionDir.toString().endsWith(region.getEncodedName())) { 2242 LOG.warn(hri + " start and stop keys are in the range of " + region 2243 + ". The region might not be cleaned up from hdfs when region " + region 2244 + " split failed. 
Hence deleting from hdfs."); 2245 HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs, 2246 regionDir.getParent(), hri); 2247 return; 2248 } 2249 } 2250 } 2251 } 2252 } 2253 LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI()); 2254 int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication(); 2255 HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(), 2256 admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)) 2257 .getLiveServerMetrics().keySet(), numReplicas); 2258 2259 tryAssignmentRepair(hbi, "Trying to reassign region..."); 2260 } 2261 2262 } else if (!inMeta && inHdfs && isDeployed) { 2263 errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName 2264 + " not in META, but deployed on " + Joiner.on(", ").join(hbi.getDeployedOn())); 2265 debugLsr(hbi.getHdfsRegionDir()); 2266 if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 2267 // for replicas, this means that we should undeploy the region (we would have 2268 // gone over the primaries and fixed meta holes in first phase under 2269 // checkAndFixConsistency; we shouldn't get the condition !inMeta at 2270 // this stage unless unwanted replica) 2271 if (shouldFixAssignments()) { 2272 undeployRegionsForHbi(hbi); 2273 } 2274 } 2275 if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 2276 if (!hbi.isHdfsRegioninfoPresent()) { 2277 LOG.error("This should have been repaired in table integrity repair phase"); 2278 return; 2279 } 2280 2281 LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI()); 2282 int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication(); 2283 HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(), 2284 admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)) 2285 .getLiveServerMetrics().keySet(), numReplicas); 2286 tryAssignmentRepair(hbi, "Trying to fix unassigned region..."); 2287 } 2288 2289 // ========== Cases where the region is in hbase:meta ============= 2290 } else if (inMeta && inHdfs && !isDeployed && splitParent) { 2291 // check whether this is an actual error, or just transient state where parent 2292 // is not cleaned 2293 if (hbi.getMetaEntry().splitA != null && hbi.getMetaEntry().splitB != null) { 2294 // check that split daughters are there 2295 HbckRegionInfo infoA = this.regionInfoMap.get(hbi.getMetaEntry().splitA.getEncodedName()); 2296 HbckRegionInfo infoB = this.regionInfoMap.get(hbi.getMetaEntry().splitB.getEncodedName()); 2297 if (infoA != null && infoB != null) { 2298 // we already processed or will process daughters. Move on, nothing to see here. 2299 hbi.setSkipChecks(true); 2300 return; 2301 } 2302 } 2303 2304 // For Replica region, we need to do a similar check. If replica is not split successfully, 2305 // error is going to be reported against primary daughter region. 2306 if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 2307 LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, " 2308 + "and not deployed on any region server. This may be transient."); 2309 hbi.setSkipChecks(true); 2310 return; 2311 } 2312 2313 errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region " 2314 + descriptiveName + " is a split parent in META, in HDFS, " 2315 + "and not deployed on any region server. 
This could be transient, " 2316 + "consider to run the catalog janitor first!"); 2317 if (shouldFixSplitParents()) { 2318 setShouldRerun(); 2319 resetSplitParent(hbi); 2320 } 2321 } else if (inMeta && !inHdfs && !isDeployed) { 2322 errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region " 2323 + descriptiveName + " found in META, but not in HDFS " 2324 + "or deployed on any region server."); 2325 if (shouldFixMeta()) { 2326 deleteMetaRegion(hbi); 2327 } 2328 } else if (inMeta && !inHdfs && isDeployed) { 2329 errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName 2330 + " found in META, but not in HDFS, " + 2331 "and deployed on " + Joiner.on(", ").join(hbi.getDeployedOn())); 2332 // We treat HDFS as ground truth. Any information in meta is transient 2333 // and equivalent data can be regenerated. So, lets unassign and remove 2334 // these problems from META. 2335 if (shouldFixAssignments()) { 2336 errors.print("Trying to fix unassigned region..."); 2337 undeployRegions(hbi); 2338 } 2339 if (shouldFixMeta()) { 2340 // wait for it to complete 2341 deleteMetaRegion(hbi); 2342 } 2343 } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) { 2344 errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName 2345 + " not deployed on any region server."); 2346 tryAssignmentRepair(hbi, "Trying to fix unassigned region..."); 2347 } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) { 2348 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, 2349 "Region " + descriptiveName + " should not be deployed according " + 2350 "to META, but is deployed on " + Joiner.on(", ").join(hbi.getDeployedOn())); 2351 if (shouldFixAssignments()) { 2352 errors.print("Trying to close the region " + descriptiveName); 2353 setShouldRerun(); 2354 HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn()); 2355 } 2356 } else if (inMeta && inHdfs && isMultiplyDeployed) { 2357 errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName 2358 + " is listed in hbase:meta on region server " + hbi.getMetaEntry().regionServer 2359 + " but is multiply assigned to region servers " + 2360 Joiner.on(", ").join(hbi.getDeployedOn())); 2361 // If we are trying to fix the errors 2362 if (shouldFixAssignments()) { 2363 errors.print("Trying to fix assignment error..."); 2364 setShouldRerun(); 2365 HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn()); 2366 } 2367 } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) { 2368 errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region " 2369 + descriptiveName + " listed in hbase:meta on region server " + 2370 hbi.getMetaEntry().regionServer + " but found on region server " + 2371 hbi.getDeployedOn().get(0)); 2372 // If we are trying to fix the errors 2373 if (shouldFixAssignments()) { 2374 errors.print("Trying to fix assignment error..."); 2375 setShouldRerun(); 2376 HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn()); 2377 HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI()); 2378 } 2379 } else { 2380 errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName + 2381 " is in an unforeseen state:" + 2382 " inMeta=" + inMeta + 2383 " inHdfs=" + inHdfs + 2384 " isDeployed=" + isDeployed + 2385 " isMultiplyDeployed=" + isMultiplyDeployed + 2386 " deploymentMatchesMeta=" + deploymentMatchesMeta + 2387 " shouldBeDeployed=" + shouldBeDeployed); 2388 } 2389 } 2390 2391 /** 2392 * Checks tables integrity. 
Goes over all regions and scans the tables. 2393 * Collects all the pieces for each table and checks if there are missing, 2394 * repeated or overlapping ones. 2395 * @throws IOException 2396 */ 2397 SortedMap<TableName, HbckTableInfo> checkIntegrity() throws IOException { 2398 tablesInfo = new TreeMap<>(); 2399 LOG.debug("There are " + regionInfoMap.size() + " region info entries"); 2400 for (HbckRegionInfo hbi : regionInfoMap.values()) { 2401 // Check only valid, working regions 2402 if (hbi.getMetaEntry() == null) { 2403 // this assumes that consistency check has run loadMetaEntry 2404 Path p = hbi.getHdfsRegionDir(); 2405 if (p == null) { 2406 errors.report("No regioninfo in Meta or HDFS. " + hbi); 2407 } 2408 2409 // TODO test. 2410 continue; 2411 } 2412 if (hbi.getMetaEntry().regionServer == null) { 2413 errors.detail("Skipping region because no region server: " + hbi); 2414 continue; 2415 } 2416 if (hbi.getMetaEntry().isOffline()) { 2417 errors.detail("Skipping region because it is offline: " + hbi); 2418 continue; 2419 } 2420 if (hbi.containsOnlyHdfsEdits()) { 2421 errors.detail("Skipping region because it only contains edits" + hbi); 2422 continue; 2423 } 2424 2425 // Missing regionDir or over-deployment is checked elsewhere. Include 2426 // these cases in modTInfo, so we can evaluate those regions as part of 2427 // the region chain in META 2428 //if (hbi.foundRegionDir == null) continue; 2429 //if (hbi.deployedOn.size() != 1) continue; 2430 if (hbi.getDeployedOn().isEmpty()) { 2431 continue; 2432 } 2433 2434 // We should be safe here 2435 TableName tableName = hbi.getMetaEntry().getTable(); 2436 HbckTableInfo modTInfo = tablesInfo.get(tableName); 2437 if (modTInfo == null) { 2438 modTInfo = new HbckTableInfo(tableName, this); 2439 } 2440 for (ServerName server : hbi.getDeployedOn()) { 2441 modTInfo.addServer(server); 2442 } 2443 2444 if (!hbi.isSkipChecks()) { 2445 modTInfo.addRegionInfo(hbi); 2446 } 2447 2448 tablesInfo.put(tableName, modTInfo); 2449 } 2450 2451 loadTableInfosForTablesWithNoRegion(); 2452 2453 logParallelMerge(); 2454 for (HbckTableInfo tInfo : tablesInfo.values()) { 2455 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors); 2456 if (!tInfo.checkRegionChain(handler)) { 2457 errors.report("Found inconsistency in table " + tInfo.getName()); 2458 } 2459 } 2460 return tablesInfo; 2461 } 2462 2463 /** Loads table info's for tables that may not have been included, since there are no 2464 * regions reported for the table, but table dir is there in hdfs 2465 */ 2466 private void loadTableInfosForTablesWithNoRegion() throws IOException { 2467 Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll(); 2468 for (TableDescriptor htd : allTables.values()) { 2469 if (checkMetaOnly && !htd.isMetaTable()) { 2470 continue; 2471 } 2472 2473 TableName tableName = htd.getTableName(); 2474 if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) { 2475 HbckTableInfo tableInfo = new HbckTableInfo(tableName, this); 2476 tableInfo.htds.add(htd); 2477 tablesInfo.put(htd.getTableName(), tableInfo); 2478 } 2479 } 2480 } 2481 2482 /** 2483 * Merge hdfs data by moving from contained HbckRegionInfo into targetRegionDir. 2484 * @return number of file move fixes done to merge regions. 
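 * @throws IOException if a filesystem operation fails while moving files or sidelining the
 *         contained region dir.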
2485 */ 2486 public int mergeRegionDirs(Path targetRegionDir, HbckRegionInfo contained) throws IOException { 2487 int fileMoves = 0; 2488 String thread = Thread.currentThread().getName(); 2489 LOG.debug("[" + thread + "] Contained region dir after close and pause"); 2490 debugLsr(contained.getHdfsRegionDir()); 2491 2492 // rename the contained into the container. 2493 FileSystem fs = targetRegionDir.getFileSystem(getConf()); 2494 FileStatus[] dirs = null; 2495 try { 2496 dirs = fs.listStatus(contained.getHdfsRegionDir()); 2497 } catch (FileNotFoundException fnfe) { 2498 // region we are attempting to merge in is not present! Since this is a merge, there is 2499 // no harm skipping this region if it does not exist. 2500 if (!fs.exists(contained.getHdfsRegionDir())) { 2501 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() 2502 + " is missing. Assuming already sidelined or moved."); 2503 } else { 2504 sidelineRegionDir(fs, contained); 2505 } 2506 return fileMoves; 2507 } 2508 2509 if (dirs == null) { 2510 if (!fs.exists(contained.getHdfsRegionDir())) { 2511 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() 2512 + " already sidelined."); 2513 } else { 2514 sidelineRegionDir(fs, contained); 2515 } 2516 return fileMoves; 2517 } 2518 2519 for (FileStatus cf : dirs) { 2520 Path src = cf.getPath(); 2521 Path dst = new Path(targetRegionDir, src.getName()); 2522 2523 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) { 2524 // do not copy the old .regioninfo file. 2525 continue; 2526 } 2527 2528 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) { 2529 // do not copy the .oldlogs files 2530 continue; 2531 } 2532 2533 LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst); 2534 // FileSystem.rename is inconsistent with directories -- if the 2535 // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir, 2536 // it moves the src into the dst dir resulting in (foo/a/b). If 2537 // the dst does not exist, and the src a dir, src becomes dst. (foo/b) 2538 for (FileStatus hfile : fs.listStatus(src)) { 2539 boolean success = fs.rename(hfile.getPath(), dst); 2540 if (success) { 2541 fileMoves++; 2542 } 2543 } 2544 LOG.debug("[" + thread + "] Sideline directory contents:"); 2545 debugLsr(targetRegionDir); 2546 } 2547 2548 // if all success. 2549 sidelineRegionDir(fs, contained); 2550 LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " + 2551 getSidelineDir()); 2552 debugLsr(contained.getHdfsRegionDir()); 2553 2554 return fileMoves; 2555 } 2556 2557 2558 static class WorkItemOverlapMerge implements Callable<Void> { 2559 private TableIntegrityErrorHandler handler; 2560 Collection<HbckRegionInfo> overlapgroup; 2561 2562 WorkItemOverlapMerge(Collection<HbckRegionInfo> overlapgroup, 2563 TableIntegrityErrorHandler handler) { 2564 this.handler = handler; 2565 this.overlapgroup = overlapgroup; 2566 } 2567 2568 @Override 2569 public Void call() throws Exception { 2570 handler.handleOverlapGroup(overlapgroup); 2571 return null; 2572 } 2573 }; 2574 2575 /** 2576 * Return a list of user-space table names whose metadata have not been 2577 * modified in the last few milliseconds specified by timelag 2578 * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER, 2579 * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last 2580 * milliseconds specified by timelag, then the table is a candidate to be returned. 
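 * @param numSkipped incremented once for every table whose metadata changed within the timelag
 *        window and was therefore skipped.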
2581 * @return tables that have not been modified recently 2582 * @throws IOException if an error is encountered 2583 */ 2584 TableDescriptor[] getTables(AtomicInteger numSkipped) { 2585 List<TableName> tableNames = new ArrayList<>(); 2586 long now = EnvironmentEdgeManager.currentTime(); 2587 2588 for (HbckRegionInfo hbi : regionInfoMap.values()) { 2589 HbckRegionInfo.MetaEntry info = hbi.getMetaEntry(); 2590 2591 // if the start key is zero, then we have found the first region of a table. 2592 // pick only those tables that were not modified in the last few milliseconds. 2593 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) { 2594 if (info.modTime + timelag < now) { 2595 tableNames.add(info.getTable()); 2596 } else { 2597 numSkipped.incrementAndGet(); // one more in-flux table 2598 } 2599 } 2600 } 2601 return getTableDescriptors(tableNames); 2602 } 2603 2604 TableDescriptor[] getTableDescriptors(List<TableName> tableNames) { 2605 LOG.info("getTableDescriptors == tableNames => " + tableNames); 2606 try (Connection conn = ConnectionFactory.createConnection(getConf()); 2607 Admin admin = conn.getAdmin()) { 2608 List<TableDescriptor> tds = admin.listTableDescriptors(tableNames); 2609 return tds.toArray(new TableDescriptor[tds.size()]); 2610 } catch (IOException e) { 2611 LOG.debug("Exception getting table descriptors", e); 2612 } 2613 return new TableDescriptor[0]; 2614 } 2615 2616 /** 2617 * Gets the entry in regionInfo corresponding to the the given encoded 2618 * region name. If the region has not been seen yet, a new entry is added 2619 * and returned. 2620 */ 2621 private synchronized HbckRegionInfo getOrCreateInfo(String name) { 2622 HbckRegionInfo hbi = regionInfoMap.get(name); 2623 if (hbi == null) { 2624 hbi = new HbckRegionInfo(null); 2625 regionInfoMap.put(name, hbi); 2626 } 2627 return hbi; 2628 } 2629 2630 private void checkAndFixReplication() throws IOException { 2631 ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors); 2632 checker.checkUnDeletedQueues(); 2633 2634 if (checker.hasUnDeletedQueues() && this.fixReplication) { 2635 checker.fixUnDeletedQueues(); 2636 setShouldRerun(); 2637 } 2638 } 2639 2640 /** 2641 * Check values in regionInfo for hbase:meta 2642 * Check if zero or more than one regions with hbase:meta are found. 2643 * If there are inconsistencies (i.e. zero or more than one regions 2644 * pretend to be holding the hbase:meta) try to fix that and report an error. 2645 * @throws IOException from HBaseFsckRepair functions 2646 * @throws KeeperException 2647 * @throws InterruptedException 2648 */ 2649 boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException { 2650 Map<Integer, HbckRegionInfo> metaRegions = new HashMap<>(); 2651 for (HbckRegionInfo value : regionInfoMap.values()) { 2652 if (value.getMetaEntry() != null && value.getMetaEntry().isMetaRegion()) { 2653 metaRegions.put(value.getReplicaId(), value); 2654 } 2655 } 2656 int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME) 2657 .getRegionReplication(); 2658 boolean noProblem = true; 2659 // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas 2660 // Check the deployed servers. It should be exactly one server for each replica. 
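    // For each expected replica id: no deployed server means the replica gets assigned, more
    // than one server is treated as a duplicate assignment and repaired. Whatever then remains
    // in metaRegions is an excess replica and is undeployed below.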
2661 for (int i = 0; i < metaReplication; i++) { 2662 HbckRegionInfo metaHbckRegionInfo = metaRegions.remove(i); 2663 List<ServerName> servers = new ArrayList<>(); 2664 if (metaHbckRegionInfo != null) { 2665 servers = metaHbckRegionInfo.getDeployedOn(); 2666 } 2667 if (servers.size() != 1) { 2668 noProblem = false; 2669 if (servers.isEmpty()) { 2670 assignMetaReplica(i); 2671 } else if (servers.size() > 1) { 2672 errors 2673 .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " + 2674 metaHbckRegionInfo.getReplicaId() + " is found on more than one region."); 2675 if (shouldFixAssignments()) { 2676 errors.print("Trying to fix a problem with hbase:meta, replicaId " + 2677 metaHbckRegionInfo.getReplicaId() + ".."); 2678 setShouldRerun(); 2679 // try fix it (treat is a dupe assignment) 2680 HBaseFsckRepair 2681 .fixMultiAssignment(connection, metaHbckRegionInfo.getMetaEntry(), servers); 2682 } 2683 } 2684 } 2685 } 2686 // unassign whatever is remaining in metaRegions. They are excess replicas. 2687 for (Map.Entry<Integer, HbckRegionInfo> entry : metaRegions.entrySet()) { 2688 noProblem = false; 2689 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, 2690 "hbase:meta replicas are deployed in excess. Configured " + metaReplication + 2691 ", deployed " + metaRegions.size()); 2692 if (shouldFixAssignments()) { 2693 errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() + 2694 " of hbase:meta.."); 2695 setShouldRerun(); 2696 unassignMetaReplica(entry.getValue()); 2697 } 2698 } 2699 // if noProblem is false, rerun hbck with hopefully fixed META 2700 // if noProblem is true, no errors, so continue normally 2701 return noProblem; 2702 } 2703 2704 private void unassignMetaReplica(HbckRegionInfo hi) 2705 throws IOException, InterruptedException, KeeperException { 2706 undeployRegions(hi); 2707 ZKUtil 2708 .deleteNode(zkw, zkw.getZNodePaths().getZNodeForReplica(hi.getMetaEntry().getReplicaId())); 2709 } 2710 2711 private void assignMetaReplica(int replicaId) 2712 throws IOException, KeeperException, InterruptedException { 2713 errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " + 2714 replicaId +" is not found on any region."); 2715 if (shouldFixAssignments()) { 2716 errors.print("Trying to fix a problem with hbase:meta.."); 2717 setShouldRerun(); 2718 // try to fix it (treat it as unassigned region) 2719 RegionInfo h = RegionReplicaUtil.getRegionInfoForReplica( 2720 RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId); 2721 HBaseFsckRepair.fixUnassigned(admin, h); 2722 HBaseFsckRepair.waitUntilAssigned(admin, h); 2723 } 2724 } 2725 2726 /** 2727 * Scan hbase:meta, adding all regions found to the regionInfo map. 
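 * @return always true; the scan itself is skipped when only hbase:meta is being checked.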
2728 * @throws IOException if an error is encountered 2729 */ 2730 boolean loadMetaEntries() throws IOException { 2731 MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() { 2732 int countRecord = 1; 2733 2734 // comparator to sort KeyValues with latest modtime 2735 final Comparator<Cell> comp = new Comparator<Cell>() { 2736 @Override 2737 public int compare(Cell k1, Cell k2) { 2738 return Long.compare(k1.getTimestamp(), k2.getTimestamp()); 2739 } 2740 }; 2741 2742 @Override 2743 public boolean visit(Result result) throws IOException { 2744 try { 2745 2746 // record the latest modification of this META record 2747 long ts = Collections.max(result.listCells(), comp).getTimestamp(); 2748 RegionLocations rl = MetaTableAccessor.getRegionLocations(result); 2749 if (rl == null) { 2750 emptyRegionInfoQualifiers.add(result); 2751 errors.reportError(ERROR_CODE.EMPTY_META_CELL, 2752 "Empty REGIONINFO_QUALIFIER found in hbase:meta"); 2753 return true; 2754 } 2755 ServerName sn = null; 2756 if (rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null || 2757 rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) { 2758 emptyRegionInfoQualifiers.add(result); 2759 errors.reportError(ERROR_CODE.EMPTY_META_CELL, 2760 "Empty REGIONINFO_QUALIFIER found in hbase:meta"); 2761 return true; 2762 } 2763 RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo(); 2764 if (!(isTableIncluded(hri.getTable()) 2765 || hri.isMetaRegion())) { 2766 return true; 2767 } 2768 PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result); 2769 for (HRegionLocation h : rl.getRegionLocations()) { 2770 if (h == null || h.getRegionInfo() == null) { 2771 continue; 2772 } 2773 sn = h.getServerName(); 2774 hri = h.getRegionInfo(); 2775 2776 HbckRegionInfo.MetaEntry m = null; 2777 if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 2778 m = new HbckRegionInfo.MetaEntry(hri, sn, ts, daughters.getFirst(), 2779 daughters.getSecond()); 2780 } else { 2781 m = new HbckRegionInfo.MetaEntry(hri, sn, ts, null, null); 2782 } 2783 HbckRegionInfo previous = regionInfoMap.get(hri.getEncodedName()); 2784 if (previous == null) { 2785 regionInfoMap.put(hri.getEncodedName(), new HbckRegionInfo(m)); 2786 } else if (previous.getMetaEntry() == null) { 2787 previous.setMetaEntry(m); 2788 } else { 2789 throw new IOException("Two entries in hbase:meta are same " + previous); 2790 } 2791 } 2792 List<RegionInfo> mergeParents = MetaTableAccessor.getMergeRegions(result.rawCells()); 2793 if (mergeParents != null) { 2794 for (RegionInfo mergeRegion : mergeParents) { 2795 if (mergeRegion != null) { 2796 // This region is already being merged 2797 HbckRegionInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName()); 2798 hbInfo.setMerged(true); 2799 } 2800 } 2801 } 2802 2803 // show proof of progress to the user, once for every 100 records. 2804 if (countRecord % 100 == 0) { 2805 errors.progress(); 2806 } 2807 countRecord++; 2808 return true; 2809 } catch (RuntimeException e) { 2810 LOG.error("Result=" + result); 2811 throw e; 2812 } 2813 } 2814 }; 2815 if (!checkMetaOnly) { 2816 // Scan hbase:meta to pick up user regions 2817 MetaTableAccessor.fullScanRegions(connection, visitor); 2818 } 2819 2820 errors.print(""); 2821 return true; 2822 } 2823 2824 /** 2825 * Prints summary of all tables found on the system. 
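 * @param tablesInfo map from table name to the consistency information collected for that table.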
2826 */ 2827 private void printTableSummary(SortedMap<TableName, HbckTableInfo> tablesInfo) { 2828 StringBuilder sb = new StringBuilder(); 2829 int numOfSkippedRegions; 2830 errors.print("Summary:"); 2831 for (HbckTableInfo tInfo : tablesInfo.values()) { 2832 numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ? 2833 skippedRegions.get(tInfo.getName()).size() : 0; 2834 2835 if (errors.tableHasErrors(tInfo)) { 2836 errors.print("Table " + tInfo.getName() + " is inconsistent."); 2837 } else if (numOfSkippedRegions > 0){ 2838 errors.print("Table " + tInfo.getName() + " is okay (with " 2839 + numOfSkippedRegions + " skipped regions)."); 2840 } 2841 else { 2842 errors.print("Table " + tInfo.getName() + " is okay."); 2843 } 2844 errors.print(" Number of regions: " + tInfo.getNumRegions()); 2845 if (numOfSkippedRegions > 0) { 2846 Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName()); 2847 System.out.println(" Number of skipped regions: " + numOfSkippedRegions); 2848 System.out.println(" List of skipped regions:"); 2849 for(String sr : skippedRegionStrings) { 2850 System.out.println(" " + sr); 2851 } 2852 } 2853 sb.setLength(0); // clear out existing buffer, if any. 2854 sb.append(" Deployed on: "); 2855 for (ServerName server : tInfo.deployedOn) { 2856 sb.append(" " + server.toString()); 2857 } 2858 errors.print(sb.toString()); 2859 } 2860 } 2861 2862 static HbckErrorReporter getErrorReporter(final Configuration conf) 2863 throws ClassNotFoundException { 2864 Class<? extends HbckErrorReporter> reporter = 2865 conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, 2866 HbckErrorReporter.class); 2867 return ReflectionUtils.newInstance(reporter, conf); 2868 } 2869 2870 static class PrintingErrorReporter implements HbckErrorReporter { 2871 public int errorCount = 0; 2872 private int showProgress; 2873 // How frequently calls to progress() will create output 2874 private static final int progressThreshold = 100; 2875 2876 Set<HbckTableInfo> errorTables = new HashSet<>(); 2877 2878 // for use by unit tests to verify which errors were discovered 2879 private ArrayList<ERROR_CODE> errorList = new ArrayList<>(); 2880 2881 @Override 2882 public void clear() { 2883 errorTables.clear(); 2884 errorList.clear(); 2885 errorCount = 0; 2886 } 2887 2888 @Override 2889 public synchronized void reportError(ERROR_CODE errorCode, String message) { 2890 if (errorCode == ERROR_CODE.WRONG_USAGE) { 2891 System.err.println(message); 2892 return; 2893 } 2894 2895 errorList.add(errorCode); 2896 if (!summary) { 2897 System.out.println("ERROR: " + message); 2898 } 2899 errorCount++; 2900 showProgress = 0; 2901 } 2902 2903 @Override 2904 public synchronized void reportError(ERROR_CODE errorCode, String message, 2905 HbckTableInfo table) { 2906 errorTables.add(table); 2907 reportError(errorCode, message); 2908 } 2909 2910 @Override 2911 public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table, 2912 HbckRegionInfo info) { 2913 errorTables.add(table); 2914 String reference = "(region " + info.getRegionNameAsString() + ")"; 2915 reportError(errorCode, reference + " " + message); 2916 } 2917 2918 @Override 2919 public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table, 2920 HbckRegionInfo info1, HbckRegionInfo info2) { 2921 errorTables.add(table); 2922 String reference = "(regions " + info1.getRegionNameAsString() 2923 + " and " + info2.getRegionNameAsString() + ")"; 2924 reportError(errorCode, reference + " " 
+ message); 2925 } 2926 2927 @Override 2928 public synchronized void reportError(String message) { 2929 reportError(ERROR_CODE.UNKNOWN, message); 2930 } 2931 2932 /** 2933 * Report error information, but do not increment the error count. Intended for cases 2934 * where the actual error would have been reported previously. 2935 * @param message 2936 */ 2937 @Override 2938 public synchronized void report(String message) { 2939 if (! summary) { 2940 System.out.println("ERROR: " + message); 2941 } 2942 showProgress = 0; 2943 } 2944 2945 @Override 2946 public synchronized int summarize() { 2947 System.out.println(Integer.toString(errorCount) + 2948 " inconsistencies detected."); 2949 if (errorCount == 0) { 2950 System.out.println("Status: OK"); 2951 return 0; 2952 } else { 2953 System.out.println("Status: INCONSISTENT"); 2954 return -1; 2955 } 2956 } 2957 2958 @Override 2959 public ArrayList<ERROR_CODE> getErrorList() { 2960 return errorList; 2961 } 2962 2963 @Override 2964 public synchronized void print(String message) { 2965 if (!summary) { 2966 System.out.println(message); 2967 } 2968 } 2969 2970 @Override 2971 public boolean tableHasErrors(HbckTableInfo table) { 2972 return errorTables.contains(table); 2973 } 2974 2975 @Override 2976 public void resetErrors() { 2977 errorCount = 0; 2978 } 2979 2980 @Override 2981 public synchronized void detail(String message) { 2982 if (details) { 2983 System.out.println(message); 2984 } 2985 showProgress = 0; 2986 } 2987 2988 @Override 2989 public synchronized void progress() { 2990 if (showProgress++ == progressThreshold) { 2991 if (!summary) { 2992 System.out.print("."); 2993 } 2994 showProgress = 0; 2995 } 2996 } 2997 } 2998 2999 /** 3000 * Contact a region server and get all information from it 3001 */ 3002 static class WorkItemRegion implements Callable<Void> { 3003 private final HBaseFsck hbck; 3004 private final ServerName rsinfo; 3005 private final HbckErrorReporter errors; 3006 private final ClusterConnection connection; 3007 3008 WorkItemRegion(HBaseFsck hbck, ServerName info, HbckErrorReporter errors, 3009 ClusterConnection connection) { 3010 this.hbck = hbck; 3011 this.rsinfo = info; 3012 this.errors = errors; 3013 this.connection = connection; 3014 } 3015 3016 @Override 3017 public synchronized Void call() throws IOException { 3018 errors.progress(); 3019 try { 3020 BlockingInterface server = connection.getAdmin(rsinfo); 3021 3022 // list all online regions from this region server 3023 List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(server); 3024 regions = filterRegions(regions); 3025 3026 if (details) { 3027 errors.detail("RegionServer: " + rsinfo.getServerName() + 3028 " number of regions: " + regions.size()); 3029 for (RegionInfo rinfo: regions) { 3030 errors.detail(" " + rinfo.getRegionNameAsString() + 3031 " id: " + rinfo.getRegionId() + 3032 " encoded_name: " + rinfo.getEncodedName() + 3033 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) + 3034 " end: " + Bytes.toStringBinary(rinfo.getEndKey())); 3035 } 3036 } 3037 3038 // check to see if the existence of this region matches the region in META 3039 3040 for (RegionInfo r : regions) { 3041 HbckRegionInfo hbi = hbck.getOrCreateInfo(r.getEncodedName()); 3042 hbi.addServer(r, rsinfo); 3043 } 3044 } catch (IOException e) { // unable to connect to the region server. 3045 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() + 3046 " Unable to fetch region information. 
" + e); 3047 throw e; 3048 } 3049 return null; 3050 } 3051 3052 private List<RegionInfo> filterRegions(List<RegionInfo> regions) { 3053 List<RegionInfo> ret = Lists.newArrayList(); 3054 for (RegionInfo hri : regions) { 3055 if (hri.isMetaRegion() || (!hbck.checkMetaOnly 3056 && hbck.isTableIncluded(hri.getTable()))) { 3057 ret.add(hri); 3058 } 3059 } 3060 return ret; 3061 } 3062 } 3063 3064 /** 3065 * Contact hdfs and get all information about specified table directory into 3066 * regioninfo list. 3067 */ 3068 class WorkItemHdfsDir implements Callable<Void> { 3069 private FileStatus tableDir; 3070 private HbckErrorReporter errors; 3071 private FileSystem fs; 3072 3073 WorkItemHdfsDir(FileSystem fs, HbckErrorReporter errors, FileStatus status) { 3074 this.fs = fs; 3075 this.tableDir = status; 3076 this.errors = errors; 3077 } 3078 3079 @Override 3080 public synchronized Void call() throws InterruptedException, ExecutionException { 3081 final Vector<Exception> exceptions = new Vector<>(); 3082 3083 try { 3084 final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath()); 3085 final List<Future<?>> futures = new ArrayList<>(regionDirs.length); 3086 3087 for (final FileStatus regionDir : regionDirs) { 3088 errors.progress(); 3089 final String encodedName = regionDir.getPath().getName(); 3090 // ignore directories that aren't hexadecimal 3091 if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) { 3092 continue; 3093 } 3094 3095 if (!exceptions.isEmpty()) { 3096 break; 3097 } 3098 3099 futures.add(executor.submit(new Runnable() { 3100 @Override 3101 public void run() { 3102 try { 3103 LOG.debug("Loading region info from hdfs:"+ regionDir.getPath()); 3104 3105 Path regioninfoFile = new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE); 3106 boolean regioninfoFileExists = fs.exists(regioninfoFile); 3107 3108 if (!regioninfoFileExists) { 3109 // As tables become larger it is more and more likely that by the time you 3110 // reach a given region that it will be gone due to region splits/merges. 3111 if (!fs.exists(regionDir.getPath())) { 3112 LOG.warn("By the time we tried to process this region dir it was already gone: " 3113 + regionDir.getPath()); 3114 return; 3115 } 3116 } 3117 3118 HbckRegionInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName); 3119 HbckRegionInfo.HdfsEntry he = new HbckRegionInfo.HdfsEntry(); 3120 synchronized (hbi) { 3121 if (hbi.getHdfsRegionDir() != null) { 3122 errors.print("Directory " + encodedName + " duplicate??" 
+ 3123 hbi.getHdfsRegionDir()); 3124 } 3125 3126 he.regionDir = regionDir.getPath(); 3127 he.regionDirModTime = regionDir.getModificationTime(); 3128 he.hdfsRegioninfoFilePresent = regioninfoFileExists; 3129 // we add to orphan list when we attempt to read .regioninfo 3130 3131 // Set a flag if this region contains only edits 3132 // This is special case if a region is left after split 3133 he.hdfsOnlyEdits = true; 3134 FileStatus[] subDirs = fs.listStatus(regionDir.getPath()); 3135 Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath()); 3136 for (FileStatus subDir : subDirs) { 3137 errors.progress(); 3138 String sdName = subDir.getPath().getName(); 3139 if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) { 3140 he.hdfsOnlyEdits = false; 3141 break; 3142 } 3143 } 3144 hbi.setHdfsEntry(he); 3145 } 3146 } catch (Exception e) { 3147 LOG.error("Could not load region dir", e); 3148 exceptions.add(e); 3149 } 3150 } 3151 })); 3152 } 3153 3154 // Ensure all pending tasks are complete (or that we run into an exception) 3155 for (Future<?> f : futures) { 3156 if (!exceptions.isEmpty()) { 3157 break; 3158 } 3159 try { 3160 f.get(); 3161 } catch (ExecutionException e) { 3162 LOG.error("Unexpected exec exception! Should've been caught already. (Bug?)", e); 3163 // Shouldn't happen, we already logged/caught any exceptions in the Runnable 3164 }; 3165 } 3166 } catch (IOException e) { 3167 LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e); 3168 exceptions.add(e); 3169 } finally { 3170 if (!exceptions.isEmpty()) { 3171 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: " 3172 + tableDir.getPath().getName() 3173 + " Unable to fetch all HDFS region information. "); 3174 // Just throw the first exception as an indication something bad happened 3175 // Don't need to propagate all the exceptions, we already logged them all anyway 3176 throw new ExecutionException("First exception in WorkItemHdfsDir", exceptions.firstElement()); 3177 } 3178 } 3179 return null; 3180 } 3181 } 3182 3183 /** 3184 * Contact hdfs and get all information about specified table directory into 3185 * regioninfo list. 3186 */ 3187 static class WorkItemHdfsRegionInfo implements Callable<Void> { 3188 private HbckRegionInfo hbi; 3189 private HBaseFsck hbck; 3190 private HbckErrorReporter errors; 3191 3192 WorkItemHdfsRegionInfo(HbckRegionInfo hbi, HBaseFsck hbck, HbckErrorReporter errors) { 3193 this.hbi = hbi; 3194 this.hbck = hbck; 3195 this.errors = errors; 3196 } 3197 3198 @Override 3199 public synchronized Void call() throws IOException { 3200 // only load entries that haven't been loaded yet. 3201 if (hbi.getHdfsHRI() == null) { 3202 try { 3203 errors.progress(); 3204 hbi.loadHdfsRegioninfo(hbck.getConf()); 3205 } catch (IOException ioe) { 3206 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table " 3207 + hbi.getTableName() + " in hdfs dir " 3208 + hbi.getHdfsRegionDir() 3209 + "! It may be an invalid format or version file. Treating as " 3210 + "an orphaned regiondir."; 3211 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg); 3212 try { 3213 hbck.debugLsr(hbi.getHdfsRegionDir()); 3214 } catch (IOException ioe2) { 3215 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2); 3216 throw ioe2; 3217 } 3218 hbck.orphanHdfsDirs.add(hbi); 3219 throw ioe; 3220 } 3221 } 3222 return null; 3223 } 3224 }; 3225 3226 /** 3227 * Display the full report from fsck. This displays all live and dead region 3228 * servers, and all known regions. 
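 * Typically enabled from the command line via the -details option (see exec() below).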
3229 */ 3230 public static void setDisplayFullReport() { 3231 details = true; 3232 } 3233 3234 public static boolean shouldDisplayFullReport() { 3235 return details; 3236 } 3237 3238 /** 3239 * Set exclusive mode. 3240 */ 3241 public static void setForceExclusive() { 3242 forceExclusive = true; 3243 } 3244 3245 /** 3246 * Only one instance of hbck can modify HBase at a time. 3247 */ 3248 public boolean isExclusive() { 3249 return fixAny || forceExclusive; 3250 } 3251 3252 /** 3253 * Set summary mode. 3254 * Print only summary of the tables and status (OK or INCONSISTENT) 3255 */ 3256 static void setSummary() { 3257 summary = true; 3258 } 3259 3260 /** 3261 * Set hbase:meta check mode. 3262 * Print only info about hbase:meta table deployment/state 3263 */ 3264 void setCheckMetaOnly() { 3265 checkMetaOnly = true; 3266 } 3267 3268 /** 3269 * Set region boundaries check mode. 3270 */ 3271 void setRegionBoundariesCheck() { 3272 checkRegionBoundaries = true; 3273 } 3274 3275 /** 3276 * Set replication fix mode. 3277 */ 3278 public void setFixReplication(boolean shouldFix) { 3279 fixReplication = shouldFix; 3280 fixAny |= shouldFix; 3281 } 3282 3283 /** 3284 * Check if we should rerun fsck again. This checks if we've tried to 3285 * fix something and we should rerun fsck tool again. 3286 * The rerun is a report-only pass used to verify that the attempted 3287 * fixes did not introduce new problems. 3288 */ 3289 void setShouldRerun() { 3290 rerun = true; 3291 } 3292 3293 boolean shouldRerun() { 3294 return rerun; 3295 } 3296 3297 /** 3298 * Fix inconsistencies found by fsck. This should try to fix errors (if any) 3299 * found by fsck utility. 3300 */ 3301 public void setFixAssignments(boolean shouldFix) { 3302 fixAssignments = shouldFix; 3303 fixAny |= shouldFix; 3304 } 3305 3306 boolean shouldFixAssignments() { 3307 return fixAssignments; 3308 } 3309 3310 public void setFixMeta(boolean shouldFix) { 3311 fixMeta = shouldFix; 3312 fixAny |= shouldFix; 3313 } 3314 3315 boolean shouldFixMeta() { 3316 return fixMeta; 3317 } 3318 3319 public void setFixEmptyMetaCells(boolean shouldFix) { 3320 fixEmptyMetaCells = shouldFix; 3321 fixAny |= shouldFix; 3322 } 3323 3324 boolean shouldFixEmptyMetaCells() { 3325 return fixEmptyMetaCells; 3326 } 3327 3328 public void setCheckHdfs(boolean checking) { 3329 checkHdfs = checking; 3330 } 3331 3332 boolean shouldCheckHdfs() { 3333 return checkHdfs; 3334 } 3335 3336 public void setFixHdfsHoles(boolean shouldFix) { 3337 fixHdfsHoles = shouldFix; 3338 fixAny |= shouldFix; 3339 } 3340 3341 boolean shouldFixHdfsHoles() { 3342 return fixHdfsHoles; 3343 } 3344 3345 public void setFixTableOrphans(boolean shouldFix) { 3346 fixTableOrphans = shouldFix; 3347 fixAny |= shouldFix; 3348 } 3349 3350 boolean shouldFixTableOrphans() { 3351 return fixTableOrphans; 3352 } 3353 3354 public void setFixHdfsOverlaps(boolean shouldFix) { 3355 fixHdfsOverlaps = shouldFix; 3356 fixAny |= shouldFix; 3357 } 3358 3359 boolean shouldFixHdfsOverlaps() { 3360 return fixHdfsOverlaps; 3361 } 3362 3363 public void setFixHdfsOrphans(boolean shouldFix) { 3364 fixHdfsOrphans = shouldFix; 3365 fixAny |= shouldFix; 3366 } 3367 3368 boolean shouldFixHdfsOrphans() { 3369 return fixHdfsOrphans; 3370 } 3371 3372 public void setFixVersionFile(boolean shouldFix) { 3373 fixVersionFile = shouldFix; 3374 fixAny |= shouldFix; 3375 } 3376 3377 public boolean shouldFixVersionFile() { 3378 return fixVersionFile; 3379 } 3380 3381 public void setSidelineBigOverlaps(boolean sbo) { 3382 this.sidelineBigOverlaps = sbo;
3383 } 3384 3385 public boolean shouldSidelineBigOverlaps() { 3386 return sidelineBigOverlaps; 3387 } 3388 3389 public void setFixSplitParents(boolean shouldFix) { 3390 fixSplitParents = shouldFix; 3391 fixAny |= shouldFix; 3392 } 3393 3394 public void setRemoveParents(boolean shouldFix) { 3395 removeParents = shouldFix; 3396 fixAny |= shouldFix; 3397 } 3398 3399 boolean shouldFixSplitParents() { 3400 return fixSplitParents; 3401 } 3402 3403 boolean shouldRemoveParents() { 3404 return removeParents; 3405 } 3406 3407 public void setFixReferenceFiles(boolean shouldFix) { 3408 fixReferenceFiles = shouldFix; 3409 fixAny |= shouldFix; 3410 } 3411 3412 boolean shouldFixReferenceFiles() { 3413 return fixReferenceFiles; 3414 } 3415 3416 public void setFixHFileLinks(boolean shouldFix) { 3417 fixHFileLinks = shouldFix; 3418 fixAny |= shouldFix; 3419 } 3420 3421 boolean shouldFixHFileLinks() { 3422 return fixHFileLinks; 3423 } 3424 3425 public boolean shouldIgnorePreCheckPermission() { 3426 return !fixAny || ignorePreCheckPermission; 3427 } 3428 3429 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) { 3430 this.ignorePreCheckPermission = ignorePreCheckPermission; 3431 } 3432 3433 /** 3434 * @param mm maximum number of regions to merge into a single region. 3435 */ 3436 public void setMaxMerge(int mm) { 3437 this.maxMerge = mm; 3438 } 3439 3440 public int getMaxMerge() { 3441 return maxMerge; 3442 } 3443 3444 public void setMaxOverlapsToSideline(int mo) { 3445 this.maxOverlapsToSideline = mo; 3446 } 3447 3448 public int getMaxOverlapsToSideline() { 3449 return maxOverlapsToSideline; 3450 } 3451 3452 /** 3453 * Only check/fix tables specified by the list, 3454 * Empty list means all tables are included. 3455 */ 3456 boolean isTableIncluded(TableName table) { 3457 return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table); 3458 } 3459 3460 public void includeTable(TableName table) { 3461 tablesIncluded.add(table); 3462 } 3463 3464 Set<TableName> getIncludedTables() { 3465 return new HashSet<>(tablesIncluded); 3466 } 3467 3468 /** 3469 * We are interested in only those tables that have not changed their state in 3470 * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag 3471 * @param seconds - the time in seconds 3472 */ 3473 public void setTimeLag(long seconds) { 3474 timelag = seconds * 1000; // convert to milliseconds 3475 } 3476 3477 /** 3478 * 3479 * @param sidelineDir - HDFS path to sideline data 3480 */ 3481 public void setSidelineDir(String sidelineDir) { 3482 this.sidelineDir = new Path(sidelineDir); 3483 } 3484 3485 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException { 3486 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles); 3487 } 3488 3489 public HFileCorruptionChecker getHFilecorruptionChecker() { 3490 return hfcc; 3491 } 3492 3493 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) { 3494 this.hfcc = hfcc; 3495 } 3496 3497 public void setRetCode(int code) { 3498 this.retcode = code; 3499 } 3500 3501 public int getRetCode() { 3502 return retcode; 3503 } 3504 3505 protected HBaseFsck printUsageAndExit() { 3506 StringWriter sw = new StringWriter(2048); 3507 PrintWriter out = new PrintWriter(sw); 3508 out.println(""); 3509 out.println("-----------------------------------------------------------------------"); 3510 out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed."); 3511 out.println("In general, all 
Read-Only options are supported and can be used"); 3512 out.println("safely. Most -fix/-repair options are NOT supported. Please see usage"); 3513 out.println("below for details on which options are not supported."); 3514 out.println("-----------------------------------------------------------------------"); 3515 out.println(""); 3516 out.println("Usage: fsck [opts] {only tables}"); 3517 out.println(" where [opts] are:"); 3518 out.println(" -help Display help options (this)"); 3519 out.println(" -details Display full report of all regions."); 3520 out.println(" -timelag <timeInSeconds> Process only regions that " + 3521 " have not experienced any metadata updates in the last " + 3522 " <timeInSeconds> seconds."); 3523 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" + 3524 " before checking if the fix worked if run with -fix"); 3525 out.println(" -summary Print only summary of the tables and status."); 3526 out.println(" -metaonly Only check the state of the hbase:meta table."); 3527 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta."); 3528 out.println(" -boundaries Verify that region boundaries are the same between META and store files."); 3529 out.println(" -exclusive Abort if another hbck is exclusive or fixing."); 3530 3531 out.println(""); 3532 out.println(" Datafile Repair options: (expert features, use with caution!)"); 3533 out.println(" -checkCorruptHFiles Check all HFiles by opening them to make sure they are valid"); 3534 out.println(" -sidelineCorruptHFiles Quarantine corrupted HFiles. Implies -checkCorruptHFiles"); 3535 3536 out.println(""); 3537 out.println(" Replication options"); 3538 out.println(" -fixReplication Deletes replication queues for removed peers"); 3539 3540 out.println(""); 3541 out.println(" Metadata Repair options supported as of version 2.0: (expert features, use with caution!)"); 3542 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs."); 3543 out.println(" -fixReferenceFiles Try to offline lingering reference store files"); 3544 out.println(" -fixHFileLinks Try to offline lingering HFileLinks"); 3545 out.println(" -noHdfsChecking Don't load/check region info from HDFS." 3546 + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap"); 3547 out.println(" -ignorePreCheckPermission Ignore filesystem permission pre-check"); 3548 3549 out.println(""); 3550 out.println("NOTE: The following options are NOT supported as of HBase version 2.0+."); 3551 out.println(""); 3552 out.println(" UNSUPPORTED Metadata Repair options: (expert features, use with caution!)"); 3553 out.println(" -fix Try to fix region assignments. This is for backwards compatibility"); 3554 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix"); 3555 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good."); 3556 out.println(" -fixHdfsHoles Try to fix region holes in hdfs."); 3557 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs"); 3558 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)"); 3559 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs."); 3560 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge.
(n=" + DEFAULT_MAX_MERGE +" by default)"); 3561 out.println(" -sidelineBigOverlaps When fixing region overlaps, allow big overlaps to be sidelined"); 3562 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to be sidelined per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)"); 3563 out.println(" -fixSplitParents Try to force offline split parents to be online."); 3564 out.println(" -removeParents Try to offline and sideline lingering parents and keep daughter regions."); 3565 out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region" 3566 + " (empty REGIONINFO_QUALIFIER rows)"); 3567 3568 out.println(""); 3569 out.println(" UNSUPPORTED Metadata Repair shortcuts"); 3570 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " + 3571 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles " + 3572 "-fixHFileLinks"); 3573 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles"); 3574 3575 out.flush(); 3576 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString()); 3577 3578 setRetCode(-2); 3579 return this; 3580 } 3581 3582 /** 3583 * Main program 3584 * 3585 * @param args command-line arguments 3586 * @throws Exception if the tool run fails 3587 */ 3588 public static void main(String[] args) throws Exception { 3589 // create a fsck object 3590 Configuration conf = HBaseConfiguration.create(); 3591 Path hbasedir = FSUtils.getRootDir(conf); 3592 URI defaultFs = hbasedir.getFileSystem(conf).getUri(); 3593 FSUtils.setFsDefault(conf, new Path(defaultFs)); 3594 int ret = ToolRunner.run(new HBaseFsckTool(conf), args); 3595 System.exit(ret); 3596 } 3597 3598 /** 3599 * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line. 3600 */ 3601 static class HBaseFsckTool extends Configured implements Tool { 3602 HBaseFsckTool(Configuration conf) { super(conf); } 3603 @Override 3604 public int run(String[] args) throws Exception { 3605 HBaseFsck hbck = new HBaseFsck(getConf()); 3606 hbck.exec(hbck.executor, args); 3607 hbck.close(); 3608 return hbck.getRetCode(); 3609 } 3610 }; 3611 3612 3613 public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException, 3614 InterruptedException { 3615 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN; 3616 3617 boolean checkCorruptHFiles = false; 3618 boolean sidelineCorruptHFiles = false; 3619 3620 // Process command-line args.
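// Hedged examples of read-only invocations this parser accepts, drawn from the usage text
// above (the "hbase hbck" launcher wrapper is an assumption and may differ per install;
// trailing table names are optional positional arguments and are placeholders here):
//   hbase hbck -details
//   hbase hbck -summary MyTable OtherTable
//   hbase hbck -metaonly
//   hbase hbck -timelag 60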
3621 for (int i = 0; i < args.length; i++) { 3622 String cmd = args[i]; 3623 if (cmd.equals("-help") || cmd.equals("-h")) { 3624 return printUsageAndExit(); 3625 } else if (cmd.equals("-details")) { 3626 setDisplayFullReport(); 3627 } else if (cmd.equals("-exclusive")) { 3628 setForceExclusive(); 3629 } else if (cmd.equals("-timelag")) { 3630 if (i == args.length - 1) { 3631 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value."); 3632 return printUsageAndExit(); 3633 } 3634 try { 3635 long timelag = Long.parseLong(args[i+1]); 3636 setTimeLag(timelag); 3637 } catch (NumberFormatException e) { 3638 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value."); 3639 return printUsageAndExit(); 3640 } 3641 i++; 3642 } else if (cmd.equals("-sleepBeforeRerun")) { 3643 if (i == args.length - 1) { 3644 errors.reportError(ERROR_CODE.WRONG_USAGE, 3645 "HBaseFsck: -sleepBeforeRerun needs a value."); 3646 return printUsageAndExit(); 3647 } 3648 try { 3649 sleepBeforeRerun = Long.parseLong(args[i+1]); 3650 } catch (NumberFormatException e) { 3651 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value."); 3652 return printUsageAndExit(); 3653 } 3654 i++; 3655 } else if (cmd.equals("-sidelineDir")) { 3656 if (i == args.length - 1) { 3657 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value."); 3658 return printUsageAndExit(); 3659 } 3660 i++; 3661 setSidelineDir(args[i]); 3662 } else if (cmd.equals("-fix")) { 3663 errors.reportError(ERROR_CODE.WRONG_USAGE, 3664 "This option is deprecated, please use -fixAssignments instead."); 3665 setFixAssignments(true); 3666 } else if (cmd.equals("-fixAssignments")) { 3667 setFixAssignments(true); 3668 } else if (cmd.equals("-fixMeta")) { 3669 setFixMeta(true); 3670 } else if (cmd.equals("-noHdfsChecking")) { 3671 setCheckHdfs(false); 3672 } else if (cmd.equals("-fixHdfsHoles")) { 3673 setFixHdfsHoles(true); 3674 } else if (cmd.equals("-fixHdfsOrphans")) { 3675 setFixHdfsOrphans(true); 3676 } else if (cmd.equals("-fixTableOrphans")) { 3677 setFixTableOrphans(true); 3678 } else if (cmd.equals("-fixHdfsOverlaps")) { 3679 setFixHdfsOverlaps(true); 3680 } else if (cmd.equals("-fixVersionFile")) { 3681 setFixVersionFile(true); 3682 } else if (cmd.equals("-sidelineBigOverlaps")) { 3683 setSidelineBigOverlaps(true); 3684 } else if (cmd.equals("-fixSplitParents")) { 3685 setFixSplitParents(true); 3686 } else if (cmd.equals("-removeParents")) { 3687 setRemoveParents(true); 3688 } else if (cmd.equals("-ignorePreCheckPermission")) { 3689 setIgnorePreCheckPermission(true); 3690 } else if (cmd.equals("-checkCorruptHFiles")) { 3691 checkCorruptHFiles = true; 3692 } else if (cmd.equals("-sidelineCorruptHFiles")) { 3693 sidelineCorruptHFiles = true; 3694 } else if (cmd.equals("-fixReferenceFiles")) { 3695 setFixReferenceFiles(true); 3696 } else if (cmd.equals("-fixHFileLinks")) { 3697 setFixHFileLinks(true); 3698 } else if (cmd.equals("-fixEmptyMetaCells")) { 3699 setFixEmptyMetaCells(true); 3700 } else if (cmd.equals("-repair")) { 3701 // this attempts to merge overlapping hdfs regions, needs testing 3702 // under load 3703 setFixHdfsHoles(true); 3704 setFixHdfsOrphans(true); 3705 setFixMeta(true); 3706 setFixAssignments(true); 3707 setFixHdfsOverlaps(true); 3708 setFixVersionFile(true); 3709 setSidelineBigOverlaps(true); 3710 setFixSplitParents(false); 3711 setCheckHdfs(true); 3712 setFixReferenceFiles(true); 3713 setFixHFileLinks(true); 3714 } else if (cmd.equals("-repairHoles")) { 
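// Note: -repairHoles is a narrower shortcut than -repair; as the flags set below show, it
// enables hole, meta and assignment fixing but leaves orphan, overlap and split-parent
// fixing disabled.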
3715 // this will make all missing hdfs regions available but may lose data 3716 setFixHdfsHoles(true); 3717 setFixHdfsOrphans(false); 3718 setFixMeta(true); 3719 setFixAssignments(true); 3720 setFixHdfsOverlaps(false); 3721 setSidelineBigOverlaps(false); 3722 setFixSplitParents(false); 3723 setCheckHdfs(true); 3724 } else if (cmd.equals("-maxOverlapsToSideline")) { 3725 if (i == args.length - 1) { 3726 errors.reportError(ERROR_CODE.WRONG_USAGE, 3727 "-maxOverlapsToSideline needs a numeric value argument."); 3728 return printUsageAndExit(); 3729 } 3730 try { 3731 int maxOverlapsToSideline = Integer.parseInt(args[i+1]); 3732 setMaxOverlapsToSideline(maxOverlapsToSideline); 3733 } catch (NumberFormatException e) { 3734 errors.reportError(ERROR_CODE.WRONG_USAGE, 3735 "-maxOverlapsToSideline needs a numeric value argument."); 3736 return printUsageAndExit(); 3737 } 3738 i++; 3739 } else if (cmd.equals("-maxMerge")) { 3740 if (i == args.length - 1) { 3741 errors.reportError(ERROR_CODE.WRONG_USAGE, 3742 "-maxMerge needs a numeric value argument."); 3743 return printUsageAndExit(); 3744 } 3745 try { 3746 int maxMerge = Integer.parseInt(args[i+1]); 3747 setMaxMerge(maxMerge); 3748 } catch (NumberFormatException e) { 3749 errors.reportError(ERROR_CODE.WRONG_USAGE, 3750 "-maxMerge needs a numeric value argument."); 3751 return printUsageAndExit(); 3752 } 3753 i++; 3754 } else if (cmd.equals("-summary")) { 3755 setSummary(); 3756 } else if (cmd.equals("-metaonly")) { 3757 setCheckMetaOnly(); 3758 } else if (cmd.equals("-boundaries")) { 3759 setRegionBoundariesCheck(); 3760 } else if (cmd.equals("-fixReplication")) { 3761 setFixReplication(true); 3762 } else if (cmd.startsWith("-")) { 3763 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd); 3764 return printUsageAndExit(); 3765 } else { 3766 includeTable(TableName.valueOf(cmd)); 3767 errors.print("Allow checking/fixes for table: " + cmd); 3768 } 3769 } 3770 3771 errors.print("HBaseFsck command line options: " + StringUtils.join(args, " ")); 3772 3773 // pre-check current user has FS write permission or not 3774 try { 3775 preCheckPermission(); 3776 } catch (AccessDeniedException ace) { 3777 Runtime.getRuntime().exit(-1); 3778 } catch (IOException ioe) { 3779 Runtime.getRuntime().exit(-1); 3780 } 3781 3782 // do the real work of hbck 3783 connect(); 3784 3785 // after connecting to server above, we have server version 3786 // check if unsupported option is specified based on server version 3787 if (!isOptionsSupported(args)) { 3788 return printUsageAndExit(); 3789 } 3790 3791 try { 3792 // if corrupt file mode is on, first fix them since they may be opened later 3793 if (checkCorruptHFiles || sidelineCorruptHFiles) { 3794 LOG.info("Checking all hfiles for corruption"); 3795 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles); 3796 setHFileCorruptionChecker(hfcc); // so we can get result 3797 Collection<TableName> tables = getIncludedTables(); 3798 Collection<Path> tableDirs = new ArrayList<>(); 3799 Path rootdir = FSUtils.getRootDir(getConf()); 3800 if (tables.size() > 0) { 3801 for (TableName t : tables) { 3802 tableDirs.add(FSUtils.getTableDir(rootdir, t)); 3803 } 3804 } else { 3805 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir); 3806 } 3807 hfcc.checkTables(tableDirs); 3808 hfcc.report(errors); 3809 } 3810 3811 // check and fix table integrity, region consistency. 
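// A minimal sketch of the corruption-check path exercised above, assuming a connected
// HBaseFsck instance 'fsck', a Collection<Path> 'tableDirs' of table directories, and the
// active HbckErrorReporter 'reporter' (all three names are hypothetical locals):
//   HFileCorruptionChecker checker = fsck.createHFileCorruptionChecker(true); // true => sideline corrupt files
//   checker.checkTables(tableDirs);
//   checker.report(reporter);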
3812 int code = onlineHbck(); 3813 setRetCode(code); 3814 // If we have changed the HBase state it is better to run hbck again 3815 // to see if we haven't broken something else in the process. 3816 // We run it only once more because otherwise we can easily fall into 3817 // an infinite loop. 3818 if (shouldRerun()) { 3819 try { 3820 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix..."); 3821 Thread.sleep(sleepBeforeRerun); 3822 } catch (InterruptedException ie) { 3823 LOG.warn("Interrupted while sleeping"); 3824 return this; 3825 } 3826 // Just report 3827 setFixAssignments(false); 3828 setFixMeta(false); 3829 setFixHdfsHoles(false); 3830 setFixHdfsOverlaps(false); 3831 setFixVersionFile(false); 3832 setFixTableOrphans(false); 3833 errors.resetErrors(); 3834 code = onlineHbck(); 3835 setRetCode(code); 3836 } 3837 } finally { 3838 IOUtils.closeQuietly(this); 3839 } 3840 return this; 3841 } 3842 3843 private boolean isOptionsSupported(String[] args) { 3844 boolean result = true; 3845 String hbaseServerVersion = status.getHBaseVersion(); 3846 Object[] versionComponents = VersionInfo.getVersionComponents(hbaseServerVersion); 3847 if (versionComponents[0] instanceof Integer && ((Integer)versionComponents[0]) >= 2) { 3848 // Check each arg against the options that are unsupported when the server is 2.x or later. 3849 for (String arg : args) { 3850 if (unsupportedOptionsInV2.contains(arg)) { 3851 errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION, 3852 "option '" + arg + "' is not supported!"); 3853 result = false; 3854 break; 3855 } 3856 } 3857 } 3858 return result; 3859 } 3860 3861 /** 3862 * ls -r for debugging purposes 3863 */ 3864 void debugLsr(Path p) throws IOException { 3865 debugLsr(getConf(), p, errors); 3866 } 3867 3868 /** 3869 * ls -r for debugging purposes 3870 */ 3871 public static void debugLsr(Configuration conf, 3872 Path p) throws IOException { 3873 debugLsr(conf, p, new PrintingErrorReporter()); 3874 } 3875 3876 /** 3877 * ls -r for debugging purposes 3878 */ 3879 public static void debugLsr(Configuration conf, 3880 Path p, HbckErrorReporter errors) throws IOException { 3881 if (!LOG.isDebugEnabled() || p == null) { 3882 return; 3883 } 3884 FileSystem fs = p.getFileSystem(conf); 3885 3886 if (!fs.exists(p)) { 3887 // nothing 3888 return; 3889 } 3890 errors.print(p.toString()); 3891 3892 if (fs.isFile(p)) { 3893 return; 3894 } 3895 3896 if (fs.getFileStatus(p).isDirectory()) { 3897 FileStatus[] fss = fs.listStatus(p); 3898 for (FileStatus status : fss) { 3899 debugLsr(conf, status.getPath(), errors); 3900 } 3901 } 3902 } 3903}
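// Hedged usage sketch: the tool is normally launched through main()/ToolRunner as defined above;
// an equivalent in-process launch from code in this package (HBaseFsckTool is package-private)
// might look like the following, with 'conf' and the argument array supplied by the caller:
//   Configuration conf = HBaseConfiguration.create();
//   int exitCode = ToolRunner.run(new HBaseFsck.HBaseFsckTool(conf), new String[] { "-details" });
//   System.exit(exitCode);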