001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collection; 023import java.util.Collections; 024import java.util.HashMap; 025import java.util.HashSet; 026import java.util.List; 027import java.util.Map; 028import java.util.Set; 029import java.util.concurrent.Future; 030import java.util.concurrent.TimeUnit; 031import java.util.concurrent.atomic.AtomicBoolean; 032import java.util.concurrent.locks.Condition; 033import java.util.concurrent.locks.ReentrantLock; 034import java.util.stream.Collectors; 035 036import org.apache.hadoop.conf.Configuration; 037import org.apache.hadoop.hbase.DoNotRetryIOException; 038import org.apache.hadoop.hbase.HBaseIOException; 039import org.apache.hadoop.hbase.HConstants; 040import org.apache.hadoop.hbase.MetaTableAccessor; 041import org.apache.hadoop.hbase.PleaseHoldException; 042import org.apache.hadoop.hbase.ServerName; 043import org.apache.hadoop.hbase.TableName; 044import org.apache.hadoop.hbase.UnknownRegionException; 045import org.apache.hadoop.hbase.client.DoNotRetryRegionException; 046import 
org.apache.hadoop.hbase.client.MasterSwitchType; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import org.apache.hadoop.hbase.client.RegionInfoBuilder; 049import org.apache.hadoop.hbase.client.RegionStatesCount; 050import org.apache.hadoop.hbase.client.Result; 051import org.apache.hadoop.hbase.client.TableState; 052import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; 053import org.apache.hadoop.hbase.favored.FavoredNodesManager; 054import org.apache.hadoop.hbase.favored.FavoredNodesPromoter; 055import org.apache.hadoop.hbase.master.LoadBalancer; 056import org.apache.hadoop.hbase.master.MasterServices; 057import org.apache.hadoop.hbase.master.MetricsAssignmentManager; 058import org.apache.hadoop.hbase.master.RegionPlan; 059import org.apache.hadoop.hbase.master.RegionState; 060import org.apache.hadoop.hbase.master.RegionState.State; 061import org.apache.hadoop.hbase.master.ServerManager; 062import org.apache.hadoop.hbase.master.TableStateManager; 063import org.apache.hadoop.hbase.master.balancer.FavoredStochasticBalancer; 064import org.apache.hadoop.hbase.master.procedure.HBCKServerCrashProcedure; 065import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 066import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler; 067import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait; 068import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; 069import org.apache.hadoop.hbase.procedure2.Procedure; 070import org.apache.hadoop.hbase.procedure2.ProcedureEvent; 071import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 072import org.apache.hadoop.hbase.procedure2.ProcedureInMemoryChore; 073import org.apache.hadoop.hbase.procedure2.util.StringUtils; 074import org.apache.hadoop.hbase.regionserver.SequenceId; 075import org.apache.hadoop.hbase.util.Bytes; 076import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 077import org.apache.hadoop.hbase.util.Pair; 078import 
org.apache.hadoop.hbase.util.Threads; 079import org.apache.hadoop.hbase.util.VersionInfo; 080import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 081import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 082import org.apache.yetus.audience.InterfaceAudience; 083import org.apache.zookeeper.KeeperException; 084import org.slf4j.Logger; 085import org.slf4j.LoggerFactory; 086 087import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 088 089import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 090import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition; 091import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 092import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 093import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 094 095/** 096 * The AssignmentManager is the coordinator for region assign/unassign operations. 097 * <ul> 098 * <li>In-memory states of regions and servers are stored in {@link RegionStates}.</li> 099 * <li>hbase:meta state updates are handled by {@link RegionStateStore}.</li> 100 * </ul> 101 * Regions are created by CreateTable, Split, Merge. 102 * Regions are deleted by DeleteTable, Split, Merge. 103 * Assigns are triggered by CreateTable, EnableTable, Split, Merge, ServerCrash. 104 * Unassigns are triggered by DisableTable, Split, Merge 105 */ 106@InterfaceAudience.Private 107public class AssignmentManager { 108 private static final Logger LOG = LoggerFactory.getLogger(AssignmentManager.class); 109 110 // TODO: AMv2 111 // - handle region migration from hbase1 to hbase2. 112 // - handle sys table assignment first (e.g. 
// acl, namespace)
  //   - handle table priorities
  //   - If ServerBusyException trying to update hbase:meta, we abort the Master
  //   See updateRegionLocation in RegionStateStore.
  //
  // See also
  // https://docs.google.com/document/d/1eVKa7FHdeoJ1-9o8yZcOTAQbv0u0bblBlCCzVSIn69g/edit#heading=h.ystjyrkbtoq5
  // for other TODOs.

  /** Configuration key for the bootstrap thread pool size. */
  public static final String BOOTSTRAP_THREAD_POOL_SIZE_CONF_KEY =
      "hbase.assignment.bootstrap.thread.pool.size";

  /** Configuration key for the assign dispatch wait, in milliseconds. */
  public static final String ASSIGN_DISPATCH_WAIT_MSEC_CONF_KEY =
      "hbase.assignment.dispatch.wait.msec";
  /** Value used when {@link #ASSIGN_DISPATCH_WAIT_MSEC_CONF_KEY} is not configured. */
  private static final int DEFAULT_ASSIGN_DISPATCH_WAIT_MSEC = 150;

  /** Configuration key for the maximum size of the assign dispatch wait queue. */
  public static final String ASSIGN_DISPATCH_WAITQ_MAX_CONF_KEY =
      "hbase.assignment.dispatch.wait.queue.max.size";
  /** Value used when {@link #ASSIGN_DISPATCH_WAITQ_MAX_CONF_KEY} is not configured. */
  private static final int DEFAULT_ASSIGN_DISPATCH_WAITQ_MAX = 100;

  /** Configuration key for the region-in-transition chore interval, in milliseconds. */
  public static final String RIT_CHORE_INTERVAL_MSEC_CONF_KEY =
      "hbase.assignment.rit.chore.interval.msec";
  /** Value used when {@link #RIT_CHORE_INTERVAL_MSEC_CONF_KEY} is not configured (one minute). */
  private static final int DEFAULT_RIT_CHORE_INTERVAL_MSEC = 60 * 1000;

  /** Configuration key for the dead-region metric chore interval, in milliseconds. */
  public static final String DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC_CONF_KEY =
      "hbase.assignment.dead.region.metric.chore.interval.msec";
  /**
   * Value used when {@link #DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC_CONF_KEY} is not configured
   * (two minutes).
   */
  private static final int DEFAULT_DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC = 120 * 1000;

  /** Configuration key for the maximum number of assign attempts. */
  public static final String ASSIGN_MAX_ATTEMPTS =
      "hbase.assignment.maximum.attempts";
  /** Value used when {@link #ASSIGN_MAX_ATTEMPTS} is not configured. */
  private static final int DEFAULT_ASSIGN_MAX_ATTEMPTS = Integer.MAX_VALUE;

  /** Configuration key for the maximum number of assign attempts that are retried immediately. */
  public static final String ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS =
      "hbase.assignment.retry.immediately.maximum.attempts";
  /** Value used when {@link #ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS} is not configured. */
  private static final int DEFAULT_ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS = 3;

  /** Region in Transition metrics threshold time */
  public static final String METRICS_RIT_STUCK_WARNING_THRESHOLD =
      "hbase.metrics.rit.stuck.warning.threshold";
  /** Presumably the default for the key above, in milliseconds (one minute) - TODO confirm. */
  private static final int DEFAULT_RIT_STUCK_WARNING_THRESHOLD = 60 * 1000;

  /** Event whose readiness is what {@code isMetaAssigned()} reports. */
  private final ProcedureEvent<?> metaAssignEvent = new ProcedureEvent<>("meta assign");
  /** Event whose readiness is what {@code isMetaLoaded()} reports. */
  private final ProcedureEvent<?> metaLoadEvent = new ProcedureEvent<>("meta load");

  private final MetricsAssignmentManager metrics;
  private final RegionInTransitionChore ritChore;
  /** May be null: only created when the configured chore interval is positive. */
  private final DeadServerMetricRegionChore deadMetricChore;
  private final MasterServices master;

  /** True between a successful {@code start()} and the matching {@code stop()}. */
  private final AtomicBoolean running = new AtomicBoolean(false);
  /** In-memory region and server states. */
  private final RegionStates regionStates = new RegionStates();
  private final RegionStateStore regionStateStore;

  // NOTE(review): presumably the region names last reported by each region server - confirm
  // against the code that populates this map.
  private final Map<ServerName, Set<byte[]>> rsReports = new HashMap<>();

  private final boolean shouldAssignRegionsWithFavoredNodes;
  private final int assignDispatchWaitQueueMaxSize;
  private final int assignDispatchWaitMillis;
  private final int assignMaxAttempts;
  private final int assignRetryImmediatelyMaxAttempts;

  /** Monitor held by the background thread of {@code checkIfShouldMoveSystemRegionAsync()}. */
  private final Object checkIfShouldMoveSystemRegionLock = new Object();

  private Thread assignThread;

  /**
   * Creates an AssignmentManager backed by a new {@link RegionStateStore} for the given master.
   * @param master the master services this manager works for
   */
  public AssignmentManager(final MasterServices master) {
    this(master, new RegionStateStore(master));
  }

  /**
   * Creates an AssignmentManager with an explicitly supplied state store.
   * @param master the master services this manager works for; its configuration is read here
   * @param stateStore the store kept as this manager's {@link RegionStateStore}
   */
  @VisibleForTesting
  AssignmentManager(final MasterServices master, final RegionStateStore stateStore) {
    this.master = master;
    this.regionStateStore = stateStore;
    this.metrics = new MetricsAssignmentManager();

    final Configuration conf = master.getConfiguration();

    // Only read favored nodes if using the favored nodes load balancer.
190 this.shouldAssignRegionsWithFavoredNodes = FavoredStochasticBalancer.class.isAssignableFrom( 191 conf.getClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, Object.class)); 192 193 this.assignDispatchWaitMillis = conf.getInt(ASSIGN_DISPATCH_WAIT_MSEC_CONF_KEY, 194 DEFAULT_ASSIGN_DISPATCH_WAIT_MSEC); 195 this.assignDispatchWaitQueueMaxSize = conf.getInt(ASSIGN_DISPATCH_WAITQ_MAX_CONF_KEY, 196 DEFAULT_ASSIGN_DISPATCH_WAITQ_MAX); 197 198 this.assignMaxAttempts = Math.max(1, conf.getInt(ASSIGN_MAX_ATTEMPTS, 199 DEFAULT_ASSIGN_MAX_ATTEMPTS)); 200 this.assignRetryImmediatelyMaxAttempts = conf.getInt(ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS, 201 DEFAULT_ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS); 202 203 int ritChoreInterval = conf.getInt(RIT_CHORE_INTERVAL_MSEC_CONF_KEY, 204 DEFAULT_RIT_CHORE_INTERVAL_MSEC); 205 this.ritChore = new RegionInTransitionChore(ritChoreInterval); 206 207 int deadRegionChoreInterval = conf.getInt(DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC_CONF_KEY, 208 DEFAULT_DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC); 209 if (deadRegionChoreInterval > 0) { 210 this.deadMetricChore = new DeadServerMetricRegionChore(deadRegionChoreInterval); 211 } else { 212 this.deadMetricChore = null; 213 } 214 } 215 216 public void start() throws IOException, KeeperException { 217 if (!running.compareAndSet(false, true)) { 218 return; 219 } 220 221 LOG.trace("Starting assignment manager"); 222 223 // Start the Assignment Thread 224 startAssignmentThread(); 225 226 // load meta region state 227 ZKWatcher zkw = master.getZooKeeper(); 228 // it could be null in some tests 229 if (zkw != null) { 230 // here we are still in the early steps of active master startup. There is only one thread(us) 231 // can access AssignmentManager and create region node, so here we do not need to lock the 232 // region node. 
233 RegionState regionState = MetaTableLocator.getMetaRegionState(zkw); 234 RegionStateNode regionNode = 235 regionStates.getOrCreateRegionStateNode(RegionInfoBuilder.FIRST_META_REGIONINFO); 236 regionNode.setRegionLocation(regionState.getServerName()); 237 regionNode.setState(regionState.getState()); 238 if (regionNode.getProcedure() != null) { 239 regionNode.getProcedure().stateLoaded(this, regionNode); 240 } 241 if (regionState.getServerName() != null) { 242 regionStates.addRegionToServer(regionNode); 243 } 244 setMetaAssigned(regionState.getRegion(), regionState.getState() == State.OPEN); 245 } 246 } 247 248 /** 249 * Create RegionStateNode based on the TRSP list, and attach the TRSP to the RegionStateNode. 250 * <p> 251 * This is used to restore the RIT region list, so we do not need to restore it in the loadingMeta 252 * method below. And it is also very important as now before submitting a TRSP, we need to attach 253 * it to the RegionStateNode, which acts like a guard, so we need to restore this information at 254 * the very beginning, before we start processing any procedures. 255 */ 256 public void setupRIT(List<TransitRegionStateProcedure> procs) { 257 procs.forEach(proc -> { 258 RegionInfo regionInfo = proc.getRegion(); 259 RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo); 260 TransitRegionStateProcedure existingProc = regionNode.getProcedure(); 261 if (existingProc != null) { 262 // This is possible, as we will detach the procedure from the RSN before we 263 // actually finish the procedure. This is because that, we will detach the TRSP from the RSN 264 // during execution, at that time, the procedure has not been marked as done in the pv2 265 // framework yet, so it is possible that we schedule a new TRSP immediately and when 266 // arriving here, we will find out that there are multiple TRSPs for the region. 
But we can 267 // make sure that, only the last one can take the charge, the previous ones should have all 268 // been finished already. So here we will compare the proc id, the greater one will win. 269 if (existingProc.getProcId() < proc.getProcId()) { 270 // the new one wins, unset and set it to the new one below 271 regionNode.unsetProcedure(existingProc); 272 } else { 273 // the old one wins, skip 274 return; 275 } 276 } 277 LOG.info("Attach {} to {} to restore RIT", proc, regionNode); 278 regionNode.setProcedure(proc); 279 }); 280 } 281 282 public void stop() { 283 if (!running.compareAndSet(true, false)) { 284 return; 285 } 286 287 LOG.info("Stopping assignment manager"); 288 289 // The AM is started before the procedure executor, 290 // but the actual work will be loaded/submitted only once we have the executor 291 final boolean hasProcExecutor = master.getMasterProcedureExecutor() != null; 292 293 // Remove the RIT chore 294 if (hasProcExecutor) { 295 master.getMasterProcedureExecutor().removeChore(this.ritChore); 296 if (this.deadMetricChore != null) { 297 master.getMasterProcedureExecutor().removeChore(this.deadMetricChore); 298 } 299 } 300 301 // Stop the Assignment Thread 302 stopAssignmentThread(); 303 304 // Stop the RegionStateStore 305 regionStates.clear(); 306 307 // Update meta events (for testing) 308 if (hasProcExecutor) { 309 metaLoadEvent.suspend(); 310 for (RegionInfo hri: getMetaRegionSet()) { 311 setMetaAssigned(hri, false); 312 } 313 } 314 } 315 316 public boolean isRunning() { 317 return running.get(); 318 } 319 320 public Configuration getConfiguration() { 321 return master.getConfiguration(); 322 } 323 324 public MetricsAssignmentManager getAssignmentManagerMetrics() { 325 return metrics; 326 } 327 328 private LoadBalancer getBalancer() { 329 return master.getLoadBalancer(); 330 } 331 332 private MasterProcedureEnv getProcedureEnvironment() { 333 return master.getMasterProcedureExecutor().getEnvironment(); 334 } 335 336 private 
MasterProcedureScheduler getProcedureScheduler() { 337 return getProcedureEnvironment().getProcedureScheduler(); 338 } 339 340 int getAssignMaxAttempts() { 341 return assignMaxAttempts; 342 } 343 344 int getAssignRetryImmediatelyMaxAttempts() { 345 return assignRetryImmediatelyMaxAttempts; 346 } 347 348 public RegionStates getRegionStates() { 349 return regionStates; 350 } 351 352 /** 353 * Returns the regions hosted by the specified server. 354 * <p/> 355 * Notice that, for SCP, after we submit the SCP, no one can change the region list for the 356 * ServerStateNode so we do not need any locks here. And for other usage, this can only give you a 357 * snapshot of the current region list for this server, which means, right after you get the 358 * region list, new regions may be moved to this server or some regions may be moved out from this 359 * server, so you should not use it critically if you need strong consistency. 360 */ 361 public List<RegionInfo> getRegionsOnServer(ServerName serverName) { 362 ServerStateNode serverInfo = regionStates.getServerNode(serverName); 363 if (serverInfo == null) { 364 return Collections.emptyList(); 365 } 366 return serverInfo.getRegionInfoList(); 367 } 368 369 public RegionStateStore getRegionStateStore() { 370 return regionStateStore; 371 } 372 373 public List<ServerName> getFavoredNodes(final RegionInfo regionInfo) { 374 return this.shouldAssignRegionsWithFavoredNodes 375 ? 
((FavoredStochasticBalancer) getBalancer()).getFavoredNodes(regionInfo) 376 : ServerName.EMPTY_SERVER_LIST; 377 } 378 379 // ============================================================================================ 380 // Table State Manager helpers 381 // ============================================================================================ 382 TableStateManager getTableStateManager() { 383 return master.getTableStateManager(); 384 } 385 386 public boolean isTableEnabled(final TableName tableName) { 387 return getTableStateManager().isTableState(tableName, TableState.State.ENABLED); 388 } 389 390 public boolean isTableDisabled(final TableName tableName) { 391 return getTableStateManager().isTableState(tableName, 392 TableState.State.DISABLED, TableState.State.DISABLING); 393 } 394 395 // ============================================================================================ 396 // META Helpers 397 // ============================================================================================ 398 private boolean isMetaRegion(final RegionInfo regionInfo) { 399 return regionInfo.isMetaRegion(); 400 } 401 402 public boolean isMetaRegion(final byte[] regionName) { 403 return getMetaRegionFromName(regionName) != null; 404 } 405 406 public RegionInfo getMetaRegionFromName(final byte[] regionName) { 407 for (RegionInfo hri: getMetaRegionSet()) { 408 if (Bytes.equals(hri.getRegionName(), regionName)) { 409 return hri; 410 } 411 } 412 return null; 413 } 414 415 public boolean isCarryingMeta(final ServerName serverName) { 416 // TODO: handle multiple meta 417 return isCarryingRegion(serverName, RegionInfoBuilder.FIRST_META_REGIONINFO); 418 } 419 420 private boolean isCarryingRegion(final ServerName serverName, final RegionInfo regionInfo) { 421 // TODO: check for state? 
422 final RegionStateNode node = regionStates.getRegionStateNode(regionInfo); 423 return(node != null && serverName.equals(node.getRegionLocation())); 424 } 425 426 private RegionInfo getMetaForRegion(final RegionInfo regionInfo) { 427 //if (regionInfo.isMetaRegion()) return regionInfo; 428 // TODO: handle multiple meta. if the region provided is not meta lookup 429 // which meta the region belongs to. 430 return RegionInfoBuilder.FIRST_META_REGIONINFO; 431 } 432 433 // TODO: handle multiple meta. 434 private static final Set<RegionInfo> META_REGION_SET = 435 Collections.singleton(RegionInfoBuilder.FIRST_META_REGIONINFO); 436 public Set<RegionInfo> getMetaRegionSet() { 437 return META_REGION_SET; 438 } 439 440 // ============================================================================================ 441 // META Event(s) helpers 442 // ============================================================================================ 443 /** 444 * Notice that, this only means the meta region is available on a RS, but the AM may still be 445 * loading the region states from meta, so usually you need to check {@link #isMetaLoaded()} first 446 * before checking this method, unless you can make sure that your piece of code can only be 447 * executed after AM builds the region states. 448 * @see #isMetaLoaded() 449 */ 450 public boolean isMetaAssigned() { 451 return metaAssignEvent.isReady(); 452 } 453 454 public boolean isMetaRegionInTransition() { 455 return !isMetaAssigned(); 456 } 457 458 /** 459 * Notice that this event does not mean the AM has already finished region state rebuilding. See 460 * the comment of {@link #isMetaAssigned()} for more details. 
461 * @see #isMetaAssigned() 462 */ 463 public boolean waitMetaAssigned(Procedure<?> proc, RegionInfo regionInfo) { 464 return getMetaAssignEvent(getMetaForRegion(regionInfo)).suspendIfNotReady(proc); 465 } 466 467 private void setMetaAssigned(RegionInfo metaRegionInfo, boolean assigned) { 468 assert isMetaRegion(metaRegionInfo) : "unexpected non-meta region " + metaRegionInfo; 469 ProcedureEvent<?> metaAssignEvent = getMetaAssignEvent(metaRegionInfo); 470 if (assigned) { 471 metaAssignEvent.wake(getProcedureScheduler()); 472 } else { 473 metaAssignEvent.suspend(); 474 } 475 } 476 477 private ProcedureEvent<?> getMetaAssignEvent(RegionInfo metaRegionInfo) { 478 assert isMetaRegion(metaRegionInfo) : "unexpected non-meta region " + metaRegionInfo; 479 // TODO: handle multiple meta. 480 return metaAssignEvent; 481 } 482 483 /** 484 * Wait until AM finishes the meta loading, i.e, the region states rebuilding. 485 * @see #isMetaLoaded() 486 * @see #waitMetaAssigned(Procedure, RegionInfo) 487 */ 488 public boolean waitMetaLoaded(Procedure<?> proc) { 489 return metaLoadEvent.suspendIfNotReady(proc); 490 } 491 492 @VisibleForTesting 493 void wakeMetaLoadedEvent() { 494 metaLoadEvent.wake(getProcedureScheduler()); 495 assert isMetaLoaded() : "expected meta to be loaded"; 496 } 497 498 /** 499 * Return whether AM finishes the meta loading, i.e, the region states rebuilding. 500 * @see #isMetaAssigned() 501 * @see #waitMetaLoaded(Procedure) 502 */ 503 public boolean isMetaLoaded() { 504 return metaLoadEvent.isReady(); 505 } 506 507 /** 508 * Start a new thread to check if there are region servers whose versions are higher than others. 509 * If so, move all system table regions to RS with the highest version to keep compatibility. 510 * The reason is, RS in new version may not be able to access RS in old version when there are 511 * some incompatible changes. 
512 * <p>This method is called when a new RegionServer is added to cluster only.</p> 513 */ 514 public void checkIfShouldMoveSystemRegionAsync() { 515 // TODO: Fix this thread. If a server is killed and a new one started, this thread thinks that 516 // it should 'move' the system tables from the old server to the new server but 517 // ServerCrashProcedure is on it; and it will take care of the assign without dataloss. 518 if (this.master.getServerManager().countOfRegionServers() <= 1) { 519 return; 520 } 521 // This thread used to run whenever there was a change in the cluster. The ZooKeeper 522 // childrenChanged notification came in before the nodeDeleted message and so this method 523 // cold run before a ServerCrashProcedure could run. That meant that this thread could see 524 // a Crashed Server before ServerCrashProcedure and it could find system regions on the 525 // crashed server and go move them before ServerCrashProcedure had a chance; could be 526 // dataloss too if WALs were not recovered. 527 new Thread(() -> { 528 try { 529 synchronized (checkIfShouldMoveSystemRegionLock) { 530 List<RegionPlan> plans = new ArrayList<>(); 531 // TODO: I don't think this code does a good job if all servers in cluster have same 532 // version. It looks like it will schedule unnecessary moves. 533 for (ServerName server : getExcludedServersForSystemTable()) { 534 if (master.getServerManager().isServerDead(server)) { 535 // TODO: See HBASE-18494 and HBASE-18495. Though getExcludedServersForSystemTable() 536 // considers only online servers, the server could be queued for dead server 537 // processing. As region assignments for crashed server is handled by 538 // ServerCrashProcedure, do NOT handle them here. The goal is to handle this through 539 // regular flow of LoadBalancer as a favored node and not to have this special 540 // handling. 
541 continue; 542 } 543 List<RegionInfo> regionsShouldMove = getSystemTables(server); 544 if (!regionsShouldMove.isEmpty()) { 545 for (RegionInfo regionInfo : regionsShouldMove) { 546 // null value for dest forces destination server to be selected by balancer 547 RegionPlan plan = new RegionPlan(regionInfo, server, null); 548 if (regionInfo.isMetaRegion()) { 549 // Must move meta region first. 550 LOG.info("Async MOVE of {} to newer Server={}", 551 regionInfo.getEncodedName(), server); 552 moveAsync(plan); 553 } else { 554 plans.add(plan); 555 } 556 } 557 } 558 for (RegionPlan plan : plans) { 559 LOG.info("Async MOVE of {} to newer Server={}", 560 plan.getRegionInfo().getEncodedName(), server); 561 moveAsync(plan); 562 } 563 } 564 } 565 } catch (Throwable t) { 566 LOG.error(t.toString(), t); 567 } 568 }).start(); 569 } 570 571 private List<RegionInfo> getSystemTables(ServerName serverName) { 572 ServerStateNode serverNode = regionStates.getServerNode(serverName); 573 if (serverNode == null) { 574 return Collections.emptyList(); 575 } 576 return serverNode.getSystemRegionInfoList(); 577 } 578 579 private void preTransitCheck(RegionStateNode regionNode, RegionState.State[] expectedStates) 580 throws HBaseIOException { 581 if (regionNode.getProcedure() != null) { 582 throw new HBaseIOException(regionNode + " is currently in transition"); 583 } 584 if (!regionNode.isInState(expectedStates)) { 585 throw new DoNotRetryRegionException("Unexpected state for " + regionNode); 586 } 587 if (isTableDisabled(regionNode.getTable())) { 588 throw new DoNotRetryIOException(regionNode.getTable() + " is disabled for " + regionNode); 589 } 590 } 591 592 private TransitRegionStateProcedure createAssignProcedure(RegionInfo regionInfo, ServerName sn) 593 throws IOException { 594 // TODO: should we use getRegionStateNode? 
595 RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo); 596 TransitRegionStateProcedure proc; 597 regionNode.lock(); 598 try { 599 preTransitCheck(regionNode, STATES_EXPECTED_ON_ASSIGN); 600 proc = TransitRegionStateProcedure.assign(getProcedureEnvironment(), regionInfo, sn); 601 regionNode.setProcedure(proc); 602 } finally { 603 regionNode.unlock(); 604 } 605 return proc; 606 } 607 608 // TODO: Need an async version of this for hbck2. 609 public long assign(RegionInfo regionInfo, ServerName sn) throws IOException { 610 TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn); 611 ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc); 612 return proc.getProcId(); 613 } 614 615 public long assign(RegionInfo regionInfo) throws IOException { 616 return assign(regionInfo, null); 617 } 618 619 /** 620 * Submits a procedure that assigns a region to a target server without waiting for it to finish 621 * @param regionInfo the region we would like to assign 622 * @param sn target server name 623 * @return 624 * @throws IOException 625 */ 626 public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException { 627 TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn); 628 return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc); 629 } 630 631 /** 632 * Submits a procedure that assigns a region without waiting for it to finish 633 * @param regionInfo the region we would like to assign 634 * @return 635 * @throws IOException 636 */ 637 public Future<byte[]> assignAsync(RegionInfo regionInfo) throws IOException { 638 return assignAsync(regionInfo, null); 639 } 640 641 public long unassign(RegionInfo regionInfo) throws IOException { 642 RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo); 643 if (regionNode == null) { 644 throw new UnknownRegionException("No RegionState found for " + regionInfo.getEncodedName()); 
645 } 646 TransitRegionStateProcedure proc; 647 regionNode.lock(); 648 try { 649 preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE); 650 proc = TransitRegionStateProcedure.unassign(getProcedureEnvironment(), regionInfo); 651 regionNode.setProcedure(proc); 652 } finally { 653 regionNode.unlock(); 654 } 655 ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc); 656 return proc.getProcId(); 657 } 658 659 public TransitRegionStateProcedure createMoveRegionProcedure(RegionInfo regionInfo, 660 ServerName targetServer) throws HBaseIOException { 661 RegionStateNode regionNode = this.regionStates.getRegionStateNode(regionInfo); 662 if (regionNode == null) { 663 throw new UnknownRegionException("No RegionStateNode found for " + 664 regionInfo.getEncodedName() + "(Closed/Deleted?)"); 665 } 666 TransitRegionStateProcedure proc; 667 regionNode.lock(); 668 try { 669 preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE); 670 regionNode.checkOnline(); 671 proc = TransitRegionStateProcedure.move(getProcedureEnvironment(), regionInfo, targetServer); 672 regionNode.setProcedure(proc); 673 } finally { 674 regionNode.unlock(); 675 } 676 return proc; 677 } 678 679 public void move(RegionInfo regionInfo) throws IOException { 680 TransitRegionStateProcedure proc = createMoveRegionProcedure(regionInfo, null); 681 ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc); 682 } 683 684 public Future<byte[]> moveAsync(RegionPlan regionPlan) throws HBaseIOException { 685 TransitRegionStateProcedure proc = 686 createMoveRegionProcedure(regionPlan.getRegionInfo(), regionPlan.getDestination()); 687 return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc); 688 } 689 690 // ============================================================================================ 691 // RegionTransition procedures helpers 692 // 
============================================================================================ 693 694 /** 695 * Create round-robin assigns. Use on table creation to distribute out regions across cluster. 696 * @return AssignProcedures made out of the passed in <code>hris</code> and a call to the balancer 697 * to populate the assigns with targets chosen using round-robin (default balancer 698 * scheme). If at assign-time, the target chosen is no longer up, thats fine, the 699 * AssignProcedure will ask the balancer for a new target, and so on. 700 */ 701 public TransitRegionStateProcedure[] createRoundRobinAssignProcedures(List<RegionInfo> hris, 702 List<ServerName> serversToExclude) { 703 if (hris.isEmpty()) { 704 return new TransitRegionStateProcedure[0]; 705 } 706 707 if (serversToExclude != null 708 && this.master.getServerManager().getOnlineServersList().size() == 1) { 709 LOG.debug("Only one region server found and hence going ahead with the assignment"); 710 serversToExclude = null; 711 } 712 try { 713 // Ask the balancer to assign our regions. Pass the regions en masse. The balancer can do 714 // a better job if it has all the assignments in the one lump. 715 Map<ServerName, List<RegionInfo>> assignments = getBalancer().roundRobinAssignment(hris, 716 this.master.getServerManager().createDestinationServersList(serversToExclude)); 717 // Return mid-method! 718 return createAssignProcedures(assignments); 719 } catch (HBaseIOException hioe) { 720 LOG.warn("Failed roundRobinAssignment", hioe); 721 } 722 // If an error above, fall-through to this simpler assign. Last resort. 723 return createAssignProcedures(hris); 724 } 725 726 /** 727 * Create round-robin assigns. Use on table creation to distribute out regions across cluster. 728 * @return AssignProcedures made out of the passed in <code>hris</code> and a call to the balancer 729 * to populate the assigns with targets chosen using round-robin (default balancer 730 * scheme). 
If at assign-time, the target chosen is no longer up, thats fine, the 731 * AssignProcedure will ask the balancer for a new target, and so on. 732 */ 733 public TransitRegionStateProcedure[] createRoundRobinAssignProcedures(List<RegionInfo> hris) { 734 return createRoundRobinAssignProcedures(hris, null); 735 } 736 737 @VisibleForTesting 738 static int compare(TransitRegionStateProcedure left, TransitRegionStateProcedure right) { 739 if (left.getRegion().isMetaRegion()) { 740 if (right.getRegion().isMetaRegion()) { 741 return RegionInfo.COMPARATOR.compare(left.getRegion(), right.getRegion()); 742 } 743 return -1; 744 } else if (right.getRegion().isMetaRegion()) { 745 return +1; 746 } 747 if (left.getRegion().getTable().isSystemTable()) { 748 if (right.getRegion().getTable().isSystemTable()) { 749 return RegionInfo.COMPARATOR.compare(left.getRegion(), right.getRegion()); 750 } 751 return -1; 752 } else if (right.getRegion().getTable().isSystemTable()) { 753 return +1; 754 } 755 return RegionInfo.COMPARATOR.compare(left.getRegion(), right.getRegion()); 756 } 757 758 private TransitRegionStateProcedure createAssignProcedure(RegionStateNode regionNode, 759 ServerName targetServer, boolean override) { 760 TransitRegionStateProcedure proc; 761 regionNode.lock(); 762 try { 763 if(override && regionNode.getProcedure() != null) { 764 regionNode.unsetProcedure(regionNode.getProcedure()); 765 } 766 assert regionNode.getProcedure() == null; 767 proc = TransitRegionStateProcedure.assign(getProcedureEnvironment(), 768 regionNode.getRegionInfo(), targetServer); 769 regionNode.setProcedure(proc); 770 } finally { 771 regionNode.unlock(); 772 } 773 return proc; 774 } 775 776 private TransitRegionStateProcedure createUnassignProcedure(RegionStateNode regionNode, 777 boolean override) { 778 TransitRegionStateProcedure proc; 779 regionNode.lock(); 780 try { 781 if(override && regionNode.getProcedure() != null) { 782 regionNode.unsetProcedure(regionNode.getProcedure()); 783 } 784 assert 
regionNode.getProcedure() == null; 785 proc = TransitRegionStateProcedure.unassign(getProcedureEnvironment(), 786 regionNode.getRegionInfo()); 787 regionNode.setProcedure(proc); 788 } finally { 789 regionNode.unlock(); 790 } 791 return proc; 792 } 793 794 /** 795 * Create one TransitRegionStateProcedure to assign a region w/o specifying a target server. 796 * This method is specified for HBCK2 797 */ 798 public TransitRegionStateProcedure createOneAssignProcedure(RegionInfo hri, boolean override) { 799 RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(hri); 800 return createAssignProcedure(regionNode, null, override); 801 } 802 803 /** 804 * Create one TransitRegionStateProcedure to unassign a region. 805 * This method is specified for HBCK2 806 */ 807 public TransitRegionStateProcedure createOneUnassignProcedure(RegionInfo hri, boolean override) { 808 RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(hri); 809 return createUnassignProcedure(regionNode, override); 810 } 811 812 /** 813 * Create an array of TransitRegionStateProcedure w/o specifying a target server. 814 * <p/> 815 * If no target server, at assign time, we will try to use the former location of the region if 816 * one exists. This is how we 'retain' the old location across a server restart. 817 * <p/> 818 * Should only be called when you can make sure that no one can touch these regions other than 819 * you. For example, when you are creating table. 820 */ 821 public TransitRegionStateProcedure[] createAssignProcedures(List<RegionInfo> hris) { 822 return hris.stream().map(hri -> regionStates.getOrCreateRegionStateNode(hri)) 823 .map(regionNode -> createAssignProcedure(regionNode, null, false)) 824 .sorted(AssignmentManager::compare).toArray(TransitRegionStateProcedure[]::new); 825 } 826 827 /** 828 * @param assignments Map of assignments from which we produce an array of AssignProcedures. 
829 * @return Assignments made from the passed in <code>assignments</code> 830 */ 831 private TransitRegionStateProcedure[] createAssignProcedures( 832 Map<ServerName, List<RegionInfo>> assignments) { 833 return assignments.entrySet().stream() 834 .flatMap(e -> e.getValue().stream().map(hri -> regionStates.getOrCreateRegionStateNode(hri)) 835 .map(regionNode -> createAssignProcedure(regionNode, e.getKey(), false))) 836 .sorted(AssignmentManager::compare).toArray(TransitRegionStateProcedure[]::new); 837 } 838 839 /** 840 * Called by DisableTableProcedure to unassign all the regions for a table. 841 */ 842 public TransitRegionStateProcedure[] createUnassignProceduresForDisabling(TableName tableName) { 843 return regionStates.getTableRegionStateNodes(tableName).stream().map(regionNode -> { 844 regionNode.lock(); 845 try { 846 if (!regionStates.include(regionNode, false) || 847 regionStates.isRegionOffline(regionNode.getRegionInfo())) { 848 return null; 849 } 850 // As in DisableTableProcedure, we will hold the xlock for table, so we can make sure that 851 // this procedure has not been executed yet, as TRSP will hold the shared lock for table all 852 // the time. So here we will unset it and when it is actually executed, it will find that 853 // the attach procedure is not itself and quit immediately. 
854 if (regionNode.getProcedure() != null) { 855 regionNode.unsetProcedure(regionNode.getProcedure()); 856 } 857 TransitRegionStateProcedure proc = TransitRegionStateProcedure 858 .unassign(getProcedureEnvironment(), regionNode.getRegionInfo()); 859 regionNode.setProcedure(proc); 860 return proc; 861 } finally { 862 regionNode.unlock(); 863 } 864 }).filter(p -> p != null).toArray(TransitRegionStateProcedure[]::new); 865 } 866 867 public SplitTableRegionProcedure createSplitProcedure(final RegionInfo regionToSplit, 868 final byte[] splitKey) throws IOException { 869 return new SplitTableRegionProcedure(getProcedureEnvironment(), regionToSplit, splitKey); 870 } 871 872 public MergeTableRegionsProcedure createMergeProcedure(RegionInfo ... ris) throws IOException { 873 return new MergeTableRegionsProcedure(getProcedureEnvironment(), ris, false); 874 } 875 876 /** 877 * Delete the region states. This is called by "DeleteTable" 878 */ 879 public void deleteTable(final TableName tableName) throws IOException { 880 final ArrayList<RegionInfo> regions = regionStates.getTableRegionsInfo(tableName); 881 regionStateStore.deleteRegions(regions); 882 for (int i = 0; i < regions.size(); ++i) { 883 final RegionInfo regionInfo = regions.get(i); 884 // we expect the region to be offline 885 regionStates.removeFromOfflineRegions(regionInfo); 886 regionStates.deleteRegion(regionInfo); 887 } 888 } 889 890 // ============================================================================================ 891 // RS Region Transition Report helpers 892 // ============================================================================================ 893 private void reportRegionStateTransition(ReportRegionStateTransitionResponse.Builder builder, 894 ServerName serverName, List<RegionStateTransition> transitionList) throws IOException { 895 for (RegionStateTransition transition : transitionList) { 896 switch (transition.getTransitionCode()) { 897 case OPENED: 898 case FAILED_OPEN: 899 case 
CLOSED: 900 assert transition.getRegionInfoCount() == 1 : transition; 901 final RegionInfo hri = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0)); 902 long procId = 903 transition.getProcIdCount() > 0 ? transition.getProcId(0) : Procedure.NO_PROC_ID; 904 updateRegionTransition(serverName, transition.getTransitionCode(), hri, 905 transition.hasOpenSeqNum() ? transition.getOpenSeqNum() : HConstants.NO_SEQNUM, procId); 906 break; 907 case READY_TO_SPLIT: 908 case SPLIT: 909 case SPLIT_REVERTED: 910 assert transition.getRegionInfoCount() == 3 : transition; 911 final RegionInfo parent = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0)); 912 final RegionInfo splitA = ProtobufUtil.toRegionInfo(transition.getRegionInfo(1)); 913 final RegionInfo splitB = ProtobufUtil.toRegionInfo(transition.getRegionInfo(2)); 914 updateRegionSplitTransition(serverName, transition.getTransitionCode(), parent, splitA, 915 splitB); 916 break; 917 case READY_TO_MERGE: 918 case MERGED: 919 case MERGE_REVERTED: 920 assert transition.getRegionInfoCount() == 3 : transition; 921 final RegionInfo merged = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0)); 922 final RegionInfo mergeA = ProtobufUtil.toRegionInfo(transition.getRegionInfo(1)); 923 final RegionInfo mergeB = ProtobufUtil.toRegionInfo(transition.getRegionInfo(2)); 924 updateRegionMergeTransition(serverName, transition.getTransitionCode(), merged, mergeA, 925 mergeB); 926 break; 927 } 928 } 929 } 930 931 public ReportRegionStateTransitionResponse reportRegionStateTransition( 932 final ReportRegionStateTransitionRequest req) throws PleaseHoldException { 933 ReportRegionStateTransitionResponse.Builder builder = 934 ReportRegionStateTransitionResponse.newBuilder(); 935 ServerName serverName = ProtobufUtil.toServerName(req.getServer()); 936 ServerStateNode serverNode = regionStates.getOrCreateServer(serverName); 937 // here we have to acquire a read lock instead of a simple exclusive lock. 
This is because that 938 // we should not block other reportRegionStateTransition call from the same region server. This 939 // is not only about performance, but also to prevent dead lock. Think of the meta region is 940 // also on the same region server and you hold the lock which blocks the 941 // reportRegionStateTransition for meta, and since meta is not online, you will block inside the 942 // lock protection to wait for meta online... 943 serverNode.readLock().lock(); 944 try { 945 // we only accept reportRegionStateTransition if the region server is online, see the comment 946 // above in submitServerCrash method and HBASE-21508 for more details. 947 if (serverNode.isInState(ServerState.ONLINE)) { 948 try { 949 reportRegionStateTransition(builder, serverName, req.getTransitionList()); 950 } catch (PleaseHoldException e) { 951 LOG.trace("Failed transition ", e); 952 throw e; 953 } catch (UnsupportedOperationException | IOException e) { 954 // TODO: at the moment we have a single error message and the RS will abort 955 // if the master says that one of the region transitions failed. 956 LOG.warn("Failed transition", e); 957 builder.setErrorMessage("Failed transition " + e.getMessage()); 958 } 959 } else { 960 LOG.warn("The region server {} is already dead, skip reportRegionStateTransition call", 961 serverName); 962 builder.setErrorMessage("You are dead"); 963 } 964 } finally { 965 serverNode.readLock().unlock(); 966 } 967 968 return builder.build(); 969 } 970 971 private void updateRegionTransition(ServerName serverName, TransitionCode state, 972 RegionInfo regionInfo, long seqId, long procId) throws IOException { 973 checkMetaLoaded(regionInfo); 974 975 RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo); 976 if (regionNode == null) { 977 // the table/region is gone. maybe a delete, split, merge 978 throw new UnexpectedStateException(String.format( 979 "Server %s was trying to transition region %s to %s. 
but Region is not known.", 980 serverName, regionInfo, state)); 981 } 982 LOG.trace("Update region transition serverName={} region={} regionState={}", serverName, 983 regionNode, state); 984 985 ServerStateNode serverNode = regionStates.getOrCreateServer(serverName); 986 regionNode.lock(); 987 try { 988 if (!reportTransition(regionNode, serverNode, state, seqId, procId)) { 989 // Don't log WARN if shutting down cluster; during shutdown. Avoid the below messages: 990 // 2018-08-13 10:45:10,551 WARN ...AssignmentManager: No matching procedure found for 991 // rit=OPEN, location=ve0538.halxg.cloudera.com,16020,1533493000958, 992 // table=IntegrationTestBigLinkedList, region=65ab289e2fc1530df65f6c3d7cde7aa5 transition 993 // to CLOSED 994 // These happen because on cluster shutdown, we currently let the RegionServers close 995 // regions. This is the only time that region close is not run by the Master (so cluster 996 // goes down fast). Consider changing it so Master runs all shutdowns. 997 if (this.master.getServerManager().isClusterShutdown() && 998 state.equals(TransitionCode.CLOSED)) { 999 LOG.info("RegionServer {} {}", state, regionNode.getRegionInfo().getEncodedName()); 1000 } else { 1001 LOG.warn("No matching procedure found for {} transition on {} to {}", 1002 serverName, regionNode, state); 1003 } 1004 } 1005 } finally { 1006 regionNode.unlock(); 1007 } 1008 } 1009 1010 private boolean reportTransition(RegionStateNode regionNode, ServerStateNode serverNode, 1011 TransitionCode state, long seqId, long procId) throws IOException { 1012 ServerName serverName = serverNode.getServerName(); 1013 TransitRegionStateProcedure proc = regionNode.getProcedure(); 1014 if (proc == null) { 1015 return false; 1016 } 1017 proc.reportTransition(master.getMasterProcedureExecutor().getEnvironment(), regionNode, 1018 serverName, state, seqId, procId); 1019 return true; 1020 } 1021 1022 private void updateRegionSplitTransition(final ServerName serverName, final TransitionCode 
state, 1023 final RegionInfo parent, final RegionInfo hriA, final RegionInfo hriB) 1024 throws IOException { 1025 checkMetaLoaded(parent); 1026 1027 if (state != TransitionCode.READY_TO_SPLIT) { 1028 throw new UnexpectedStateException("unsupported split regionState=" + state + 1029 " for parent region " + parent + 1030 " maybe an old RS (< 2.0) had the operation in progress"); 1031 } 1032 1033 // sanity check on the request 1034 if (!Bytes.equals(hriA.getEndKey(), hriB.getStartKey())) { 1035 throw new UnsupportedOperationException( 1036 "unsupported split request with bad keys: parent=" + parent + 1037 " hriA=" + hriA + " hriB=" + hriB); 1038 } 1039 1040 if (!master.isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { 1041 LOG.warn("Split switch is off! skip split of " + parent); 1042 throw new DoNotRetryIOException("Split region " + parent.getRegionNameAsString() + 1043 " failed due to split switch off"); 1044 } 1045 1046 // Submit the Split procedure 1047 final byte[] splitKey = hriB.getStartKey(); 1048 if (LOG.isDebugEnabled()) { 1049 LOG.debug("Split request from " + serverName + 1050 ", parent=" + parent + " splitKey=" + Bytes.toStringBinary(splitKey)); 1051 } 1052 master.getMasterProcedureExecutor().submitProcedure(createSplitProcedure(parent, splitKey)); 1053 1054 // If the RS is < 2.0 throw an exception to abort the operation, we are handling the split 1055 if (master.getServerManager().getVersionNumber(serverName) < 0x0200000) { 1056 throw new UnsupportedOperationException(String.format( 1057 "Split handled by the master: parent=%s hriA=%s hriB=%s", parent.getShortNameToLog(), hriA, hriB)); 1058 } 1059 } 1060 1061 private void updateRegionMergeTransition(final ServerName serverName, final TransitionCode state, 1062 final RegionInfo merged, final RegionInfo hriA, final RegionInfo hriB) throws IOException { 1063 checkMetaLoaded(merged); 1064 1065 if (state != TransitionCode.READY_TO_MERGE) { 1066 throw new UnexpectedStateException("Unsupported merge 
regionState=" + state + 1067 " for regionA=" + hriA + " regionB=" + hriB + " merged=" + merged + 1068 " maybe an old RS (< 2.0) had the operation in progress"); 1069 } 1070 1071 if (!master.isSplitOrMergeEnabled(MasterSwitchType.MERGE)) { 1072 LOG.warn("Merge switch is off! skip merge of regionA=" + hriA + " regionB=" + hriB); 1073 throw new DoNotRetryIOException("Merge of regionA=" + hriA + " regionB=" + hriB + 1074 " failed because merge switch is off"); 1075 } 1076 1077 // Submit the Merge procedure 1078 if (LOG.isDebugEnabled()) { 1079 LOG.debug("Handling merge request from RS=" + merged + ", merged=" + merged); 1080 } 1081 master.getMasterProcedureExecutor().submitProcedure(createMergeProcedure(hriA, hriB)); 1082 1083 // If the RS is < 2.0 throw an exception to abort the operation, we are handling the merge 1084 if (master.getServerManager().getVersionNumber(serverName) < 0x0200000) { 1085 throw new UnsupportedOperationException(String.format( 1086 "Merge not handled yet: regionState=%s merged=%s hriA=%s hriB=%s", state, merged, hriA, 1087 hriB)); 1088 } 1089 } 1090 1091 // ============================================================================================ 1092 // RS Status update (report online regions) helpers 1093 // ============================================================================================ 1094 /** 1095 * The master will call this method when the RS send the regionServerReport(). The report will 1096 * contains the "online regions". This method will check the the online regions against the 1097 * in-memory state of the AM, and we will log a warn message if there is a mismatch. This is 1098 * because that there is no fencing between the reportRegionStateTransition method and 1099 * regionServerReport method, so there could be race and introduce inconsistency here, but 1100 * actually there is no problem. 1101 * <p/> 1102 * Please see HBASE-21421 and HBASE-21463 for more details. 
1103 */ 1104 public void reportOnlineRegions(ServerName serverName, Set<byte[]> regionNames) { 1105 if (!isRunning()) { 1106 return; 1107 } 1108 if (LOG.isTraceEnabled()) { 1109 LOG.trace("ReportOnlineRegions {} regionCount={}, metaLoaded={} {}", serverName, 1110 regionNames.size(), isMetaLoaded(), 1111 regionNames.stream().map(Bytes::toStringBinary).collect(Collectors.toList())); 1112 } 1113 1114 ServerStateNode serverNode = regionStates.getOrCreateServer(serverName); 1115 synchronized (serverNode) { 1116 if (!serverNode.isInState(ServerState.ONLINE)) { 1117 LOG.warn("Got a report from a server result in state " + serverNode.getState()); 1118 return; 1119 } 1120 } 1121 1122 // Track the regionserver reported online regions in memory. 1123 synchronized (rsReports) { 1124 rsReports.put(serverName, regionNames); 1125 } 1126 1127 if (regionNames.isEmpty()) { 1128 // nothing to do if we don't have regions 1129 LOG.trace("no online region found on {}", serverName); 1130 return; 1131 } 1132 if (!isMetaLoaded()) { 1133 // we are still on startup, skip checking 1134 return; 1135 } 1136 // The Heartbeat tells us of what regions are on the region serve, check the state. 1137 checkOnlineRegionsReport(serverNode, regionNames); 1138 } 1139 1140 /** 1141 * Close <code>regionName</code> on <code>sn</code> silently and immediately without 1142 * using a Procedure or going via hbase:meta. For case where a RegionServer's hosting 1143 * of a Region is not aligned w/ the Master's accounting of Region state. This is for 1144 * cleaning up an error in accounting. 1145 */ 1146 private void closeRegionSilently(ServerName sn, byte [] regionName) { 1147 try { 1148 RegionInfo ri = MetaTableAccessor.parseRegionInfoFromRegionName(regionName); 1149 // Pass -1 for timeout. Means do not wait. 
1150 ServerManager.closeRegionSilentlyAndWait(this.master.getClusterConnection(), sn, ri, -1); 1151 } catch (Exception e) { 1152 LOG.error("Failed trying to close {} on {}", Bytes.toStringBinary(regionName), sn, e); 1153 } 1154 } 1155 1156 /** 1157 * Check that what the RegionServer reports aligns with the Master's image. 1158 * If disagreement, we will tell the RegionServer to expediently close 1159 * a Region we do not think it should have. 1160 */ 1161 private void checkOnlineRegionsReport(ServerStateNode serverNode, Set<byte[]> regionNames) { 1162 ServerName serverName = serverNode.getServerName(); 1163 for (byte[] regionName : regionNames) { 1164 if (!isRunning()) { 1165 return; 1166 } 1167 RegionStateNode regionNode = regionStates.getRegionStateNodeFromName(regionName); 1168 if (regionNode == null) { 1169 String regionNameAsStr = Bytes.toStringBinary(regionName); 1170 LOG.warn("No RegionStateNode for {} but reported as up on {}; closing...", 1171 regionNameAsStr, serverName); 1172 closeRegionSilently(serverNode.getServerName(), regionName); 1173 continue; 1174 } 1175 final long lag = 1000; 1176 regionNode.lock(); 1177 try { 1178 long diff = EnvironmentEdgeManager.currentTime() - regionNode.getLastUpdate(); 1179 if (regionNode.isInState(State.OPENING, State.OPEN)) { 1180 // This is possible as a region server has just closed a region but the region server 1181 // report is generated before the closing, but arrive after the closing. Make sure there 1182 // is some elapsed time so less false alarms. 
1183 if (!regionNode.getRegionLocation().equals(serverName) && diff > lag) { 1184 LOG.warn("Reporting {} server does not match {} (time since last " + 1185 "update={}ms); closing...", 1186 serverName, regionNode, diff); 1187 closeRegionSilently(serverNode.getServerName(), regionName); 1188 } 1189 } else if (!regionNode.isInState(State.CLOSING, State.SPLITTING)) { 1190 // So, we can get report that a region is CLOSED or SPLIT because a heartbeat 1191 // came in at about same time as a region transition. Make sure there is some 1192 // elapsed time so less false alarms. 1193 if (diff > lag) { 1194 LOG.warn("Reporting {} state does not match {} (time since last update={}ms)", 1195 serverName, regionNode, diff); 1196 } 1197 } 1198 } finally { 1199 regionNode.unlock(); 1200 } 1201 } 1202 } 1203 1204 // ============================================================================================ 1205 // RIT chore 1206 // ============================================================================================ 1207 private static class RegionInTransitionChore extends ProcedureInMemoryChore<MasterProcedureEnv> { 1208 public RegionInTransitionChore(final int timeoutMsec) { 1209 super(timeoutMsec); 1210 } 1211 1212 @Override 1213 protected void periodicExecute(final MasterProcedureEnv env) { 1214 final AssignmentManager am = env.getAssignmentManager(); 1215 1216 final RegionInTransitionStat ritStat = am.computeRegionInTransitionStat(); 1217 if (ritStat.hasRegionsOverThreshold()) { 1218 for (RegionState hri: ritStat.getRegionOverThreshold()) { 1219 am.handleRegionOverStuckWarningThreshold(hri.getRegion()); 1220 } 1221 } 1222 1223 // update metrics 1224 am.updateRegionsInTransitionMetrics(ritStat); 1225 } 1226 } 1227 1228 private static class DeadServerMetricRegionChore 1229 extends ProcedureInMemoryChore<MasterProcedureEnv> { 1230 public DeadServerMetricRegionChore(final int timeoutMsec) { 1231 super(timeoutMsec); 1232 } 1233 1234 @Override 1235 protected void 
periodicExecute(final MasterProcedureEnv env) { 1236 final ServerManager sm = env.getMasterServices().getServerManager(); 1237 final AssignmentManager am = env.getAssignmentManager(); 1238 // To minimize inconsistencies we are not going to snapshot live servers in advance in case 1239 // new servers are added; OTOH we don't want to add heavy sync for a consistent view since 1240 // this is for metrics. Instead, we're going to check each regions as we go; to avoid making 1241 // too many checks, we maintain a local lists of server, limiting us to false negatives. If 1242 // we miss some recently-dead server, we'll just see it next time. 1243 Set<ServerName> recentlyLiveServers = new HashSet<>(); 1244 int deadRegions = 0, unknownRegions = 0; 1245 for (RegionStateNode rsn : am.getRegionStates().getRegionStateNodes()) { 1246 if (rsn.getState() != State.OPEN) { 1247 continue; // Opportunistic check, should quickly skip RITs, offline tables, etc. 1248 } 1249 // Do not need to acquire region state lock as this is only for showing metrics. 1250 ServerName sn = rsn.getRegionLocation(); 1251 State state = rsn.getState(); 1252 if (state != State.OPEN) { 1253 continue; // Mostly skipping RITs that are already being take care of. 1254 } 1255 if (sn == null) { 1256 ++unknownRegions; // Opened on null? 
1257 continue; 1258 } 1259 if (recentlyLiveServers.contains(sn)) { 1260 continue; 1261 } 1262 ServerManager.ServerLiveState sls = sm.isServerKnownAndOnline(sn); 1263 switch (sls) { 1264 case LIVE: 1265 recentlyLiveServers.add(sn); 1266 break; 1267 case DEAD: 1268 ++deadRegions; 1269 break; 1270 case UNKNOWN: 1271 ++unknownRegions; 1272 break; 1273 default: throw new AssertionError("Unexpected " + sls); 1274 } 1275 } 1276 if (deadRegions > 0 || unknownRegions > 0) { 1277 LOG.info("Found {} OPEN regions on dead servers and {} OPEN regions on unknown servers", 1278 deadRegions, unknownRegions); 1279 } 1280 1281 am.updateDeadServerRegionMetrics(deadRegions, unknownRegions); 1282 } 1283 } 1284 1285 public RegionInTransitionStat computeRegionInTransitionStat() { 1286 final RegionInTransitionStat rit = new RegionInTransitionStat(getConfiguration()); 1287 rit.update(this); 1288 return rit; 1289 } 1290 1291 public static class RegionInTransitionStat { 1292 private final int ritThreshold; 1293 1294 private HashMap<String, RegionState> ritsOverThreshold = null; 1295 private long statTimestamp; 1296 private long oldestRITTime = 0; 1297 private int totalRITsTwiceThreshold = 0; 1298 private int totalRITs = 0; 1299 1300 @VisibleForTesting 1301 public RegionInTransitionStat(final Configuration conf) { 1302 this.ritThreshold = 1303 conf.getInt(METRICS_RIT_STUCK_WARNING_THRESHOLD, DEFAULT_RIT_STUCK_WARNING_THRESHOLD); 1304 } 1305 1306 public int getRITThreshold() { 1307 return ritThreshold; 1308 } 1309 1310 public long getTimestamp() { 1311 return statTimestamp; 1312 } 1313 1314 public int getTotalRITs() { 1315 return totalRITs; 1316 } 1317 1318 public long getOldestRITTime() { 1319 return oldestRITTime; 1320 } 1321 1322 public int getTotalRITsOverThreshold() { 1323 Map<String, RegionState> m = this.ritsOverThreshold; 1324 return m != null ? 
m.size() : 0; 1325 } 1326 1327 public boolean hasRegionsTwiceOverThreshold() { 1328 return totalRITsTwiceThreshold > 0; 1329 } 1330 1331 public boolean hasRegionsOverThreshold() { 1332 Map<String, RegionState> m = this.ritsOverThreshold; 1333 return m != null && !m.isEmpty(); 1334 } 1335 1336 public Collection<RegionState> getRegionOverThreshold() { 1337 Map<String, RegionState> m = this.ritsOverThreshold; 1338 return m != null? m.values(): Collections.emptySet(); 1339 } 1340 1341 public boolean isRegionOverThreshold(final RegionInfo regionInfo) { 1342 Map<String, RegionState> m = this.ritsOverThreshold; 1343 return m != null && m.containsKey(regionInfo.getEncodedName()); 1344 } 1345 1346 public boolean isRegionTwiceOverThreshold(final RegionInfo regionInfo) { 1347 Map<String, RegionState> m = this.ritsOverThreshold; 1348 if (m == null) return false; 1349 final RegionState state = m.get(regionInfo.getEncodedName()); 1350 if (state == null) return false; 1351 return (statTimestamp - state.getStamp()) > (ritThreshold * 2); 1352 } 1353 1354 protected void update(final AssignmentManager am) { 1355 final RegionStates regionStates = am.getRegionStates(); 1356 this.statTimestamp = EnvironmentEdgeManager.currentTime(); 1357 update(regionStates.getRegionsStateInTransition(), statTimestamp); 1358 update(regionStates.getRegionFailedOpen(), statTimestamp); 1359 } 1360 1361 private void update(final Collection<RegionState> regions, final long currentTime) { 1362 for (RegionState state: regions) { 1363 totalRITs++; 1364 final long ritTime = currentTime - state.getStamp(); 1365 if (ritTime > ritThreshold) { 1366 if (ritsOverThreshold == null) { 1367 ritsOverThreshold = new HashMap<String, RegionState>(); 1368 } 1369 ritsOverThreshold.put(state.getRegion().getEncodedName(), state); 1370 totalRITsTwiceThreshold += (ritTime > (ritThreshold * 2)) ? 
1 : 0; 1371 } 1372 if (oldestRITTime < ritTime) { 1373 oldestRITTime = ritTime; 1374 } 1375 } 1376 } 1377 } 1378 1379 private void updateRegionsInTransitionMetrics(final RegionInTransitionStat ritStat) { 1380 metrics.updateRITOldestAge(ritStat.getOldestRITTime()); 1381 metrics.updateRITCount(ritStat.getTotalRITs()); 1382 metrics.updateRITCountOverThreshold(ritStat.getTotalRITsOverThreshold()); 1383 } 1384 1385 private void updateDeadServerRegionMetrics(int deadRegions, int unknownRegions) { 1386 metrics.updateDeadServerOpenRegions(deadRegions); 1387 metrics.updateUnknownServerOpenRegions(unknownRegions); 1388 } 1389 1390 private void handleRegionOverStuckWarningThreshold(final RegionInfo regionInfo) { 1391 final RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo); 1392 //if (regionNode.isStuck()) { 1393 LOG.warn("STUCK Region-In-Transition {}", regionNode); 1394 } 1395 1396 // ============================================================================================ 1397 // TODO: Master load/bootstrap 1398 // ============================================================================================ 1399 public void joinCluster() throws IOException { 1400 long startTime = System.nanoTime(); 1401 LOG.debug("Joining cluster..."); 1402 1403 // Scan hbase:meta to build list of existing regions, servers, and assignment. 1404 // hbase:meta is online now or will be. Inside loadMeta, we keep trying. Can't make progress 1405 // w/o meta. 
1406 loadMeta(); 1407 1408 while (master.getServerManager().countOfRegionServers() < 1) { 1409 LOG.info("Waiting for RegionServers to join; current count={}", 1410 master.getServerManager().countOfRegionServers()); 1411 Threads.sleep(250); 1412 } 1413 LOG.info("Number of RegionServers={}", master.getServerManager().countOfRegionServers()); 1414 1415 // Start the chores 1416 master.getMasterProcedureExecutor().addChore(this.ritChore); 1417 master.getMasterProcedureExecutor().addChore(this.deadMetricChore); 1418 1419 long costMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime); 1420 LOG.info("Joined the cluster in {}", StringUtils.humanTimeDiff(costMs)); 1421 } 1422 1423 /** 1424 * Create assign procedure for offline regions. 1425 * Just follow the old processofflineServersWithOnlineRegions method. Since now we do not need to 1426 * deal with dead server any more, we only deal with the regions in OFFLINE state in this method. 1427 * And this is a bit strange, that for new regions, we will add it in CLOSED state instead of 1428 * OFFLINE state, and usually there will be a procedure to track them. The 1429 * processofflineServersWithOnlineRegions is a legacy from long ago, as things are going really 1430 * different now, maybe we do not need this method any more. Need to revisit later. 1431 */ 1432 // Public so can be run by the Master as part of the startup. Needs hbase:meta to be online. 1433 // Needs to be done after the table state manager has been started. 
1434 public void processOfflineRegions() { 1435 List<RegionInfo> offlineRegions = regionStates.getRegionStates().stream() 1436 .filter(RegionState::isOffline).filter(s -> isTableEnabled(s.getRegion().getTable())) 1437 .map(RegionState::getRegion).collect(Collectors.toList()); 1438 if (!offlineRegions.isEmpty()) { 1439 master.getMasterProcedureExecutor().submitProcedures( 1440 master.getAssignmentManager().createRoundRobinAssignProcedures(offlineRegions)); 1441 } 1442 } 1443 1444 /* AM internal RegionStateStore.RegionStateVisitor implementation. To be used when 1445 * scanning META table for region rows, using RegionStateStore utility methods. RegionStateStore 1446 * methods will convert Result into proper RegionInfo instances, but those would still need to be 1447 * added into AssignmentManager.regionStates in-memory cache. 1448 * RegionMetaLoadingVisitor.visitRegionState method provides the logic for adding RegionInfo 1449 * instances as loaded from latest META scan into AssignmentManager.regionStates. 1450 */ 1451 private class RegionMetaLoadingVisitor implements RegionStateStore.RegionStateVisitor { 1452 1453 @Override 1454 public void visitRegionState(Result result, final RegionInfo regionInfo, final State state, 1455 final ServerName regionLocation, final ServerName lastHost, final long openSeqNum) { 1456 if (state == null && regionLocation == null && lastHost == null && 1457 openSeqNum == SequenceId.NO_SEQUENCE_ID) { 1458 // This is a row with nothing in it. 1459 LOG.warn("Skipping empty row={}", result); 1460 return; 1461 } 1462 State localState = state; 1463 if (localState == null) { 1464 // No region state column data in hbase:meta table! Are I doing a rolling upgrade from 1465 // hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta? 1466 // In any of these cases, state is empty. For now, presume OFFLINE but there are probably 1467 // cases where we need to probe more to be sure this correct; TODO informed by experience. 
1468 LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE); 1469 localState = State.OFFLINE; 1470 } 1471 RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo); 1472 // Do not need to lock on regionNode, as we can make sure that before we finish loading 1473 // meta, all the related procedures can not be executed. The only exception is for meta 1474 // region related operations, but here we do not load the informations for meta region. 1475 regionNode.setState(localState); 1476 regionNode.setLastHost(lastHost); 1477 regionNode.setRegionLocation(regionLocation); 1478 regionNode.setOpenSeqNum(openSeqNum); 1479 1480 // Note: keep consistent with other methods, see region(Opening|Opened|Closing) 1481 // RIT/ServerCrash handling should take care of the transiting regions. 1482 if (localState.matches(State.OPEN, State.OPENING, State.CLOSING, State.SPLITTING, 1483 State.MERGING)) { 1484 assert regionLocation != null : "found null region location for " + regionNode; 1485 regionStates.addRegionToServer(regionNode); 1486 } else if (localState == State.OFFLINE || regionInfo.isOffline()) { 1487 regionStates.addToOfflineRegions(regionNode); 1488 } 1489 if (regionNode.getProcedure() != null) { 1490 regionNode.getProcedure().stateLoaded(AssignmentManager.this, regionNode); 1491 } 1492 } 1493 }; 1494 1495 /** 1496 * Query META if the given <code>RegionInfo</code> exists, adding to 1497 * <code>AssignmentManager.regionStateStore</code> cache if the region is found in META. 1498 * @param regionEncodedName encoded name for the region to be loaded from META into 1499 * <code>AssignmentManager.regionStateStore</code> cache 1500 * @return <code>RegionInfo</code> instance for the given region if it is present in META 1501 * and got successfully loaded into <code>AssignmentManager.regionStateStore</code> 1502 * cache, <b>null</b> otherwise. 1503 * @throws UnknownRegionException if any errors occur while querying meta. 
1504 */ 1505 public RegionInfo loadRegionFromMeta(String regionEncodedName) throws UnknownRegionException { 1506 try { 1507 RegionMetaLoadingVisitor visitor = new RegionMetaLoadingVisitor(); 1508 regionStateStore.visitMetaForRegion(regionEncodedName, visitor); 1509 return regionStates.getRegionState(regionEncodedName) == null ? null : 1510 regionStates.getRegionState(regionEncodedName).getRegion(); 1511 } catch(IOException e) { 1512 LOG.error("Error trying to load region {} from META", regionEncodedName, e); 1513 throw new UnknownRegionException("Error while trying load region from meta"); 1514 } 1515 } 1516 1517 private void loadMeta() throws IOException { 1518 // TODO: use a thread pool 1519 regionStateStore.visitMeta(new RegionMetaLoadingVisitor()); 1520 // every assignment is blocked until meta is loaded. 1521 wakeMetaLoadedEvent(); 1522 } 1523 1524 /** 1525 * Used to check if the meta loading is done. 1526 * <p/> 1527 * if not we throw PleaseHoldException since we are rebuilding the RegionStates 1528 * @param hri region to check if it is already rebuild 1529 * @throws PleaseHoldException if meta has not been loaded yet 1530 */ 1531 private void checkMetaLoaded(RegionInfo hri) throws PleaseHoldException { 1532 if (!isRunning()) { 1533 throw new PleaseHoldException("AssignmentManager not running"); 1534 } 1535 boolean meta = isMetaRegion(hri); 1536 boolean metaLoaded = isMetaLoaded(); 1537 if (!meta && !metaLoaded) { 1538 throw new PleaseHoldException( 1539 "Master not fully online; hbase:meta=" + meta + ", metaLoaded=" + metaLoaded); 1540 } 1541 } 1542 1543 // ============================================================================================ 1544 // TODO: Metrics 1545 // ============================================================================================ 1546 public int getNumRegionsOpened() { 1547 // TODO: Used by TestRegionPlacement.java and assume monotonically increasing value 1548 return 0; 1549 } 1550 1551 /** 1552 * Usually run by the 
Master in reaction to server crash during normal processing. 1553 * Can also be invoked via external RPC to effect repair; in the latter case, 1554 * the 'force' flag is set so we push through the SCP though context may indicate 1555 * already-running-SCP (An old SCP may have exited abnormally, or damaged cluster 1556 * may still have references in hbase:meta to 'Unknown Servers' -- servers that 1557 * are not online or in dead servers list, etc.) 1558 * @param force Set if the request came in externally over RPC (via hbck2). Force means 1559 * run the SCP even if it seems as though there might be an outstanding 1560 * SCP running. 1561 * @return pid of scheduled SCP or {@link Procedure#NO_PROC_ID} if none scheduled. 1562 */ 1563 public long submitServerCrash(ServerName serverName, boolean shouldSplitWal, boolean force) { 1564 // May be an 'Unknown Server' so handle case where serverNode is null. 1565 ServerStateNode serverNode = regionStates.getServerNode(serverName); 1566 // Remove the in-memory rsReports result 1567 synchronized (rsReports) { 1568 rsReports.remove(serverName); 1569 } 1570 1571 // We hold the write lock here for fencing on reportRegionStateTransition. Once we set the 1572 // server state to CRASHED, we will no longer accept the reportRegionStateTransition call from 1573 // this server. This is used to simplify the implementation for TRSP and SCP, where we can make 1574 // sure that, the region list fetched by SCP will not be changed any more. 
1575 if (serverNode != null) { 1576 serverNode.writeLock().lock(); 1577 } 1578 boolean carryingMeta; 1579 long pid; 1580 try { 1581 ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor(); 1582 carryingMeta = isCarryingMeta(serverName); 1583 if (!force && serverNode != null && !serverNode.isInState(ServerState.ONLINE)) { 1584 LOG.info("Skip adding ServerCrashProcedure for {} (meta={}) -- running?", 1585 serverNode, carryingMeta); 1586 return Procedure.NO_PROC_ID; 1587 } else { 1588 MasterProcedureEnv mpe = procExec.getEnvironment(); 1589 // If serverNode == null, then 'Unknown Server'. Schedule HBCKSCP instead. 1590 // HBCKSCP scours Master in-memory state AND hbase;meta for references to 1591 // serverName just-in-case. An SCP that is scheduled when the server is 1592 // 'Unknown' probably originated externally with HBCK2 fix-it tool. 1593 ServerState oldState = null; 1594 if (serverNode != null) { 1595 oldState = serverNode.getState(); 1596 serverNode.setState(ServerState.CRASHED); 1597 } 1598 1599 if (force) { 1600 pid = procExec.submitProcedure( 1601 new HBCKServerCrashProcedure(mpe, serverName, shouldSplitWal, carryingMeta)); 1602 } else { 1603 pid = procExec.submitProcedure( 1604 new ServerCrashProcedure(mpe, serverName, shouldSplitWal, carryingMeta)); 1605 } 1606 LOG.info("Scheduled ServerCrashProcedure pid={} for {} (carryingMeta={}){}.", 1607 pid, serverName, carryingMeta, 1608 serverNode == null? 
"": " " + serverNode.toString() + ", oldState=" + oldState); 1609 } 1610 } finally { 1611 if (serverNode != null) { 1612 serverNode.writeLock().unlock(); 1613 } 1614 } 1615 return pid; 1616 } 1617 1618 public void offlineRegion(final RegionInfo regionInfo) { 1619 // TODO used by MasterRpcServices 1620 RegionStateNode node = regionStates.getRegionStateNode(regionInfo); 1621 if (node != null) { 1622 node.offline(); 1623 } 1624 } 1625 1626 public void onlineRegion(final RegionInfo regionInfo, final ServerName serverName) { 1627 // TODO used by TestSplitTransactionOnCluster.java 1628 } 1629 1630 public Map<ServerName, List<RegionInfo>> getSnapShotOfAssignment( 1631 final Collection<RegionInfo> regions) { 1632 return regionStates.getSnapShotOfAssignment(regions); 1633 } 1634 1635 // ============================================================================================ 1636 // TODO: UTILS/HELPERS? 1637 // ============================================================================================ 1638 /** 1639 * Used by the client (via master) to identify if all regions have the schema updates 1640 * 1641 * @param tableName 1642 * @return Pair indicating the status of the alter command (pending/total) 1643 * @throws IOException 1644 */ 1645 public Pair<Integer, Integer> getReopenStatus(TableName tableName) { 1646 if (isTableDisabled(tableName)) return new Pair<Integer, Integer>(0, 0); 1647 1648 final List<RegionState> states = regionStates.getTableRegionStates(tableName); 1649 int ritCount = 0; 1650 for (RegionState regionState: states) { 1651 if (!regionState.isOpened() && !regionState.isSplit()) { 1652 ritCount++; 1653 } 1654 } 1655 return new Pair<Integer, Integer>(ritCount, states.size()); 1656 } 1657 1658 // ============================================================================================ 1659 // TODO: Region State In Transition 1660 // ============================================================================================ 1661 public boolean 
hasRegionsInTransition() { 1662 return regionStates.hasRegionsInTransition(); 1663 } 1664 1665 public List<RegionStateNode> getRegionsInTransition() { 1666 return regionStates.getRegionsInTransition(); 1667 } 1668 1669 public List<RegionInfo> getAssignedRegions() { 1670 return regionStates.getAssignedRegions(); 1671 } 1672 1673 public RegionInfo getRegionInfo(final byte[] regionName) { 1674 final RegionStateNode regionState = regionStates.getRegionStateNodeFromName(regionName); 1675 return regionState != null ? regionState.getRegionInfo() : null; 1676 } 1677 1678 // ============================================================================================ 1679 // Expected states on region state transition. 1680 // Notice that there is expected states for transiting to OPENING state, this is because SCP. 1681 // See the comments in regionOpening method for more details. 1682 // ============================================================================================ 1683 private static final State[] STATES_EXPECTED_ON_OPEN = { 1684 State.OPENING, // Normal case 1685 State.OPEN // Retrying 1686 }; 1687 1688 private static final State[] STATES_EXPECTED_ON_CLOSING = { 1689 State.OPEN, // Normal case 1690 State.CLOSING, // Retrying 1691 State.SPLITTING, // Offline the split parent 1692 State.MERGING // Offline the merge parents 1693 }; 1694 1695 private static final State[] STATES_EXPECTED_ON_CLOSED = { 1696 State.CLOSING, // Normal case 1697 State.CLOSED // Retrying 1698 }; 1699 1700 // This is for manually scheduled region assign, can add other states later if we find out other 1701 // usages 1702 private static final State[] STATES_EXPECTED_ON_ASSIGN = { State.CLOSED, State.OFFLINE }; 1703 1704 // We only allow unassign or move a region which is in OPEN state. 
1705 private static final State[] STATES_EXPECTED_ON_UNASSIGN_OR_MOVE = { State.OPEN }; 1706 1707 // ============================================================================================ 1708 // Region Status update 1709 // Should only be called in TransitRegionStateProcedure(and related procedures), as the locking 1710 // and pre-assumptions are very tricky. 1711 // ============================================================================================ 1712 private void transitStateAndUpdate(RegionStateNode regionNode, RegionState.State newState, 1713 RegionState.State... expectedStates) throws IOException { 1714 RegionState.State state = regionNode.getState(); 1715 regionNode.transitionState(newState, expectedStates); 1716 boolean succ = false; 1717 try { 1718 regionStateStore.updateRegionLocation(regionNode); 1719 succ = true; 1720 } finally { 1721 if (!succ) { 1722 // revert 1723 regionNode.setState(state); 1724 } 1725 } 1726 } 1727 1728 // should be called within the synchronized block of RegionStateNode 1729 void regionOpening(RegionStateNode regionNode) throws IOException { 1730 // As in SCP, for performance reason, there is no TRSP attached with this region, we will not 1731 // update the region state, which means that the region could be in any state when we want to 1732 // assign it after a RS crash. So here we do not pass the expectedStates parameter. 1733 transitStateAndUpdate(regionNode, State.OPENING); 1734 regionStates.addRegionToServer(regionNode); 1735 // update the operation count metrics 1736 metrics.incrementOperationCounter(); 1737 } 1738 1739 // should be called under the RegionStateNode lock 1740 // The parameter 'giveUp' means whether we will try to open the region again, if it is true, then 1741 // we will persist the FAILED_OPEN state into hbase:meta. 
1742 void regionFailedOpen(RegionStateNode regionNode, boolean giveUp) throws IOException { 1743 RegionState.State state = regionNode.getState(); 1744 ServerName regionLocation = regionNode.getRegionLocation(); 1745 if (giveUp) { 1746 regionNode.setState(State.FAILED_OPEN); 1747 regionNode.setRegionLocation(null); 1748 boolean succ = false; 1749 try { 1750 regionStateStore.updateRegionLocation(regionNode); 1751 succ = true; 1752 } finally { 1753 if (!succ) { 1754 // revert 1755 regionNode.setState(state); 1756 regionNode.setRegionLocation(regionLocation); 1757 } 1758 } 1759 } 1760 if (regionLocation != null) { 1761 regionStates.removeRegionFromServer(regionLocation, regionNode); 1762 } 1763 } 1764 1765 // should be called under the RegionStateNode lock 1766 void regionClosing(RegionStateNode regionNode) throws IOException { 1767 transitStateAndUpdate(regionNode, State.CLOSING, STATES_EXPECTED_ON_CLOSING); 1768 1769 RegionInfo hri = regionNode.getRegionInfo(); 1770 // Set meta has not initialized early. so people trying to create/edit tables will wait 1771 if (isMetaRegion(hri)) { 1772 setMetaAssigned(hri, false); 1773 } 1774 regionStates.addRegionToServer(regionNode); 1775 // update the operation count metrics 1776 metrics.incrementOperationCounter(); 1777 } 1778 1779 // for open and close, they will first be persist to the procedure store in 1780 // RegionRemoteProcedureBase. So here we will first change the in memory state as it is considered 1781 // as succeeded if the persistence to procedure store is succeeded, and then when the 1782 // RegionRemoteProcedureBase is woken up, we will persist the RegionStateNode to hbase:meta. 
1783 1784 // should be called under the RegionStateNode lock 1785 void regionOpenedWithoutPersistingToMeta(RegionStateNode regionNode) throws IOException { 1786 regionNode.transitionState(State.OPEN, STATES_EXPECTED_ON_OPEN); 1787 RegionInfo regionInfo = regionNode.getRegionInfo(); 1788 regionStates.addRegionToServer(regionNode); 1789 regionStates.removeFromFailedOpen(regionInfo); 1790 } 1791 1792 // should be called under the RegionStateNode lock 1793 void regionClosedWithoutPersistingToMeta(RegionStateNode regionNode) throws IOException { 1794 ServerName regionLocation = regionNode.getRegionLocation(); 1795 regionNode.transitionState(State.CLOSED, STATES_EXPECTED_ON_CLOSED); 1796 regionNode.setRegionLocation(null); 1797 if (regionLocation != null) { 1798 regionNode.setLastHost(regionLocation); 1799 regionStates.removeRegionFromServer(regionLocation, regionNode); 1800 } 1801 } 1802 1803 // should be called under the RegionStateNode lock 1804 // for SCP 1805 public void regionClosedAbnormally(RegionStateNode regionNode) throws IOException { 1806 RegionState.State state = regionNode.getState(); 1807 ServerName regionLocation = regionNode.getRegionLocation(); 1808 regionNode.transitionState(State.ABNORMALLY_CLOSED); 1809 regionNode.setRegionLocation(null); 1810 boolean succ = false; 1811 try { 1812 regionStateStore.updateRegionLocation(regionNode); 1813 succ = true; 1814 } finally { 1815 if (!succ) { 1816 // revert 1817 regionNode.setState(state); 1818 regionNode.setRegionLocation(regionLocation); 1819 } 1820 } 1821 if (regionLocation != null) { 1822 regionNode.setLastHost(regionLocation); 1823 regionStates.removeRegionFromServer(regionLocation, regionNode); 1824 } 1825 } 1826 1827 void persistToMeta(RegionStateNode regionNode) throws IOException { 1828 regionStateStore.updateRegionLocation(regionNode); 1829 RegionInfo regionInfo = regionNode.getRegionInfo(); 1830 if (isMetaRegion(regionInfo) && regionNode.getState() == State.OPEN) { 1831 // Usually we'd set a table 
ENABLED at this stage but hbase:meta is ALWAYs enabled, it 1832 // can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager 1833 // which is backed by hbase:meta... Avoid setting ENABLED to avoid having to update state 1834 // on table that contains state. 1835 setMetaAssigned(regionInfo, true); 1836 } 1837 } 1838 1839 // ============================================================================================ 1840 // The above methods can only be called in TransitRegionStateProcedure(and related procedures) 1841 // ============================================================================================ 1842 1843 public void markRegionAsSplit(final RegionInfo parent, final ServerName serverName, 1844 final RegionInfo daughterA, final RegionInfo daughterB) throws IOException { 1845 // Update hbase:meta. Parent will be marked offline and split up in hbase:meta. 1846 // The parent stays in regionStates until cleared when removed by CatalogJanitor. 1847 // Update its state in regionStates to it shows as offline and split when read 1848 // later figuring what regions are in a table and what are not: see 1849 // regionStates#getRegionsOfTable 1850 final RegionStateNode node = regionStates.getOrCreateRegionStateNode(parent); 1851 node.setState(State.SPLIT); 1852 final RegionStateNode nodeA = regionStates.getOrCreateRegionStateNode(daughterA); 1853 nodeA.setState(State.SPLITTING_NEW); 1854 final RegionStateNode nodeB = regionStates.getOrCreateRegionStateNode(daughterB); 1855 nodeB.setState(State.SPLITTING_NEW); 1856 1857 regionStateStore.splitRegion(parent, daughterA, daughterB, serverName); 1858 if (shouldAssignFavoredNodes(parent)) { 1859 List<ServerName> onlineServers = this.master.getServerManager().getOnlineServersList(); 1860 ((FavoredNodesPromoter)getBalancer()). 1861 generateFavoredNodesForDaughter(onlineServers, parent, daughterA, daughterB); 1862 } 1863 } 1864 1865 /** 1866 * When called here, the merge has happened. 
The merged regions have been 1867 * unassigned and the above markRegionClosed has been called on each so they have been 1868 * disassociated from a hosting Server. The merged region will be open after this call. The 1869 * merged regions are removed from hbase:meta below. Later they are deleted from the filesystem 1870 * by the catalog janitor running against hbase:meta. It notices when the merged region no 1871 * longer holds references to the old regions (References are deleted after a compaction 1872 * rewrites what the Reference points at but not until the archiver chore runs, are the 1873 * References removed). 1874 */ 1875 public void markRegionAsMerged(final RegionInfo child, final ServerName serverName, 1876 RegionInfo [] mergeParents) 1877 throws IOException { 1878 final RegionStateNode node = regionStates.getOrCreateRegionStateNode(child); 1879 node.setState(State.MERGED); 1880 for (RegionInfo ri: mergeParents) { 1881 regionStates.deleteRegion(ri); 1882 1883 } 1884 regionStateStore.mergeRegions(child, mergeParents, serverName); 1885 if (shouldAssignFavoredNodes(child)) { 1886 ((FavoredNodesPromoter)getBalancer()). 1887 generateFavoredNodesForMergedRegion(child, mergeParents); 1888 } 1889 } 1890 1891 /* 1892 * Favored nodes should be applied only when FavoredNodes balancer is configured and the region 1893 * belongs to a non-system table. 
1894 */ 1895 private boolean shouldAssignFavoredNodes(RegionInfo region) { 1896 return this.shouldAssignRegionsWithFavoredNodes && 1897 FavoredNodesManager.isFavoredNodeApplicable(region); 1898 } 1899 1900 // ============================================================================================ 1901 // Assign Queue (Assign/Balance) 1902 // ============================================================================================ 1903 private final ArrayList<RegionStateNode> pendingAssignQueue = new ArrayList<RegionStateNode>(); 1904 private final ReentrantLock assignQueueLock = new ReentrantLock(); 1905 private final Condition assignQueueFullCond = assignQueueLock.newCondition(); 1906 1907 /** 1908 * Add the assign operation to the assignment queue. 1909 * The pending assignment operation will be processed, 1910 * and each region will be assigned by a server using the balancer. 1911 */ 1912 protected void queueAssign(final RegionStateNode regionNode) { 1913 regionNode.getProcedureEvent().suspend(); 1914 1915 // TODO: quick-start for meta and the other sys-tables? 
1916 assignQueueLock.lock(); 1917 try { 1918 pendingAssignQueue.add(regionNode); 1919 if (regionNode.isSystemTable() || 1920 pendingAssignQueue.size() == 1 || 1921 pendingAssignQueue.size() >= assignDispatchWaitQueueMaxSize) { 1922 assignQueueFullCond.signal(); 1923 } 1924 } finally { 1925 assignQueueLock.unlock(); 1926 } 1927 } 1928 1929 private void startAssignmentThread() { 1930 assignThread = new Thread(master.getServerName().toShortString()) { 1931 @Override 1932 public void run() { 1933 while (isRunning()) { 1934 processAssignQueue(); 1935 } 1936 pendingAssignQueue.clear(); 1937 } 1938 }; 1939 assignThread.setDaemon(true); 1940 assignThread.start(); 1941 } 1942 1943 private void stopAssignmentThread() { 1944 assignQueueSignal(); 1945 try { 1946 while (assignThread.isAlive()) { 1947 assignQueueSignal(); 1948 assignThread.join(250); 1949 } 1950 } catch (InterruptedException e) { 1951 LOG.warn("join interrupted", e); 1952 Thread.currentThread().interrupt(); 1953 } 1954 } 1955 1956 private void assignQueueSignal() { 1957 assignQueueLock.lock(); 1958 try { 1959 assignQueueFullCond.signal(); 1960 } finally { 1961 assignQueueLock.unlock(); 1962 } 1963 } 1964 1965 @edu.umd.cs.findbugs.annotations.SuppressWarnings("WA_AWAIT_NOT_IN_LOOP") 1966 private HashMap<RegionInfo, RegionStateNode> waitOnAssignQueue() { 1967 HashMap<RegionInfo, RegionStateNode> regions = null; 1968 1969 assignQueueLock.lock(); 1970 try { 1971 if (pendingAssignQueue.isEmpty() && isRunning()) { 1972 assignQueueFullCond.await(); 1973 } 1974 1975 if (!isRunning()) return null; 1976 assignQueueFullCond.await(assignDispatchWaitMillis, TimeUnit.MILLISECONDS); 1977 regions = new HashMap<RegionInfo, RegionStateNode>(pendingAssignQueue.size()); 1978 for (RegionStateNode regionNode: pendingAssignQueue) { 1979 regions.put(regionNode.getRegionInfo(), regionNode); 1980 } 1981 pendingAssignQueue.clear(); 1982 } catch (InterruptedException e) { 1983 LOG.warn("got interrupted ", e); 1984 
Thread.currentThread().interrupt(); 1985 } finally { 1986 assignQueueLock.unlock(); 1987 } 1988 return regions; 1989 } 1990 1991 private void processAssignQueue() { 1992 final HashMap<RegionInfo, RegionStateNode> regions = waitOnAssignQueue(); 1993 if (regions == null || regions.size() == 0 || !isRunning()) { 1994 return; 1995 } 1996 1997 if (LOG.isTraceEnabled()) { 1998 LOG.trace("PROCESS ASSIGN QUEUE regionCount=" + regions.size()); 1999 } 2000 2001 // TODO: Optimize balancer. pass a RegionPlan? 2002 final HashMap<RegionInfo, ServerName> retainMap = new HashMap<>(); 2003 final List<RegionInfo> userHRIs = new ArrayList<>(regions.size()); 2004 // Regions for system tables requiring reassignment 2005 final List<RegionInfo> systemHRIs = new ArrayList<>(); 2006 for (RegionStateNode regionStateNode: regions.values()) { 2007 boolean sysTable = regionStateNode.isSystemTable(); 2008 final List<RegionInfo> hris = sysTable? systemHRIs: userHRIs; 2009 if (regionStateNode.getRegionLocation() != null) { 2010 retainMap.put(regionStateNode.getRegionInfo(), regionStateNode.getRegionLocation()); 2011 } else { 2012 hris.add(regionStateNode.getRegionInfo()); 2013 } 2014 } 2015 2016 // TODO: connect with the listener to invalidate the cache 2017 2018 // TODO use events 2019 List<ServerName> servers = master.getServerManager().createDestinationServersList(); 2020 for (int i = 0; servers.size() < 1; ++i) { 2021 // Report every fourth time around this loop; try not to flood log. 2022 if (i % 4 == 0) { 2023 LOG.warn("No servers available; cannot place " + regions.size() + " unassigned regions."); 2024 } 2025 2026 if (!isRunning()) { 2027 LOG.debug("Stopped! 
Dropping assign of " + regions.size() + " queued regions."); 2028 return; 2029 } 2030 Threads.sleep(250); 2031 servers = master.getServerManager().createDestinationServersList(); 2032 } 2033 2034 if (!systemHRIs.isEmpty()) { 2035 // System table regions requiring reassignment are present, get region servers 2036 // not available for system table regions 2037 final List<ServerName> excludeServers = getExcludedServersForSystemTable(); 2038 List<ServerName> serversForSysTables = servers.stream() 2039 .filter(s -> !excludeServers.contains(s)).collect(Collectors.toList()); 2040 if (serversForSysTables.isEmpty()) { 2041 LOG.warn("Filtering old server versions and the excluded produced an empty set; " + 2042 "instead considering all candidate servers!"); 2043 } 2044 LOG.debug("Processing assignQueue; systemServersCount=" + serversForSysTables.size() + 2045 ", allServersCount=" + servers.size()); 2046 processAssignmentPlans(regions, null, systemHRIs, 2047 serversForSysTables.isEmpty() && !containsBogusAssignments(regions, systemHRIs) ? 
2048 servers: serversForSysTables); 2049 } 2050 2051 processAssignmentPlans(regions, retainMap, userHRIs, servers); 2052 } 2053 2054 private boolean containsBogusAssignments(Map<RegionInfo, RegionStateNode> regions, 2055 List<RegionInfo> hirs) { 2056 for (RegionInfo ri : hirs) { 2057 if (regions.get(ri).getRegionLocation() != null && 2058 regions.get(ri).getRegionLocation().equals(LoadBalancer.BOGUS_SERVER_NAME)){ 2059 return true; 2060 } 2061 } 2062 return false; 2063 } 2064 2065 private void processAssignmentPlans(final HashMap<RegionInfo, RegionStateNode> regions, 2066 final HashMap<RegionInfo, ServerName> retainMap, final List<RegionInfo> hris, 2067 final List<ServerName> servers) { 2068 boolean isTraceEnabled = LOG.isTraceEnabled(); 2069 if (isTraceEnabled) { 2070 LOG.trace("Available servers count=" + servers.size() + ": " + servers); 2071 } 2072 2073 final LoadBalancer balancer = getBalancer(); 2074 // ask the balancer where to place regions 2075 if (retainMap != null && !retainMap.isEmpty()) { 2076 if (isTraceEnabled) { 2077 LOG.trace("retain assign regions=" + retainMap); 2078 } 2079 try { 2080 acceptPlan(regions, balancer.retainAssignment(retainMap, servers)); 2081 } catch (HBaseIOException e) { 2082 LOG.warn("unable to retain assignment", e); 2083 addToPendingAssignment(regions, retainMap.keySet()); 2084 } 2085 } 2086 2087 // TODO: Do we need to split retain and round-robin? 2088 // the retain seems to fallback to round-robin/random if the region is not in the map. 
2089 if (!hris.isEmpty()) { 2090 Collections.sort(hris, RegionInfo.COMPARATOR); 2091 if (isTraceEnabled) { 2092 LOG.trace("round robin regions=" + hris); 2093 } 2094 try { 2095 acceptPlan(regions, balancer.roundRobinAssignment(hris, servers)); 2096 } catch (HBaseIOException e) { 2097 LOG.warn("unable to round-robin assignment", e); 2098 addToPendingAssignment(regions, hris); 2099 } 2100 } 2101 } 2102 2103 private void acceptPlan(final HashMap<RegionInfo, RegionStateNode> regions, 2104 final Map<ServerName, List<RegionInfo>> plan) throws HBaseIOException { 2105 final ProcedureEvent<?>[] events = new ProcedureEvent[regions.size()]; 2106 final long st = System.currentTimeMillis(); 2107 2108 if (plan == null) { 2109 throw new HBaseIOException("unable to compute plans for regions=" + regions.size()); 2110 } 2111 2112 if (plan.isEmpty()) return; 2113 2114 int evcount = 0; 2115 for (Map.Entry<ServerName, List<RegionInfo>> entry: plan.entrySet()) { 2116 final ServerName server = entry.getKey(); 2117 for (RegionInfo hri: entry.getValue()) { 2118 final RegionStateNode regionNode = regions.get(hri); 2119 regionNode.setRegionLocation(server); 2120 if (server.equals(LoadBalancer.BOGUS_SERVER_NAME) && regionNode.isSystemTable()) { 2121 assignQueueLock.lock(); 2122 try { 2123 pendingAssignQueue.add(regionNode); 2124 } finally { 2125 assignQueueLock.unlock(); 2126 } 2127 }else { 2128 events[evcount++] = regionNode.getProcedureEvent(); 2129 } 2130 } 2131 } 2132 ProcedureEvent.wakeEvents(getProcedureScheduler(), events); 2133 2134 final long et = System.currentTimeMillis(); 2135 if (LOG.isTraceEnabled()) { 2136 LOG.trace("ASSIGN ACCEPT " + events.length + " -> " + 2137 StringUtils.humanTimeDiff(et - st)); 2138 } 2139 } 2140 2141 private void addToPendingAssignment(final HashMap<RegionInfo, RegionStateNode> regions, 2142 final Collection<RegionInfo> pendingRegions) { 2143 assignQueueLock.lock(); 2144 try { 2145 for (RegionInfo hri: pendingRegions) { 2146 
pendingAssignQueue.add(regions.get(hri)); 2147 } 2148 } finally { 2149 assignQueueLock.unlock(); 2150 } 2151 } 2152 2153 /** 2154 * Get a list of servers that this region cannot be assigned to. 2155 * For system tables, we must assign them to a server with highest version. 2156 */ 2157 public List<ServerName> getExcludedServersForSystemTable() { 2158 // TODO: This should be a cached list kept by the ServerManager rather than calculated on each 2159 // move or system region assign. The RegionServerTracker keeps list of online Servers with 2160 // RegionServerInfo that includes Version. 2161 List<Pair<ServerName, String>> serverList = master.getServerManager().getOnlineServersList() 2162 .stream() 2163 .map((s)->new Pair<>(s, master.getRegionServerVersion(s))) 2164 .collect(Collectors.toList()); 2165 if (serverList.isEmpty()) { 2166 return Collections.emptyList(); 2167 } 2168 String highestVersion = Collections.max(serverList, 2169 (o1, o2) -> VersionInfo.compareVersion(o1.getSecond(), o2.getSecond())).getSecond(); 2170 return serverList.stream() 2171 .filter((p)->!p.getSecond().equals(highestVersion)) 2172 .map(Pair::getFirst) 2173 .collect(Collectors.toList()); 2174 } 2175 2176 @VisibleForTesting 2177 MasterServices getMaster() { 2178 return master; 2179 } 2180 2181 /** 2182 * @return a snapshot of rsReports 2183 */ 2184 public Map<ServerName, Set<byte[]>> getRSReports() { 2185 Map<ServerName, Set<byte[]>> rsReportsSnapshot = new HashMap<>(); 2186 synchronized (rsReports) { 2187 rsReports.entrySet().forEach(e -> rsReportsSnapshot.put(e.getKey(), e.getValue())); 2188 } 2189 return rsReportsSnapshot; 2190 } 2191 2192 /** 2193 * Provide regions state count for given table. 
2194 * e.g howmany regions of give table are opened/closed/rit etc 2195 * 2196 * @param tableName TableName 2197 * @return region states count 2198 */ 2199 public RegionStatesCount getRegionStatesCount(TableName tableName) { 2200 int openRegionsCount = 0; 2201 int closedRegionCount = 0; 2202 int ritCount = 0; 2203 int splitRegionCount = 0; 2204 int totalRegionCount = 0; 2205 if (!isTableDisabled(tableName)) { 2206 final List<RegionState> states = regionStates.getTableRegionStates(tableName); 2207 for (RegionState regionState : states) { 2208 if (regionState.isOpened()) { 2209 openRegionsCount++; 2210 } else if (regionState.isClosed()) { 2211 closedRegionCount++; 2212 } else if (regionState.isSplit()) { 2213 splitRegionCount++; 2214 } 2215 } 2216 totalRegionCount = states.size(); 2217 ritCount = totalRegionCount - openRegionsCount - splitRegionCount; 2218 } 2219 return new RegionStatesCount.RegionStatesCountBuilder() 2220 .setOpenRegions(openRegionsCount) 2221 .setClosedRegions(closedRegionCount) 2222 .setSplitRegions(splitRegionCount) 2223 .setRegionsInTransition(ritCount) 2224 .setTotalRegions(totalRegionCount) 2225 .build(); 2226 } 2227 2228}