/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK;
import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.MasterWalManager;
import org.apache.hadoop.hbase.master.SplitWALManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.master.replication.ClaimReplicationQueuesProcedure;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ServerCrashState;

/**
 * Handle a crashed server. This is a port to ProcedureV2 of what used to be euphemistically called
 * ServerShutdownHandler.
 * <p>
 * The procedure flow varies depending on whether meta is assigned and whether we are to split
 * logs.
 * <p>
 * We come in here after ServerManager has noticed a server has expired. Procedures queued on the
 * rpc should have been notified of the failure and should be concurrently getting themselves ready
 * to assign elsewhere.
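 * <p>
 * In outline, the happy path (no hbase:meta on the crashed server) walks the states
 * SERVER_CRASH_START, SERVER_CRASH_GET_REGIONS, SERVER_CRASH_SPLIT_LOGS, SERVER_CRASH_ASSIGN,
 * SERVER_CRASH_CLAIM_REPLICATION_QUEUES, and SERVER_CRASH_FINISH; if the server was carrying
 * hbase:meta, the meta WALs are split and meta is reassigned first. A rough sketch of how the
 * master queues one of these (illustrative only; in practice ServerManager expiry handling is the
 * usual trigger, not direct submission):
 *
 * <pre>
 * // Hypothetical scheduling sketch, not a prescribed entry point:
 * MasterProcedureEnv env = procExec.getEnvironment();
 * boolean shouldSplitWal = true;
 * procExec.submitProcedure(
 *   new ServerCrashProcedure(env, serverName, shouldSplitWal, carryingMeta));
 * </pre>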
 */
@InterfaceAudience.Private
public class ServerCrashProcedure extends
  StateMachineProcedure<MasterProcedureEnv, ServerCrashState> implements ServerProcedureInterface {
  private static final Logger LOG = LoggerFactory.getLogger(ServerCrashProcedure.class);

  /**
   * Configuration parameter to enable/disable retaining region assignments during
   * ServerCrashProcedure.
   * <p>
   * By default, retain assignment is disabled, which makes failover faster and improves
   * availability; this is useful in cloud scenarios where region block locality is not important.
   * Enable it when RegionServers are deployed on the same hosts where DataNodes are running; this
   * improves read performance thanks to local reads.
   * <p>
   * See HBASE-24900 for more details.
   */
  public static final String MASTER_SCP_RETAIN_ASSIGNMENT = "hbase.master.scp.retain.assignment";
  /** Default value of {@link #MASTER_SCP_RETAIN_ASSIGNMENT} */
  public static final boolean DEFAULT_MASTER_SCP_RETAIN_ASSIGNMENT = false;

  /**
   * Name of the crashed server to process.
   */
  private ServerName serverName;

  /**
   * Whether DeadServer knows that we are processing it.
   */
  private boolean notifiedDeadServer = false;

  /**
   * Regions that were on the crashed server.
   */
  private List<RegionInfo> regionsOnCrashedServer;

  private boolean carryingMeta = false;
  private boolean shouldSplitWal;
  private MonitoredTask status;
  // currentRunningState is updated when the ServerCrashProcedure gets scheduled. Progress updates
  // from child procedures do not change it, because the actual state would be overwritten by its
  // next state.
  private ServerCrashState currentRunningState = getInitialState();

  /**
   * Call this constructor to queue up a Procedure.
   * @param serverName     Name of the crashed server.
   * @param shouldSplitWal True if we should split WALs as part of crashed server processing.
   * @param carryingMeta   True if carrying the hbase:meta table region.
   */
  public ServerCrashProcedure(final MasterProcedureEnv env, final ServerName serverName,
    final boolean shouldSplitWal, final boolean carryingMeta) {
    this.serverName = serverName;
    this.shouldSplitWal = shouldSplitWal;
    this.carryingMeta = carryingMeta;
    this.setOwner(env.getRequestUser());
  }

  /**
   * Used when deserializing from a procedure store; we'll construct one of these and then call
   * #deserializeStateData(InputStream). Do not use directly.
   */
  public ServerCrashProcedure() {
  }

  public boolean isInRecoverMetaState() {
    return getCurrentState() == ServerCrashState.SERVER_CRASH_PROCESS_META;
  }

  @Override
  protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
    throws ProcedureSuspendedException, ProcedureYieldException {
    final MasterServices services = env.getMasterServices();
    final AssignmentManager am = env.getAssignmentManager();
    updateProgress(true);
    // HBASE-14802: If we have not yet notified that we are processing a dead server, do so now.
    // This adds the server to the DeadServer processing list but not to the DeadServers list.
    // The server is removed from the processing list below when the procedure finishes
    // successfully.
    if (!notifiedDeadServer) {
      services.getServerManager().getDeadServers().processing(serverName);
      notifiedDeadServer = true;
    }

    switch (state) {
      case SERVER_CRASH_START:
      case SERVER_CRASH_SPLIT_META_LOGS:
      case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
      case SERVER_CRASH_ASSIGN_META:
        break;
      default:
        // If hbase:meta is not assigned, yield.
        if (env.getAssignmentManager().waitMetaLoaded(this)) {
          throw new ProcedureSuspendedException();
        }
    }
    try {
      switch (state) {
        case SERVER_CRASH_START:
          LOG.info("Start " + this);
          // If carrying meta, process it first. Else, get the list of regions on the crashed
          // server.
          if (this.carryingMeta) {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
          }
          break;
        case SERVER_CRASH_SPLIT_META_LOGS:
          if (
            env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
              DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)
          ) {
            zkCoordinatedSplitMetaLogs(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
          } else {
            am.getRegionStates().metaLogSplitting(serverName);
            addChildProcedure(createSplittingWalProcedures(env, true));
            setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR);
          }
          break;
        case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
          if (isSplittingDone(env, true)) {
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
            am.getRegionStates().metaLogSplit(serverName);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
          }
          break;
        case SERVER_CRASH_ASSIGN_META:
          assignRegions(env, Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO));
          setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
          break;
        case SERVER_CRASH_GET_REGIONS:
          this.regionsOnCrashedServer = getRegionsOnCrashedServer(env);
          // Where to go next? Depends on whether we should split logs at all, or whether we
          // should do distributed log splitting.
          if (regionsOnCrashedServer != null) {
            LOG.info("{} had {} regions", serverName, regionsOnCrashedServer.size());
            if (LOG.isTraceEnabled()) {
              this.regionsOnCrashedServer.stream()
                .forEach(ri -> LOG.trace(ri.getShortNameToLog()));
            }
          }
          if (!this.shouldSplitWal) {
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
          }
          break;
        case SERVER_CRASH_SPLIT_LOGS:
          if (
            env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
              DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)
          ) {
            zkCoordinatedSplitLogs(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
          } else {
            am.getRegionStates().logSplitting(this.serverName);
            addChildProcedure(createSplittingWalProcedures(env, false));
            setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_WALS_DIR);
          }
          break;
        case SERVER_CRASH_DELETE_SPLIT_WALS_DIR:
          if (isSplittingDone(env, false)) {
            cleanupSplitDir(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
            am.getRegionStates().logSplit(this.serverName);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
          }
          break;
        case SERVER_CRASH_ASSIGN:
          // If there are no regions to assign, skip the assign and go straight to the finish.
          // Filter out meta regions; those are handled elsewhere in this procedure.
          // Filtering mutates this.regionsOnCrashedServer.
          if (filterDefaultMetaRegions()) {
            if (LOG.isTraceEnabled()) {
              LOG.trace("Assigning regions " + RegionInfo.getShortNameToLog(regionsOnCrashedServer)
                + ", " + this + "; cycles=" + getCycles());
            }
            assignRegions(env, regionsOnCrashedServer);
          }
          setNextState(ServerCrashState.SERVER_CRASH_CLAIM_REPLICATION_QUEUES);
          break;
        case SERVER_CRASH_HANDLE_RIT2:
          // Noop. Left in place because we used to call handleRIT here for a second time,
          // but that is no longer necessary since HBASE-20634.
          setNextState(ServerCrashState.SERVER_CRASH_CLAIM_REPLICATION_QUEUES);
          break;
        case SERVER_CRASH_CLAIM_REPLICATION_QUEUES:
          addChildProcedure(new ClaimReplicationQueuesProcedure(serverName));
          setNextState(ServerCrashState.SERVER_CRASH_FINISH);
          break;
        case SERVER_CRASH_FINISH:
          LOG.info("removed crashed server {} after splitting done", serverName);
          services.getAssignmentManager().getRegionStates().removeServer(serverName);
          services.getServerManager().getDeadServers().finish(serverName);
          updateProgress(true);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException("unhandled state=" + state);
      }
    } catch (IOException e) {
      LOG.warn("Failed state=" + state + ", retry " + this + "; cycles=" + getCycles(), e);
    }
    return Flow.HAS_MORE_STATE;
  }

  /** Returns the list of regions that were on the crashed server. */
  List<RegionInfo> getRegionsOnCrashedServer(MasterProcedureEnv env) {
    return env.getMasterServices().getAssignmentManager().getRegionsOnServer(serverName);
  }

  private void cleanupSplitDir(MasterProcedureEnv env) {
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    try {
      if (!this.carryingMeta) {
        // If we are NOT carrying hbase:meta, check whether there are any left-over hbase:meta WAL
        // files from an old hbase:meta tenancy on this server; clean these up, if any, before
        // trying to remove the WAL directory of this server, or we will fail. See the
        // archiveMetaLog comment for more details on this condition.
        env.getMasterServices().getMasterWalManager().archiveMetaLog(this.serverName);
      }
      splitWALManager.deleteWALDir(serverName);
    } catch (IOException e) {
      LOG.info("Removing the WAL directory for {} failed, ignoring...{}", serverName,
        e.getMessage());
    }
  }
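
  /**
   * Returns true when there are no more WALs left to split for the crashed server. Polled from
   * the SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR and SERVER_CRASH_DELETE_SPLIT_WALS_DIR states
   * above; while WALs remain, the procedure loops back to the corresponding split state.
   */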
  private boolean isSplittingDone(MasterProcedureEnv env, boolean splitMeta) {
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    try {
      int wals = splitWALManager.getWALsToSplit(serverName, splitMeta).size();
      LOG.debug("Check if WAL splitting for {} is done: wals={}, meta={}", serverName, wals,
        splitMeta);
      return wals == 0;
    } catch (IOException e) {
      LOG.warn("Getting WALs of {} failed, retrying...", serverName, e);
      return false;
    }
  }

  private Procedure[] createSplittingWalProcedures(MasterProcedureEnv env, boolean splitMeta)
    throws IOException {
    LOG.info("Splitting WALs {}, isMeta: {}", this, splitMeta);
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    List<Procedure> procedures = splitWALManager.splitWALs(serverName, splitMeta);
    return procedures.toArray(new Procedure[procedures.size()]);
  }

  private boolean filterDefaultMetaRegions() {
    if (regionsOnCrashedServer == null) {
      return false;
    }
    regionsOnCrashedServer.removeIf(this::isDefaultMetaRegion);
    return !regionsOnCrashedServer.isEmpty();
  }

  private boolean isDefaultMetaRegion(RegionInfo hri) {
    return hri.isMetaRegion() && RegionReplicaUtil.isDefaultReplica(hri);
  }

  /**
   * Split hbase:meta logs using 'classic' zk-based coordination. Superseded by procedure-based
   * WAL splitting.
   * @see #createSplittingWalProcedures(MasterProcedureEnv, boolean)
   */
  private void zkCoordinatedSplitMetaLogs(MasterProcedureEnv env) throws IOException {
    LOG.debug("Splitting meta WALs {}", this);
    MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    am.getRegionStates().metaLogSplitting(serverName);
    mwm.splitMetaLog(serverName);
    am.getRegionStates().metaLogSplit(serverName);
    LOG.debug("Done splitting meta WALs {}", this);
  }

  /**
   * Split logs using 'classic' zk-based coordination. Superseded by procedure-based WAL
   * splitting.
   * @see #createSplittingWalProcedures(MasterProcedureEnv, boolean)
   */
  private void zkCoordinatedSplitLogs(final MasterProcedureEnv env) throws IOException {
    LOG.debug("Splitting WALs {}", this);
    MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    // TODO: For Matteo. Below BLOCKs!!!! Redo so we can relinquish the executor while it is
    // running. PROBLEM!!! WE BLOCK HERE. Can block for hours if there are hundreds of WALs to
    // split and hundreds of SCPs running because a big cluster crashed down.
    am.getRegionStates().logSplitting(this.serverName);
    mwm.splitLog(this.serverName);
    if (!carryingMeta) {
      mwm.archiveMetaLog(this.serverName);
    }
    am.getRegionStates().logSplit(this.serverName);
    LOG.debug("Done splitting WALs {}", this);
  }

  void updateProgress(boolean updateState) {
    String msg = "Processing ServerCrashProcedure of " + serverName;
    if (status == null) {
      status = TaskMonitor.get().createStatus(msg);
      return;
    }
    if (currentRunningState == ServerCrashState.SERVER_CRASH_FINISH) {
      status.markComplete(msg + " done");
      return;
    }
    if (updateState) {
      currentRunningState = getCurrentState();
    }
    int childrenLatch = getChildrenLatch();
    status.setStatus(msg + " current State " + currentRunningState
      + (childrenLatch > 0
        ? "; remaining num of running child procedures = " + childrenLatch
        : ""));
  }

  @Override
  protected void rollbackState(MasterProcedureEnv env, ServerCrashState state) throws IOException {
    // Can't rollback.
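    // Server crash handling is not undoable: WALs may already have been split and regions
    // reassigned, so the procedure must drive forward to completion once started.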
    throw new UnsupportedOperationException("unhandled state=" + state);
  }

  @Override
  protected ServerCrashState getState(int stateId) {
    return ServerCrashState.forNumber(stateId);
  }

  @Override
  protected int getStateId(ServerCrashState state) {
    return state.getNumber();
  }

  @Override
  protected ServerCrashState getInitialState() {
    return ServerCrashState.SERVER_CRASH_START;
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // TODO
    return false;
  }

  @Override
  protected LockState acquireLock(final MasterProcedureEnv env) {
    if (env.getProcedureScheduler().waitServerExclusiveLock(this, getServerName())) {
      return LockState.LOCK_EVENT_WAIT;
    }
    return LockState.LOCK_ACQUIRED;
  }

  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureScheduler().wakeServerExclusiveLock(this, getServerName());
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getProcName());
    sb.append(", splitWal=");
    sb.append(shouldSplitWal);
    sb.append(", meta=");
    sb.append(carryingMeta);
  }

  @Override
  public String getProcName() {
    return getClass().getSimpleName() + " " + this.serverName;
  }

  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.serializeStateData(serializer);

    MasterProcedureProtos.ServerCrashStateData.Builder state =
      MasterProcedureProtos.ServerCrashStateData.newBuilder()
        .setServerName(ProtobufUtil.toServerName(this.serverName))
        .setCarryingMeta(this.carryingMeta).setShouldSplitWal(this.shouldSplitWal);
    if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
      for (RegionInfo hri : this.regionsOnCrashedServer) {
        state.addRegionsOnCrashedServer(ProtobufUtil.toRegionInfo(hri));
      }
    }
    serializer.serialize(state.build());
  }

  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.deserializeStateData(serializer);

    MasterProcedureProtos.ServerCrashStateData state =
      serializer.deserialize(MasterProcedureProtos.ServerCrashStateData.class);
    this.serverName = ProtobufUtil.toServerName(state.getServerName());
    this.carryingMeta = state.hasCarryingMeta() ? state.getCarryingMeta() : false;
    // shouldSplitWal has a default over in pb, so this invocation will always work.
    this.shouldSplitWal = state.getShouldSplitWal();
    int size = state.getRegionsOnCrashedServerCount();
    if (size > 0) {
      this.regionsOnCrashedServer = new ArrayList<>(size);
      for (org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo ri : state
        .getRegionsOnCrashedServerList()) {
        this.regionsOnCrashedServer.add(ProtobufUtil.toRegionInfo(ri));
      }
    }
    updateProgress(false);
  }

  @Override
  public ServerName getServerName() {
    return this.serverName;
  }

  @Override
  public boolean hasMetaTableRegion() {
    return this.carryingMeta;
  }

  @Override
  public ServerOperationType getServerOperationType() {
    return ServerOperationType.CRASH_HANDLER;
  }

  @Override
  protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
    // The operation is triggered internally on the server;
    // the client does not know about this procedure.
    return false;
  }

  /**
   * Moved out here so it can be overridden by the HBCK fix-up SCP, which is less strict about
   * what it will tolerate as a 'match'.
   * @return True if the region location in <code>rsn</code> matches that of this crashed server.
   */
  protected boolean isMatchingRegionLocation(RegionStateNode rsn) {
    return this.serverName.equals(rsn.getRegionLocation());
  }

  /**
   * Assign the regions on the crashed RS to other RSes.
   * <p/>
   * In this method we go through all the RegionStateNodes of the given regions to find out
   * whether there is already a TRSP for the region; if so, we interrupt it and let it retry on
   * another server, otherwise we schedule a TRSP to bring the region online.
   * <p/>
   * We also check whether the table for a region is enabled; if not, we skip assigning it.
   */
  private void assignRegions(MasterProcedureEnv env, List<RegionInfo> regions) throws IOException {
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    boolean retainAssignment = env.getMasterConfiguration().getBoolean(MASTER_SCP_RETAIN_ASSIGNMENT,
      DEFAULT_MASTER_SCP_RETAIN_ASSIGNMENT);
    for (RegionInfo region : regions) {
      RegionStateNode regionNode = am.getRegionStates().getOrCreateRegionStateNode(region);
      regionNode.lock();
      try {
        // This is possible: when a server is dead, a TRSP will fail to schedule a RemoteProcedure
        // and then try to assign the region to a new RS. Before it has updated the region
        // location to the new RS, we may have already called am.getRegionsOnServer, so we would
        // consider the region to still be on this crashed server. Then, before we arrive here,
        // the TRSP could have updated the region location, or even finished itself, so the region
        // is no longer on this crashed server. We should not try to assign it again. Please see
        // HBASE-23594 for more details.
        // UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get
        // in the way of our clearing out 'Unknown Servers'.
        if (!isMatchingRegionLocation(regionNode)) {
          // See HBASE-24117: though we have already changed the shutdown order, it is still worth
          // double checking here to confirm that we do not skip assignment incorrectly.
          if (!am.isRunning()) {
            throw new DoNotRetryIOException(
              "AssignmentManager has been stopped, can not process assignment any more");
          }
          LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...",
            this, regionNode, serverName);
          continue;
        }
        if (regionNode.getProcedure() != null) {
          LOG.info("{} found RIT {}; {}", this, regionNode.getProcedure(), regionNode);
          regionNode.getProcedure().serverCrashed(env, regionNode, getServerName(),
            !retainAssignment);
          continue;
        }
        if (
          env.getMasterServices().getTableStateManager().isTableState(regionNode.getTable(),
            TableState.State.DISABLING)
        ) {
          // We need to change the state here, otherwise the TRSP scheduled by DTP will try to
          // close the region from a dead server and will never succeed. Please see HBASE-23636
          // for more details.
          env.getAssignmentManager().regionClosedAbnormally(regionNode);
          LOG.info("{} found table disabling for region {}, set its state to ABNORMALLY_CLOSED.",
            this, regionNode);
          continue;
        }
        if (
          env.getMasterServices().getTableStateManager().isTableState(regionNode.getTable(),
            TableState.State.DISABLED)
        ) {
          // This should not happen: the table is disabled, but still has regions on the server.
          LOG.warn("Found table disabled for region {}, procDetails: {}", regionNode, this);
          continue;
        }
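        // No TRSP in flight and the table is enabled: schedule a fresh assign. Passing
        // !retainAssignment requests a new plan when retain assignment is disabled, while the
        // null target server leaves the placement decision to the balancer.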
        TransitRegionStateProcedure proc =
          TransitRegionStateProcedure.assign(env, region, !retainAssignment, null);
        regionNode.setProcedure(proc);
        addChildProcedure(proc);
      } finally {
        regionNode.unlock();
      }
    }
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getMasterServices().getMasterMetrics().getServerCrashProcMetrics();
  }

  @Override
  protected boolean holdLock(MasterProcedureEnv env) {
    return true;
  }

  public static void updateProgress(MasterProcedureEnv env, long parentId) {
    if (parentId == NO_PROC_ID) {
      return;
    }
    Procedure parentProcedure =
      env.getMasterServices().getMasterProcedureExecutor().getProcedure(parentId);
    // instanceof is null-safe, so no separate null check is needed.
    if (parentProcedure instanceof ServerCrashProcedure) {
      ((ServerCrashProcedure) parentProcedure).updateProgress(false);
    }
  }
}