/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK;
import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.MasterWalManager;
import org.apache.hadoop.hbase.master.SplitWALManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.master.replication.ClaimReplicationQueuesProcedure;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ServerCrashState;
/**
 * Handle a crashed server. This is a port to ProcedureV2 of what used to be euphemistically called
 * ServerShutdownHandler.
 * <p>
 * The procedure flow varies depending on whether meta is assigned and whether we are to split logs.
 * <p>
 * We come in here after ServerManager has noticed a server has expired. Procedures queued on the
 * rpc should have been notified of the failure and should be concurrently getting themselves ready
 * to assign elsewhere.
 */
@InterfaceAudience.Private
public class ServerCrashProcedure extends
  StateMachineProcedure<MasterProcedureEnv, ServerCrashState> implements ServerProcedureInterface {
  private static final Logger LOG = LoggerFactory.getLogger(ServerCrashProcedure.class);

  /**
   * Configuration parameter to enable/disable retaining region assignment during
   * ServerCrashProcedure.
   * <p>
   * By default retained assignment is disabled, which makes failover faster and improves
   * availability; this is useful for cloud scenarios where region block locality is not important.
   * Enable it when RegionServers are deployed on the same hosts as Datanodes; local reads will
   * then improve read performance.
   * <p>
   * See HBASE-24900 for more details.
   */
  public static final String MASTER_SCP_RETAIN_ASSIGNMENT = "hbase.master.scp.retain.assignment";
  /** Default value of {@link #MASTER_SCP_RETAIN_ASSIGNMENT} */
  public static final boolean DEFAULT_MASTER_SCP_RETAIN_ASSIGNMENT = false;
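
  // Illustrative usage sketch only (not part of this class's API): an operator would enable
  // retained assignment in hbase-site.xml,
  //   <property>
  //     <name>hbase.master.scp.retain.assignment</name>
  //     <value>true</value>
  //   </property>
  // or programmatically on the master Configuration:
  //   conf.setBoolean(ServerCrashProcedure.MASTER_SCP_RETAIN_ASSIGNMENT, true);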

  /**
   * Name of the crashed server to process.
   */
  private ServerName serverName;

  /**
   * Whether DeadServer knows that we are processing it.
   */
  private boolean notifiedDeadServer = false;

  /**
   * Regions that were on the crashed server.
   */
  private List<RegionInfo> regionsOnCrashedServer;

  private boolean carryingMeta = false;
  private boolean shouldSplitWal;
  private MonitoredTask status;
  // currentRunningState is updated when the ServerCrashProcedure gets scheduled; progress updates
  // from child procedures do not update the state, because the actual state would be overwritten
  // by its next state.
  private ServerCrashState currentRunningState = getInitialState();

  /**
   * Call this constructor when queuing up a Procedure.
   * @param serverName     Name of the crashed server.
   * @param shouldSplitWal True if we should split WALs as part of crashed server processing.
   * @param carryingMeta   True if carrying hbase:meta table region.
   */
  public ServerCrashProcedure(final MasterProcedureEnv env, final ServerName serverName,
    final boolean shouldSplitWal, final boolean carryingMeta) {
    this.serverName = serverName;
    this.shouldSplitWal = shouldSplitWal;
    this.carryingMeta = carryingMeta;
    this.setOwner(env.getRequestUser());
  }
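
  // Illustrative sketch only, using names that appear elsewhere in this file: the master queues
  // this procedure once ServerManager notices a server has expired, roughly:
  //   ProcedureExecutor<MasterProcedureEnv> procExec =
  //     env.getMasterServices().getMasterProcedureExecutor();
  //   procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(), serverName,
  //     shouldSplitWal, carryingMeta));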

  /**
   * Used when deserializing from a procedure store; we'll construct one of these and then call
   * #deserializeStateData(InputStream). Do not use directly.
   */
  public ServerCrashProcedure() {
  }

  public boolean isInRecoverMetaState() {
    return getCurrentState() == ServerCrashState.SERVER_CRASH_PROCESS_META;
  }

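  /*
   * A sketch of the nominal flow through the states handled in executeFromState below. On the
   * zk-coordinated path the SPLIT states do the split inline and the DELETE_SPLIT_*_WALS_DIR
   * states are skipped; on the procedure-based path the DELETE states loop back to the
   * corresponding SPLIT states until no WALs remain:
   *
   *   START -> [SPLIT_META_LOGS <-> DELETE_SPLIT_META_WALS_DIR, then ASSIGN_META] (carryingMeta)
   *         -> GET_REGIONS -> [SPLIT_LOGS <-> DELETE_SPLIT_WALS_DIR] (shouldSplitWal)
   *         -> ASSIGN -> CLAIM_REPLICATION_QUEUES -> FINISH
   */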
  @Override
  protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
    throws ProcedureSuspendedException, ProcedureYieldException {
    final MasterServices services = env.getMasterServices();
    final AssignmentManager am = env.getAssignmentManager();
    updateProgress(true);
    // HBASE-14802 If we have not yet notified that we are processing a dead server, do so now.
    // This adds the server to the DeadServer processing list but not to the DeadServers list.
    // The server gets removed from the processing list below on successful procedure finish.
    if (!notifiedDeadServer) {
      services.getServerManager().getDeadServers().processing(serverName);
      notifiedDeadServer = true;
    }

    switch (state) {
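      // The meta-related states below must be able to run before hbase:meta is online, since they
      // are the ones that bring it back; every other state first waits for meta to be loaded.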
      case SERVER_CRASH_START:
      case SERVER_CRASH_SPLIT_META_LOGS:
      case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
      case SERVER_CRASH_ASSIGN_META:
        break;
      default:
        // If hbase:meta is not yet loaded, suspend; we will be woken once it is.
        if (env.getAssignmentManager().waitMetaLoaded(this)) {
          throw new ProcedureSuspendedException();
        }
    }
    try {
      switch (state) {
        case SERVER_CRASH_START:
          LOG.info("Start " + this);
          // If carrying meta, process it first. Else, get list of regions on crashed server.
          if (this.carryingMeta) {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
          }
          break;
        case SERVER_CRASH_SPLIT_META_LOGS:
          if (
            env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
              DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)
          ) {
            zkCoordinatedSplitMetaLogs(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
          } else {
            am.getRegionStates().metaLogSplitting(serverName);
            addChildProcedure(createSplittingWalProcedures(env, true));
            setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR);
          }
          break;
        case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
          if (isSplittingDone(env, true)) {
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
            am.getRegionStates().metaLogSplit(serverName);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
          }
          break;
        case SERVER_CRASH_ASSIGN_META:
          assignRegions(env, Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO));
          setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
          break;
        case SERVER_CRASH_GET_REGIONS:
          this.regionsOnCrashedServer = getRegionsOnCrashedServer(env);
          // Where to go next? Depends on whether we should split logs at all or
          // if we should do distributed log splitting.
          if (regionsOnCrashedServer != null) {
            LOG.info("{} had {} regions", serverName, regionsOnCrashedServer.size());
            if (LOG.isTraceEnabled()) {
              this.regionsOnCrashedServer.forEach(ri -> LOG.trace(ri.getShortNameToLog()));
            }
          }
          if (!this.shouldSplitWal) {
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
          }
          break;
        case SERVER_CRASH_SPLIT_LOGS:
          if (
            env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
              DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)
          ) {
            zkCoordinatedSplitLogs(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
          } else {
            am.getRegionStates().logSplitting(this.serverName);
            addChildProcedure(createSplittingWalProcedures(env, false));
            setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_WALS_DIR);
          }
          break;
        case SERVER_CRASH_DELETE_SPLIT_WALS_DIR:
          if (isSplittingDone(env, false)) {
            cleanupSplitDir(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
            am.getRegionStates().logSplit(this.serverName);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
          }
          break;
        case SERVER_CRASH_ASSIGN:
          // If there are no regions to assign, skip straight to the finish.
          // Filter out meta regions; those are handled elsewhere in this procedure.
          // The filter changes this.regionsOnCrashedServer.
          if (filterDefaultMetaRegions()) {
            if (LOG.isTraceEnabled()) {
              LOG.trace("Assigning regions " + RegionInfo.getShortNameToLog(regionsOnCrashedServer)
                + ", " + this + "; cycles=" + getCycles());
            }
            assignRegions(env, regionsOnCrashedServer);
          }
          setNextState(ServerCrashState.SERVER_CRASH_CLAIM_REPLICATION_QUEUES);
          break;
        case SERVER_CRASH_HANDLE_RIT2:
          // Noop. Left in place because we used to call handleRIT here a second time, but that is
          // no longer necessary since HBASE-20634.
          setNextState(ServerCrashState.SERVER_CRASH_CLAIM_REPLICATION_QUEUES);
          break;
        case SERVER_CRASH_CLAIM_REPLICATION_QUEUES:
          addChildProcedure(new ClaimReplicationQueuesProcedure(serverName));
          setNextState(ServerCrashState.SERVER_CRASH_FINISH);
          break;
        case SERVER_CRASH_FINISH:
          LOG.info("Removed crashed server {} after splitting done", serverName);
          services.getAssignmentManager().getRegionStates().removeServer(serverName);
          services.getServerManager().getDeadServers().finish(serverName);
          updateProgress(true);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException("unhandled state=" + state);
      }
    } catch (IOException e) {
      LOG.warn("Failed state=" + state + ", retry " + this + "; cycles=" + getCycles(), e);
    }
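    // Note: an IOException above does not fail the procedure; we fall through and return
    // HAS_MORE_STATE so the framework re-executes the current state (retry until success).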
    return Flow.HAS_MORE_STATE;
  }

  /** Returns the list of Regions on the crashed server. */
  List<RegionInfo> getRegionsOnCrashedServer(MasterProcedureEnv env) {
    return env.getMasterServices().getAssignmentManager().getRegionsOnServer(serverName);
  }

  private void cleanupSplitDir(MasterProcedureEnv env) {
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    try {
      if (!this.carryingMeta) {
        // If we are NOT carrying hbase:meta, check for any left-over hbase:meta WAL files from an
        // old hbase:meta tenancy on this server; clean these up, if any, before trying to remove
        // the WAL directory of this server or we will fail. See the archiveMetaLog comment for
        // more details on this condition.
        env.getMasterServices().getMasterWalManager().archiveMetaLog(this.serverName);
      }
      splitWALManager.deleteWALDir(serverName);
    } catch (IOException e) {
      LOG.info("Removing WAL directory for {} failed, ignoring...{}", serverName, e.getMessage());
    }
  }

  private boolean isSplittingDone(MasterProcedureEnv env, boolean splitMeta) {
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    try {
      int wals = splitWALManager.getWALsToSplit(serverName, splitMeta).size();
      LOG.debug("Checking if WAL splitting is done for {}: wals={}, meta={}", serverName, wals,
        splitMeta);
      return wals == 0;
    } catch (IOException e) {
      LOG.warn("Getting WALs of {} failed, retrying...", serverName, e);
      return false;
    }
  }

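  /**
   * Build the WAL-splitting child procedures for the crashed server (the non-ZK, procedure-based
   * path). The returned procedures are scheduled via addChildProcedure in executeFromState, and
   * isSplittingDone above re-checks whether any WALs remain before the SCP moves on.
   */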
  private Procedure[] createSplittingWalProcedures(MasterProcedureEnv env, boolean splitMeta)
    throws IOException {
    LOG.info("Splitting WALs {}, isMeta: {}", this, splitMeta);
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    List<Procedure> procedures = splitWALManager.splitWALs(serverName, splitMeta);
    return procedures.toArray(new Procedure[procedures.size()]);
  }
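  /**
   * Remove the default hbase:meta replica from {@link #regionsOnCrashedServer}; it is assigned in
   * the SERVER_CRASH_ASSIGN_META state, not here. Non-default meta replicas stay in the list and
   * are assigned along with the other regions.
   * @return true if any regions are left to assign after filtering.
   */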
  private boolean filterDefaultMetaRegions() {
    if (regionsOnCrashedServer == null) {
      return false;
    }
    regionsOnCrashedServer.removeIf(this::isDefaultMetaRegion);
    return !regionsOnCrashedServer.isEmpty();
  }

  private boolean isDefaultMetaRegion(RegionInfo hri) {
    return hri.isMetaRegion() && RegionReplicaUtil.isDefaultReplica(hri);
  }

  /**
   * Split hbase:meta logs using 'classic' zk-based coordination. Superseded by procedure-based WAL
   * splitting.
   * @see #createSplittingWalProcedures(MasterProcedureEnv, boolean)
   */
  private void zkCoordinatedSplitMetaLogs(MasterProcedureEnv env) throws IOException {
    LOG.debug("Splitting meta WALs {}", this);
    MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    am.getRegionStates().metaLogSplitting(serverName);
    mwm.splitMetaLog(serverName);
    am.getRegionStates().metaLogSplit(serverName);
    LOG.debug("Done splitting meta WALs {}", this);
  }

  /**
   * Split logs using 'classic' zk-based coordination. Superseded by procedure-based WAL splitting.
   * @see #createSplittingWalProcedures(MasterProcedureEnv, boolean)
   */
  private void zkCoordinatedSplitLogs(final MasterProcedureEnv env) throws IOException {
    LOG.debug("Splitting WALs {}", this);
    MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    // TODO: For Matteo. Below BLOCKs!!!! Redo so can relinquish executor while it is running.
    // PROBLEM!!! WE BLOCK HERE. Can block for hours if hundreds of WALs to split and hundreds
    // of SCPs running because big cluster crashed down.
    am.getRegionStates().logSplitting(this.serverName);
    mwm.splitLog(this.serverName);
    if (!carryingMeta) {
      mwm.archiveMetaLog(this.serverName);
    }
    am.getRegionStates().logSplit(this.serverName);
    LOG.debug("Done splitting WALs {}", this);
  }

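  /**
   * Update the MonitoredTask that reports this procedure's progress to the TaskMonitor. When
   * {@code updateState} is true, {@link #currentRunningState} is refreshed from the current
   * procedure state; callers that only refresh the status message (deserialization, and child
   * procedures via the static {@link #updateProgress(MasterProcedureEnv, long)}) pass false.
   */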
  void updateProgress(boolean updateState) {
    String msg = "Processing ServerCrashProcedure of " + serverName;
    if (status == null) {
      status = TaskMonitor.get().createStatus(msg);
      return;
    }
    if (currentRunningState == ServerCrashState.SERVER_CRASH_FINISH) {
      status.markComplete(msg + " done");
      return;
    }
    if (updateState) {
      currentRunningState = getCurrentState();
    }
    int childrenLatch = getChildrenLatch();
    status.setStatus(msg + " current State " + currentRunningState
      + (childrenLatch > 0
        ? "; remaining num of running child procedures = " + childrenLatch
        : ""));
  }

  @Override
  protected void rollbackState(MasterProcedureEnv env, ServerCrashState state) throws IOException {
    // Can't rollback.
    throw new UnsupportedOperationException("unhandled state=" + state);
  }

  @Override
  protected ServerCrashState getState(int stateId) {
    return ServerCrashState.forNumber(stateId);
  }

  @Override
  protected int getStateId(ServerCrashState state) {
    return state.getNumber();
  }

  @Override
  protected ServerCrashState getInitialState() {
    return ServerCrashState.SERVER_CRASH_START;
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // TODO
    return false;
  }

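  // Locking: the SCP takes the exclusive server lock for the crashed server and, because holdLock
  // below returns true, keeps it for its whole lifetime, so no other server procedure against the
  // same server runs concurrently with it.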
  @Override
  protected LockState acquireLock(final MasterProcedureEnv env) {
    if (env.getProcedureScheduler().waitServerExclusiveLock(this, getServerName())) {
      return LockState.LOCK_EVENT_WAIT;
    }
    return LockState.LOCK_ACQUIRED;
  }

  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureScheduler().wakeServerExclusiveLock(this, getServerName());
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getProcName());
    sb.append(", splitWal=");
    sb.append(shouldSplitWal);
    sb.append(", meta=");
    sb.append(carryingMeta);
  }

  @Override
  public String getProcName() {
    return getClass().getSimpleName() + " " + this.serverName;
  }

  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.serializeStateData(serializer);

    MasterProcedureProtos.ServerCrashStateData.Builder state =
      MasterProcedureProtos.ServerCrashStateData.newBuilder()
        .setServerName(ProtobufUtil.toServerName(this.serverName))
        .setCarryingMeta(this.carryingMeta).setShouldSplitWal(this.shouldSplitWal);
    if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
      for (RegionInfo hri : this.regionsOnCrashedServer) {
        state.addRegionsOnCrashedServer(ProtobufUtil.toRegionInfo(hri));
      }
    }
    serializer.serialize(state.build());
  }

  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.deserializeStateData(serializer);

    MasterProcedureProtos.ServerCrashStateData state =
      serializer.deserialize(MasterProcedureProtos.ServerCrashStateData.class);
    this.serverName = ProtobufUtil.toServerName(state.getServerName());
    this.carryingMeta = state.hasCarryingMeta() ? state.getCarryingMeta() : false;
    // shouldSplitWal has a default in the pb definition, so this invocation always works.
    this.shouldSplitWal = state.getShouldSplitWal();
    int size = state.getRegionsOnCrashedServerCount();
    if (size > 0) {
      this.regionsOnCrashedServer = new ArrayList<>(size);
      for (org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo ri : state
        .getRegionsOnCrashedServerList()) {
        this.regionsOnCrashedServer.add(ProtobufUtil.toRegionInfo(ri));
      }
    }
    updateProgress(false);
  }

  @Override
  public ServerName getServerName() {
    return this.serverName;
  }

  @Override
  public boolean hasMetaTableRegion() {
    return this.carryingMeta;
  }

  @Override
  public ServerOperationType getServerOperationType() {
    return ServerOperationType.CRASH_HANDLER;
  }

  @Override
  protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
    // The operation is triggered internally on the server;
    // the client does not know about this procedure.
    return false;
  }

  /**
   * Moved out here so it can be overridden by the HBCK fix-up SCP to be less strict about what it
   * will tolerate as a 'match'.
   * @return True if the region location in <code>rsn</code> matches that of this crashed server.
   */
  protected boolean isMatchingRegionLocation(RegionStateNode rsn) {
    return this.serverName.equals(rsn.getRegionLocation());
  }

  /**
   * Assign the regions that were on the crashed RS to other RSes.
   * <p/>
   * In this method we go through all the RegionStateNodes of the given regions to find out whether
   * there is already a TRSP for the region; if so, we interrupt it and let it retry on another
   * server, otherwise we schedule a TRSP to bring the region online.
   * <p/>
   * We also check whether the table for a region is enabled; if not, we skip assigning it.
   */
  private void assignRegions(MasterProcedureEnv env, List<RegionInfo> regions) throws IOException {
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    boolean retainAssignment = env.getMasterConfiguration().getBoolean(MASTER_SCP_RETAIN_ASSIGNMENT,
      DEFAULT_MASTER_SCP_RETAIN_ASSIGNMENT);
    for (RegionInfo region : regions) {
      RegionStateNode regionNode = am.getRegionStates().getOrCreateRegionStateNode(region);
      regionNode.lock();
      try {
        // This is possible: when a server is dead, TRSP will fail to schedule a RemoteProcedure
        // and then try to assign the region to a new RS. Before it has updated the region
        // location to the new RS, we may have already called am.getRegionsOnServer, so we will
        // consider the region to be still on this crashed server. Then, before we arrive here, the
        // TRSP could have updated the region location, or even finished itself, so the region is
        // no longer on this crashed server. We should not try to assign it again. Please see
        // HBASE-23594 for more details.
        // UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get
        // in the way of our clearing out 'Unknown Servers'.
        if (!isMatchingRegionLocation(regionNode)) {
          // See HBASE-24117: though we have already changed the shutdown order, it is still worth
          // double checking here to confirm that we do not skip assignment incorrectly.
          if (!am.isRunning()) {
            throw new DoNotRetryIOException(
              "AssignmentManager has been stopped, can not process assignment any more");
          }
          LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...",
            this, regionNode, serverName);
          continue;
        }
        if (regionNode.getProcedure() != null) {
          LOG.info("{} found RIT {}; {}", this, regionNode.getProcedure(), regionNode);
          regionNode.getProcedure().serverCrashed(env, regionNode, getServerName(),
            !retainAssignment);
          continue;
        }
        if (
          env.getMasterServices().getTableStateManager().isTableState(regionNode.getTable(),
            TableState.State.DISABLING)
        ) {
          // We need to change the state here, otherwise the TRSP scheduled by DTP will try to
          // close the region from a dead server and will never succeed. Please see HBASE-23636
          // for more details.
          env.getAssignmentManager().regionClosedAbnormally(regionNode);
          LOG.info("{} found table disabling for region {}, setting its state to"
            + " ABNORMALLY_CLOSED.", this, regionNode);
          continue;
        }
        if (
          env.getMasterServices().getTableStateManager().isTableState(regionNode.getTable(),
            TableState.State.DISABLED)
        ) {
          // This should not happen: the table is disabled but still has regions on the server.
          LOG.warn("Found table disabled for region {}, procDetails: {}", regionNode, this);
          continue;
        }
        TransitRegionStateProcedure proc =
          TransitRegionStateProcedure.assign(env, region, !retainAssignment, null);
        regionNode.setProcedure(proc);
        addChildProcedure(proc);
      } finally {
        regionNode.unlock();
      }
    }
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getMasterServices().getMasterMetrics().getServerCrashProcMetrics();
  }

  @Override
  protected boolean holdLock(MasterProcedureEnv env) {
    return true;
  }

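  /**
   * Refresh the status message of the parent procedure identified by {@code parentId}, if that
   * parent is a ServerCrashProcedure; meant for child procedures reporting their progress. A
   * no-op when there is no parent.
   */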
  public static void updateProgress(MasterProcedureEnv env, long parentId) {
    if (parentId == NO_PROC_ID) {
      return;
    }
    Procedure parentProcedure =
      env.getMasterServices().getMasterProcedureExecutor().getProcedure(parentId);
    if (parentProcedure instanceof ServerCrashProcedure) {
      ((ServerCrashProcedure) parentProcedure).updateProgress(false);
    }
  }
}