001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK;
021import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.Arrays;
026import java.util.List;
027import java.util.concurrent.CompletableFuture;
028import org.apache.hadoop.hbase.DoNotRetryIOException;
029import org.apache.hadoop.hbase.ServerName;
030import org.apache.hadoop.hbase.client.RegionInfo;
031import org.apache.hadoop.hbase.client.RegionInfoBuilder;
032import org.apache.hadoop.hbase.client.RegionReplicaUtil;
033import org.apache.hadoop.hbase.client.TableState;
034import org.apache.hadoop.hbase.master.MasterServices;
035import org.apache.hadoop.hbase.master.MasterWalManager;
036import org.apache.hadoop.hbase.master.SplitWALManager;
037import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
038import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
039import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
040import org.apache.hadoop.hbase.master.replication.AssignReplicationQueuesProcedure;
041import org.apache.hadoop.hbase.master.replication.MigrateReplicationQueueFromZkToTableProcedure;
042import org.apache.hadoop.hbase.monitoring.MonitoredTask;
043import org.apache.hadoop.hbase.monitoring.TaskMonitor;
044import org.apache.hadoop.hbase.procedure2.Procedure;
045import org.apache.hadoop.hbase.procedure2.ProcedureFutureUtil;
046import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
047import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
048import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
049import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
050import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
051import org.apache.yetus.audience.InterfaceAudience;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054
055import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
056import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
057import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ServerCrashState;
058import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
059
060/**
061 * Handle crashed server. This is a port to ProcedureV2 of what used to be euphemistically called
062 * ServerShutdownHandler.
063 * <p>
064 * The procedure flow varies dependent on whether meta is assigned and if we are to split logs.
065 * <p>
066 * We come in here after ServerManager has noticed a server has expired. Procedures queued on the
067 * rpc should have been notified about fail and should be concurrently getting themselves ready to
068 * assign elsewhere.
069 */
070@InterfaceAudience.Private
071public class ServerCrashProcedure extends
072  StateMachineProcedure<MasterProcedureEnv, ServerCrashState> implements ServerProcedureInterface {
073  private static final Logger LOG = LoggerFactory.getLogger(ServerCrashProcedure.class);
074
  /**
   * Configuration parameter to enable/disable retained region assignment during
   * ServerCrashProcedure.
   * <p>
   * By default retain assignment is disabled, which makes failover faster and improves
   * availability; this is useful for cloud scenarios where region block locality is not
   * important. Enable this when RegionServers are deployed on the same hosts where DataNodes are
   * running; this will improve read performance thanks to local reads.
   * <p>
   * See HBASE-24900 for more details.
   */
086  public static final String MASTER_SCP_RETAIN_ASSIGNMENT = "hbase.master.scp.retain.assignment";
087  /** Default value of {@link #MASTER_SCP_RETAIN_ASSIGNMENT} */
088  public static final boolean DEFAULT_MASTER_SCP_RETAIN_ASSIGNMENT = false;
089
090  /**
091   * Name of the crashed server to process.
092   */
093  private ServerName serverName;
094
095  /**
096   * Whether DeadServer knows that we are processing it.
097   */
098  private boolean notifiedDeadServer = false;
099
100  /**
101   * Regions that were on the crashed server.
102   */
103  private List<RegionInfo> regionsOnCrashedServer;
104
105  private boolean carryingMeta = false;
106  private boolean shouldSplitWal;
107  private MonitoredTask status;
108  // currentRunningState is updated when ServerCrashProcedure get scheduled, child procedures update
109  // progress will not update the state because the actual state is overwritten by its next state
110  private ServerCrashState currentRunningState = getInitialState();
111
112  private CompletableFuture<Void> updateMetaFuture;
113
114  private int processedRegions = 0;
115
116  /**
117   * Call this constructor queuing up a Procedure.
118   * @param serverName     Name of the crashed server.
119   * @param shouldSplitWal True if we should split WALs as part of crashed server processing.
120   * @param carryingMeta   True if carrying hbase:meta table region.
121   */
122  public ServerCrashProcedure(final MasterProcedureEnv env, final ServerName serverName,
123    final boolean shouldSplitWal, final boolean carryingMeta) {
124    this.serverName = serverName;
125    this.shouldSplitWal = shouldSplitWal;
126    this.carryingMeta = carryingMeta;
127    this.setOwner(env.getRequestUser());
128  }
129
130  /**
131   * Used when deserializing from a procedure store; we'll construct one of these then call
132   * #deserializeStateData(InputStream). Do not use directly.
133   */
134  public ServerCrashProcedure() {
135  }
136
137  public boolean isInRecoverMetaState() {
138    return getCurrentState() == ServerCrashState.SERVER_CRASH_PROCESS_META;
139  }
140
141  @Override
142  protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
143    throws ProcedureSuspendedException, ProcedureYieldException {
144    final MasterServices services = env.getMasterServices();
145    final AssignmentManager am = env.getAssignmentManager();
146    updateProgress(true);
147    // HBASE-14802 If we have not yet notified that we are processing a dead server, do so now.
148    // This adds server to the DeadServer processing list but not to the DeadServers list.
149    // Server gets removed from processing list below on procedure successful finish.
150    if (!notifiedDeadServer) {
151      notifiedDeadServer = true;
152    }
153
154    switch (state) {
155      case SERVER_CRASH_START:
156      case SERVER_CRASH_SPLIT_META_LOGS:
157      case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
158      case SERVER_CRASH_ASSIGN_META:
159        break;
160      default:
161        // If hbase:meta is not assigned, yield.
162        if (env.getAssignmentManager().waitMetaLoaded(this)) {
163          throw new ProcedureSuspendedException();
164        }
165    }
166    try {
167      switch (state) {
168        case SERVER_CRASH_START:
169          LOG.info("Start " + this);
170          // If carrying meta, process it first. Else, get list of regions on crashed server.
171          if (this.carryingMeta) {
172            env.getAssignmentManager()
173              .markRegionsAsCrashed(List.of(RegionInfoBuilder.FIRST_META_REGIONINFO), this);
174            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
175          } else {
176            setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
177          }
178          break;
179        case SERVER_CRASH_SPLIT_META_LOGS:
180          if (
181            env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
182              DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)
183          ) {
184            zkCoordinatedSplitMetaLogs(env);
185            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
186          } else {
187            am.getRegionStates().metaLogSplitting(serverName);
188            addChildProcedure(createSplittingWalProcedures(env, true));
189            setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR);
190          }
191          break;
192        case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
193          if (isSplittingDone(env, true)) {
194            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
195            am.getRegionStates().metaLogSplit(serverName);
196          } else {
197            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
198          }
199          break;
200        case SERVER_CRASH_ASSIGN_META:
201          assignRegions(env, Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO));
202          setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
203          break;
204        case SERVER_CRASH_GET_REGIONS:
205          this.regionsOnCrashedServer = getRegionsOnCrashedServer(env);
206          // Where to go next? Depends on whether we should split logs at all or
207          // if we should do distributed log splitting.
208          if (regionsOnCrashedServer != null) {
209            LOG.info("{} had {} regions", serverName, regionsOnCrashedServer.size());
210            if (LOG.isTraceEnabled()) {
211              this.regionsOnCrashedServer.stream().forEach(ri -> LOG.trace(ri.getShortNameToLog()));
212            }
213            env.getAssignmentManager().markRegionsAsCrashed(regionsOnCrashedServer, this);
214          }
215          if (!this.shouldSplitWal) {
216            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
217          } else {
218            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
219          }
220          break;
221        case SERVER_CRASH_SPLIT_LOGS:
222          if (
223            env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
224              DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)
225          ) {
226            zkCoordinatedSplitLogs(env);
227            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
228          } else {
229            am.getRegionStates().logSplitting(this.serverName);
230            addChildProcedure(createSplittingWalProcedures(env, false));
231            setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_WALS_DIR);
232          }
233          break;
234        case SERVER_CRASH_DELETE_SPLIT_WALS_DIR:
235          if (isSplittingDone(env, false)) {
236            cleanupSplitDir(env);
237            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
238            am.getRegionStates().logSplit(this.serverName);
239          } else {
240            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
241          }
242          break;
243        case SERVER_CRASH_ASSIGN:
244          // If no regions to assign, skip assign and skip to the finish.
245          // Filter out meta regions. Those are handled elsewhere in this procedure.
246          // Filter changes this.regionsOnCrashedServer.
247          if (filterDefaultMetaRegions()) {
248            if (LOG.isTraceEnabled()) {
249              LOG.trace("Assigning regions " + RegionInfo.getShortNameToLog(regionsOnCrashedServer)
250                + ", " + this + "; cycles=" + getCycles());
251            }
252            assignRegions(env, regionsOnCrashedServer);
253          }
254          // If there is no replication peer, we do not need to enter the claim queues stage.
255          // This is also very important that now we will later initialize ReplicationQueueStorage
256          // so if there is no replication peer added yet, the storage can not be accessed.
257          // And there will be no race because:
258          // 1. For adding replication peer, if the peer storage has not been updated yet, the crash
259          // region server will not have any replication queues for this peer, so it is safe to skip
260          // claiming.
261          // 2. For removing replication peer, it it has already updated the peer storage, then
262          // there is no way to rollback and region servers are already started to close and delete
263          // replication queues, so it is also safe to skip claiming.
264          if (env.getReplicationPeerManager().listPeers(null).isEmpty()) {
265            setNextState(ServerCrashState.SERVER_CRASH_FINISH);
266          } else {
267            setNextState(ServerCrashState.SERVER_CRASH_CLAIM_REPLICATION_QUEUES);
268          }
269          break;
270        case SERVER_CRASH_HANDLE_RIT2:
271          // Noop. Left in place because we used to call handleRIT here for a second time
272          // but no longer necessary since HBASE-20634.
273          if (env.getReplicationPeerManager().listPeers(null).isEmpty()) {
274            setNextState(ServerCrashState.SERVER_CRASH_FINISH);
275          } else {
276            setNextState(ServerCrashState.SERVER_CRASH_CLAIM_REPLICATION_QUEUES);
277          }
278          break;
279        case SERVER_CRASH_CLAIM_REPLICATION_QUEUES:
280          if (
281            env.getMasterServices().getProcedures().stream()
282              .filter(p -> p instanceof MigrateReplicationQueueFromZkToTableProcedure)
283              .anyMatch(p -> !p.isFinished())
284          ) {
285            LOG.info("There is a pending {}, will retry claim replication queue later",
286              MigrateReplicationQueueFromZkToTableProcedure.class.getSimpleName());
287            suspend(10_000, true);
288            return Flow.NO_MORE_STATE;
289          }
290          addChildProcedure(new AssignReplicationQueuesProcedure(serverName));
291          setNextState(ServerCrashState.SERVER_CRASH_FINISH);
292          break;
293        case SERVER_CRASH_FINISH:
294          LOG.info("removed crashed server {} after splitting done", serverName);
295          services.getAssignmentManager().getRegionStates().removeServer(serverName);
296          updateProgress(true);
297          return Flow.NO_MORE_STATE;
298        default:
299          throw new UnsupportedOperationException("unhandled state=" + state);
300      }
301    } catch (IOException e) {
302      LOG.warn("Failed state=" + state + ", retry " + this + "; cycles=" + getCycles(), e);
303    }
304    return Flow.HAS_MORE_STATE;
305  }
306
307  /** Returns List of Regions on crashed server. */
308  List<RegionInfo> getRegionsOnCrashedServer(MasterProcedureEnv env) {
309    return env.getMasterServices().getAssignmentManager().getRegionsOnServer(serverName);
310  }
311
312  private void cleanupSplitDir(MasterProcedureEnv env) {
313    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
314    try {
315      if (!this.carryingMeta) {
316        // If we are NOT carrying hbase:meta, check if any left-over hbase:meta WAL files from an
317        // old hbase:meta tenancy on this server; clean these up if any before trying to remove the
318        // WAL directory of this server or we will fail. See archiveMetaLog comment for more details
319        // on this condition.
320        env.getMasterServices().getMasterWalManager().archiveMetaLog(this.serverName);
321      }
322      splitWALManager.deleteWALDir(serverName);
323    } catch (IOException e) {
324      LOG.info("Remove WAL directory for {} failed, ignore...{}", serverName, e.getMessage());
325    }
326  }
327
328  private boolean isSplittingDone(MasterProcedureEnv env, boolean splitMeta) {
329    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
330    try {
331      int wals = splitWALManager.getWALsToSplit(serverName, splitMeta).size();
332      LOG.debug("Check if {} WAL splitting is done? wals={}, meta={}", serverName, wals, splitMeta);
333      return wals == 0;
334    } catch (IOException e) {
335      LOG.warn("Get WALs of {} failed, retry...", serverName, e);
336      return false;
337    }
338  }
339
340  private Procedure[] createSplittingWalProcedures(MasterProcedureEnv env, boolean splitMeta)
341    throws IOException {
342    LOG.info("Splitting WALs {}, isMeta: {}", this, splitMeta);
343    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
344    List<Procedure> procedures = splitWALManager.splitWALs(serverName, splitMeta);
345    return procedures.toArray(new Procedure[procedures.size()]);
346  }
347
348  private boolean filterDefaultMetaRegions() {
349    if (regionsOnCrashedServer == null) {
350      return false;
351    }
352    regionsOnCrashedServer.removeIf(this::isDefaultMetaRegion);
353    return !regionsOnCrashedServer.isEmpty();
354  }
355
356  private boolean isDefaultMetaRegion(RegionInfo hri) {
357    return hri.isMetaRegion() && RegionReplicaUtil.isDefaultReplica(hri);
358  }
359
360  /**
361   * Split hbase:meta logs using 'classic' zk-based coordination. Superceded by procedure-based WAL
362   * splitting.
363   * @see #createSplittingWalProcedures(MasterProcedureEnv, boolean)
364   */
365  private void zkCoordinatedSplitMetaLogs(MasterProcedureEnv env) throws IOException {
366    LOG.debug("Splitting meta WALs {}", this);
367    MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
368    AssignmentManager am = env.getMasterServices().getAssignmentManager();
369    am.getRegionStates().metaLogSplitting(serverName);
370    mwm.splitMetaLog(serverName);
371    am.getRegionStates().metaLogSplit(serverName);
372    LOG.debug("Done splitting meta WALs {}", this);
373  }
374
375  /**
376   * Split logs using 'classic' zk-based coordination. Superceded by procedure-based WAL splitting.
377   * @see #createSplittingWalProcedures(MasterProcedureEnv, boolean)
378   */
379  private void zkCoordinatedSplitLogs(final MasterProcedureEnv env) throws IOException {
380    LOG.debug("Splitting WALs {}", this);
381    MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
382    AssignmentManager am = env.getMasterServices().getAssignmentManager();
383    // TODO: For Matteo. Below BLOCKs!!!! Redo so can relinquish executor while it is running.
384    // PROBLEM!!! WE BLOCK HERE. Can block for hours if hundreds of WALs to split and hundreds
385    // of SCPs running because big cluster crashed down.
386    am.getRegionStates().logSplitting(this.serverName);
387    mwm.splitLog(this.serverName);
388    if (!carryingMeta) {
389      mwm.archiveMetaLog(this.serverName);
390    }
391    am.getRegionStates().logSplit(this.serverName);
392    LOG.debug("Done splitting WALs {}", this);
393  }
394
395  void updateProgress(boolean updateState) {
396    String msg = "Processing ServerCrashProcedure of " + serverName;
397    if (status == null) {
398      status = TaskMonitor.get().createStatus(msg);
399      return;
400    }
401    if (currentRunningState == ServerCrashState.SERVER_CRASH_FINISH) {
402      status.markComplete(msg + " done");
403      return;
404    }
405    if (updateState) {
406      currentRunningState = getCurrentState();
407    }
408    int childrenLatch = getChildrenLatch();
409    status.setStatus(msg + " current State " + currentRunningState
410      + (childrenLatch > 0
411        ? "; remaining num of running child procedures = " + childrenLatch
412        : ""));
413  }
414
415  @Override
416  protected void rollbackState(MasterProcedureEnv env, ServerCrashState state) throws IOException {
417    // Can't rollback.
418    throw new UnsupportedOperationException("unhandled state=" + state);
419  }
420
421  @Override
422  protected ServerCrashState getState(int stateId) {
423    return ServerCrashState.forNumber(stateId);
424  }
425
426  @Override
427  protected int getStateId(ServerCrashState state) {
428    return state.getNumber();
429  }
430
431  @Override
432  protected ServerCrashState getInitialState() {
433    return ServerCrashState.SERVER_CRASH_START;
434  }
435
436  @Override
437  protected boolean abort(MasterProcedureEnv env) {
438    // TODO
439    return false;
440  }
441
442  @Override
443  protected LockState acquireLock(final MasterProcedureEnv env) {
444    if (env.getProcedureScheduler().waitServerExclusiveLock(this, getServerName())) {
445      return LockState.LOCK_EVENT_WAIT;
446    }
447    return LockState.LOCK_ACQUIRED;
448  }
449
450  @Override
451  protected void releaseLock(final MasterProcedureEnv env) {
452    env.getProcedureScheduler().wakeServerExclusiveLock(this, getServerName());
453  }
454
455  @Override
456  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
457    setState(ProcedureProtos.ProcedureState.RUNNABLE);
458    env.getProcedureScheduler().addFront(this);
459    return false;
460  }
461
462  @Override
463  public void toStringClassDetails(StringBuilder sb) {
464    sb.append(getProcName());
465    sb.append(", splitWal=");
466    sb.append(shouldSplitWal);
467    sb.append(", meta=");
468    sb.append(carryingMeta);
469  }
470
471  @Override
472  public String getProcName() {
473    return getClass().getSimpleName() + " " + this.serverName;
474  }
475
476  @Override
477  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
478    super.serializeStateData(serializer);
479
480    MasterProcedureProtos.ServerCrashStateData.Builder state =
481      MasterProcedureProtos.ServerCrashStateData.newBuilder()
482        .setServerName(ProtobufUtil.toServerName(this.serverName))
483        .setCarryingMeta(this.carryingMeta).setShouldSplitWal(this.shouldSplitWal);
484    if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
485      for (RegionInfo hri : this.regionsOnCrashedServer) {
486        state.addRegionsOnCrashedServer(ProtobufUtil.toRegionInfo(hri));
487      }
488    }
489    serializer.serialize(state.build());
490  }
491
492  @Override
493  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
494    super.deserializeStateData(serializer);
495
496    MasterProcedureProtos.ServerCrashStateData state =
497      serializer.deserialize(MasterProcedureProtos.ServerCrashStateData.class);
498    this.serverName = ProtobufUtil.toServerName(state.getServerName());
499    this.carryingMeta = state.hasCarryingMeta() ? state.getCarryingMeta() : false;
500    // shouldSplitWAL has a default over in pb so this invocation will always work.
501    this.shouldSplitWal = state.getShouldSplitWal();
502    int size = state.getRegionsOnCrashedServerCount();
503    if (size > 0) {
504      this.regionsOnCrashedServer = new ArrayList<>(size);
505      for (org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo ri : state
506        .getRegionsOnCrashedServerList()) {
507        this.regionsOnCrashedServer.add(ProtobufUtil.toRegionInfo(ri));
508      }
509    }
510    updateProgress(false);
511  }
512
513  @Override
514  public ServerName getServerName() {
515    return this.serverName;
516  }
517
518  @Override
519  public boolean hasMetaTableRegion() {
520    return this.carryingMeta;
521  }
522
523  @Override
524  public ServerOperationType getServerOperationType() {
525    return ServerOperationType.CRASH_HANDLER;
526  }
527
528  @Override
529  protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
530    // The operation is triggered internally on the server
531    // the client does not know about this procedure.
532    return false;
533  }
534
535  /**
536   * Moved out here so can be overridden by the HBCK fix-up SCP to be less strict about what it will
537   * tolerate as a 'match'.
538   * @return True if the region location in <code>rsn</code> matches that of this crashed server.
539   */
540  protected boolean isMatchingRegionLocation(RegionStateNode rsn) {
541    return this.serverName.equals(rsn.getRegionLocation());
542  }
543
544  private CompletableFuture<Void> getUpdateMetaFuture() {
545    return updateMetaFuture;
546  }
547
548  private void setUpdateMetaFuture(CompletableFuture<Void> f) {
549    updateMetaFuture = f;
550  }
551
552  /**
553   * Assign the regions on the crashed RS to other Rses.
554   * <p/>
555   * In this method we will go through all the RegionStateNodes of the give regions to find out
556   * whether there is already an TRSP for the region, if so we interrupt it and let it retry on
557   * other server, otherwise we will schedule a TRSP to bring the region online.
558   * <p/>
559   * We will also check whether the table for a region is enabled, if not, we will skip assigning
560   * it.
561   */
562  private void assignRegions(MasterProcedureEnv env, List<RegionInfo> regions)
563    throws IOException, ProcedureSuspendedException {
564    AssignmentManager am = env.getMasterServices().getAssignmentManager();
565    boolean retainAssignment = env.getMasterConfiguration().getBoolean(MASTER_SCP_RETAIN_ASSIGNMENT,
566      DEFAULT_MASTER_SCP_RETAIN_ASSIGNMENT);
567    // Since we may suspend in the middle of this loop, so here we use processedRegions to record
568    // the progress, so next time we can locate the correct region
569    // We do not need to persist the processedRegions when serializing the procedure, as when master
570    // restarts, the sub procedure list will be cleared when rescheduling this SCP again, so we need
571    // to start from beginning.
572    for (int n = regions.size(); processedRegions < n; processedRegions++) {
573      RegionInfo region = regions.get(processedRegions);
574      RegionStateNode regionNode = am.getRegionStates().getOrCreateRegionStateNode(region);
575      // There are two possible ways where we have already hold the lock here
576      // 1. We have already hold the lock and we suspend while updating meta, so after being woken
577      // up, we should skip lock again.
578      // 2. We suspend the procedure while trying to hold the lock, and finally it is our turn to
579      // hold the lock and we schedule the procedure again, this time we should have already hold
580      // the lock, so we do not need to lock again
581      if (!regionNode.isLockedBy(this)) {
582        regionNode.lock(this, () -> ProcedureFutureUtil.wakeUp(this, env));
583      }
584      try {
585        if (
586          ProcedureFutureUtil.checkFuture(this, this::getUpdateMetaFuture,
587            this::setUpdateMetaFuture, () -> {
588            })
589        ) {
590          continue;
591        }
592        // This is possible, as when a server is dead, TRSP will fail to schedule a RemoteProcedure
593        // and then try to assign the region to a new RS. And before it has updated the region
594        // location to the new RS, we may have already called the am.getRegionsOnServer so we will
595        // consider the region is still on this crashed server. Then before we arrive here, the
596        // TRSP could have updated the region location, or even finished itself, so the region is
597        // no longer on this crashed server any more. We should not try to assign it again. Please
598        // see HBASE-23594 for more details.
599        // UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get
600        // in the way of our clearing out 'Unknown Servers'.
601        if (!isMatchingRegionLocation(regionNode)) {
602          // See HBASE-24117, though we have already changed the shutdown order, it is still worth
603          // double checking here to confirm that we do not skip assignment incorrectly.
604          if (!am.isRunning()) {
605            throw new DoNotRetryIOException(
606              "AssignmentManager has been stopped, can not process assignment any more");
607          }
608          LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...",
609            this, regionNode, serverName);
610          continue;
611        }
612        if (regionNode.getProcedure() != null) {
613          LOG.info("{} found RIT {}; {}", this, regionNode.getProcedure(), regionNode);
614          ProcedureFutureUtil.suspendIfNecessary(this, this::setUpdateMetaFuture, regionNode
615            .getProcedure().serverCrashed(env, regionNode, getServerName(), !retainAssignment), env,
616            () -> {
617            });
618          continue;
619        }
620        if (
621          env.getMasterServices().getTableStateManager().isTableState(regionNode.getTable(),
622            TableState.State.DISABLED)
623        ) {
624          // This should not happen, table disabled but has regions on server.
625          LOG.warn("Found table disabled for region {}, procDetails: {}", regionNode, this);
626          continue;
627        }
628        TransitRegionStateProcedure proc =
629          TransitRegionStateProcedure.assign(env, region, !retainAssignment, null);
630        regionNode.setProcedure(proc);
631        // It is OK to still use addChildProcedure even if we suspend in the middle of this loop, as
632        // the subProcList will only be cleared when we successfully returned from the
633        // executeFromState method. This means we will submit all the TRSPs after we successfully
634        // finished this loop
635        addChildProcedure(proc);
636      } finally {
637        if (updateMetaFuture == null) {
638          regionNode.unlock(this);
639        }
640      }
641    }
642    // we will call this method two times if the region server carries meta, so we need to reset it
643    // to 0 after successfully finished the above loop
644    processedRegions = 0;
645  }
646
647  @Override
648  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
649    return env.getMasterServices().getMasterMetrics().getServerCrashProcMetrics();
650  }
651
652  @Override
653  protected boolean holdLock(MasterProcedureEnv env) {
654    return true;
655  }
656
657  public static void updateProgress(MasterProcedureEnv env, long parentId) {
658    if (parentId == NO_PROC_ID) {
659      return;
660    }
661    Procedure parentProcedure =
662      env.getMasterServices().getMasterProcedureExecutor().getProcedure(parentId);
663    if (parentProcedure != null && parentProcedure instanceof ServerCrashProcedure) {
664      ((ServerCrashProcedure) parentProcedure).updateProgress(false);
665    }
666  }
667}