/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK;
import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.MasterWalManager;
import org.apache.hadoop.hbase.master.SplitWALManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ServerCrashState;
/**
 * Handle a crashed server. This is a port to ProcedureV2 of what used to be euphemistically called
 * ServerShutdownHandler.
 *
 * <p>The procedure flow varies depending on whether meta is assigned and whether we are to split
 * WALs.
 *
 * <p>We come in here after ServerManager has noticed a server has expired. Procedures
 * queued on the RPC should have been notified of the failure and should be concurrently
 * getting themselves ready to assign elsewhere.
 */
@InterfaceAudience.Private
public class ServerCrashProcedure
    extends StateMachineProcedure<MasterProcedureEnv, ServerCrashState>
    implements ServerProcedureInterface {
  private static final Logger LOG = LoggerFactory.getLogger(ServerCrashProcedure.class);

  /**
   * Name of the crashed server to process.
   */
  private ServerName serverName;

  /**
   * Whether DeadServer knows that we are processing it.
   */
  private boolean notifiedDeadServer = false;

  /**
   * Regions that were on the crashed server.
   */
  private List<RegionInfo> regionsOnCrashedServer;

  private boolean carryingMeta = false;
  private boolean shouldSplitWal;
  private MonitoredTask status;
  // currentRunningState is updated when the ServerCrashProcedure gets scheduled; progress updates
  // from child procedures do not change it, because the actual state would otherwise be
  // overwritten by its next state.
  private ServerCrashState currentRunningState = getInitialState();

  /**
   * Call this constructor to queue up a Procedure.
   * @param serverName Name of the crashed server.
   * @param shouldSplitWal True if we should split WALs as part of crashed server processing.
   * @param carryingMeta True if carrying the hbase:meta table region.
   */
  public ServerCrashProcedure(final MasterProcedureEnv env, final ServerName serverName,
      final boolean shouldSplitWal, final boolean carryingMeta) {
    this.serverName = serverName;
    this.shouldSplitWal = shouldSplitWal;
    this.carryingMeta = carryingMeta;
    this.setOwner(env.getRequestUser());
  }

  /**
   * Used when deserializing from a procedure store; we'll construct one of these and then call
   * #deserializeStateData(ProcedureStateSerializer). Do not use directly.
   */
  public ServerCrashProcedure() {
  }

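  /**
   * @return True if this procedure is currently in the SERVER_CRASH_PROCESS_META state, i.e.
   *         recovering hbase:meta.
   */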
  public boolean isInRecoverMetaState() {
    return getCurrentState() == ServerCrashState.SERVER_CRASH_PROCESS_META;
  }

  @Override
  protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
      throws ProcedureSuspendedException, ProcedureYieldException {
    final MasterServices services = env.getMasterServices();
    final AssignmentManager am = env.getAssignmentManager();
    updateProgress(true);
    // HBASE-14802 If we have not yet notified that we are processing a dead server, do so now.
    // This adds the server to the DeadServer processing list but not to the DeadServers list.
    // The server gets removed from the processing list below on successful procedure finish.
    if (!notifiedDeadServer) {
      services.getServerManager().getDeadServers().processing(serverName);
      notifiedDeadServer = true;
    }

    switch (state) {
      case SERVER_CRASH_START:
      case SERVER_CRASH_SPLIT_META_LOGS:
      case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
      case SERVER_CRASH_ASSIGN_META:
        break;
      default:
        // If hbase:meta is not yet loaded, suspend and wait until it is.
        if (env.getAssignmentManager().waitMetaLoaded(this)) {
          throw new ProcedureSuspendedException();
        }
    }
    try {
      switch (state) {
        case SERVER_CRASH_START:
          LOG.info("Start " + this);
          // If carrying meta, process it first. Else, get list of regions on crashed server.
          if (this.carryingMeta) {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
          }
          break;
        case SERVER_CRASH_SPLIT_META_LOGS:
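          // Two splitting modes: when WAL splitting is coordinated by ZK we split the meta WALs
          // in-line (blocking this procedure's executor thread); otherwise we hand the work off
          // to child procedures and verify completion in SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR.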
          if (env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
            DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)) {
            splitMetaLogs(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
          } else {
            am.getRegionStates().metaLogSplitting(serverName);
            addChildProcedure(createSplittingWalProcedures(env, true));
            setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR);
          }
          break;
        case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
          if (isSplittingDone(env, true)) {
            cleanupSplitDir(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
            am.getRegionStates().metaLogSplit(serverName);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
          }
          break;
        case SERVER_CRASH_ASSIGN_META:
          assignRegions(env, Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO));
          setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
          break;
        case SERVER_CRASH_GET_REGIONS:
          this.regionsOnCrashedServer = getRegionsOnCrashedServer(env);
          // Where to go next? Depends on whether we should split logs at all or
          // if we should do distributed log splitting.
          if (regionsOnCrashedServer != null) {
            LOG.info("{} had {} regions", serverName, regionsOnCrashedServer.size());
            if (LOG.isTraceEnabled()) {
              this.regionsOnCrashedServer.forEach(ri -> LOG.trace(ri.getShortNameToLog()));
            }
          }
          if (!this.shouldSplitWal) {
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
          }
          break;
        case SERVER_CRASH_SPLIT_LOGS:
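          // Same two modes as for the meta WALs above: split in-line when coordinated by ZK,
          // otherwise schedule child split procedures and check back in
          // SERVER_CRASH_DELETE_SPLIT_WALS_DIR.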
          if (env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
            DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)) {
            splitLogs(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
          } else {
            am.getRegionStates().logSplitting(this.serverName);
            addChildProcedure(createSplittingWalProcedures(env, false));
            setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_WALS_DIR);
          }
          break;
        case SERVER_CRASH_DELETE_SPLIT_WALS_DIR:
          if (isSplittingDone(env, false)) {
            cleanupSplitDir(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
            am.getRegionStates().logSplit(this.serverName);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
          }
          break;
        case SERVER_CRASH_ASSIGN:
          // If there are no regions to assign, skip the assign and go straight to the finish.
          // Filter out meta regions; those are handled elsewhere in this procedure.
          // The filter modifies this.regionsOnCrashedServer.
          if (filterDefaultMetaRegions()) {
            if (LOG.isTraceEnabled()) {
              LOG.trace("Assigning regions " +
                RegionInfo.getShortNameToLog(regionsOnCrashedServer) + ", " + this +
                "; cycles=" + getCycles());
            }
            assignRegions(env, regionsOnCrashedServer);
          }
          setNextState(ServerCrashState.SERVER_CRASH_FINISH);
          break;
        case SERVER_CRASH_HANDLE_RIT2:
          // Noop. Left in place because we used to call handleRIT here a second time, but that is
          // no longer necessary since HBASE-20634.
          setNextState(ServerCrashState.SERVER_CRASH_FINISH);
          break;
        case SERVER_CRASH_FINISH:
          LOG.info("Removed crashed server {} after splitting done", serverName);
          services.getAssignmentManager().getRegionStates().removeServer(serverName);
          services.getServerManager().getDeadServers().finish(serverName);
          updateProgress(true);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException("unhandled state=" + state);
      }
    } catch (IOException e) {
      LOG.warn("Failed state=" + state + ", retry " + this + "; cycles=" + getCycles(), e);
    }
    return Flow.HAS_MORE_STATE;
  }

  /**
   * @return List of Regions on crashed server.
   */
  List<RegionInfo> getRegionsOnCrashedServer(MasterProcedureEnv env) {
    return env.getMasterServices().getAssignmentManager().getRegionsOnServer(serverName);
  }

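  /**
   * Delete what remains of the crashed server's WAL split directory. A failure is only logged;
   * it does not fail the procedure.
   */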
  private void cleanupSplitDir(MasterProcedureEnv env) {
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    try {
      splitWALManager.deleteWALDir(serverName);
    } catch (IOException e) {
      LOG.warn("Failed to remove WAL directory of server {}, ignoring...", serverName, e);
    }
  }

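  /**
   * @return True if there are no more WALs of the crashed server left to split (only the meta
   *         WALs when <code>splitMeta</code> is true). If listing the WALs fails, we report
   *         not-done so the splitting state is retried.
   */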
  private boolean isSplittingDone(MasterProcedureEnv env, boolean splitMeta) {
    LOG.debug("Checking if splitting WALs of {} is done, isMeta: {}", serverName, splitMeta);
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    try {
      return splitWALManager.getWALsToSplit(serverName, splitMeta).isEmpty();
    } catch (IOException e) {
      LOG.warn("Failed to get WAL list of server {}, will retry...", serverName, e);
      return false;
    }
  }

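  /**
   * Ask the SplitWALManager for one child procedure per WAL of the crashed server that still
   * needs splitting (meta WALs only when <code>splitMeta</code> is true).
   */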
  private Procedure[] createSplittingWalProcedures(MasterProcedureEnv env, boolean splitMeta)
      throws IOException {
    LOG.info("Splitting WALs {}, isMeta: {}", this, splitMeta);
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    List<Procedure> procedures = splitWALManager.splitWALs(serverName, splitMeta);
    return procedures.toArray(new Procedure[0]);
  }

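  /**
   * Remove default replicas of hbase:meta from the pending assigns; they are handled by the
   * meta-specific states earlier in this procedure.
   * @return True if there are still regions left to assign.
   */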
  private boolean filterDefaultMetaRegions() {
    if (regionsOnCrashedServer == null) {
      return false;
    }
    regionsOnCrashedServer.removeIf(this::isDefaultMetaRegion);
    return !regionsOnCrashedServer.isEmpty();
  }

  private boolean isDefaultMetaRegion(RegionInfo hri) {
    return hri.isMetaRegion() && RegionReplicaUtil.isDefaultReplica(hri);
  }

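  /**
   * Split the crashed server's meta WALs in-line via the MasterWalManager. Used in the
   * ZK-coordinated mode; blocks until the split completes.
   */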
  private void splitMetaLogs(MasterProcedureEnv env) throws IOException {
    LOG.debug("Splitting meta WALs {}", this);
    MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    am.getRegionStates().metaLogSplitting(serverName);
    mwm.splitMetaLog(serverName);
    am.getRegionStates().metaLogSplit(serverName);
    LOG.debug("Done splitting meta WALs {}", this);
  }

  private void splitLogs(final MasterProcedureEnv env) throws IOException {
    LOG.debug("Splitting WALs {}", this);
    MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    // TODO: For Matteo. The below BLOCKS!!!! Redo so we can relinquish the executor while it is
    // running. PROBLEM!!! WE BLOCK HERE. This can block for hours if there are hundreds of WALs
    // to split and hundreds of SCPs running because a big cluster crashed.
    am.getRegionStates().logSplitting(this.serverName);
    mwm.splitLog(this.serverName);
    if (!carryingMeta) {
      mwm.archiveMetaLog(this.serverName);
    }
    am.getRegionStates().logSplit(this.serverName);
    LOG.debug("Done splitting WALs {}", this);
  }

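  /**
   * Refresh the MonitoredTask for this procedure: create it on first call, mark it complete once
   * we reach SERVER_CRASH_FINISH, and otherwise report the current state and the number of
   * outstanding child procedures.
   */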
  void updateProgress(boolean updateState) {
    String msg = "Processing ServerCrashProcedure of " + serverName;
    if (status == null) {
      status = TaskMonitor.get().createStatus(msg);
      return;
    }
    if (currentRunningState == ServerCrashState.SERVER_CRASH_FINISH) {
      status.markComplete(msg + " done");
      return;
    }
    if (updateState) {
      currentRunningState = getCurrentState();
    }
    int childrenLatch = getChildrenLatch();
    status.setStatus(msg + " current State " + currentRunningState
        + (childrenLatch > 0 ? "; remaining num of running child procedures = " + childrenLatch
            : ""));
  }

  @Override
  protected void rollbackState(MasterProcedureEnv env, ServerCrashState state)
      throws IOException {
    // Can't rollback.
    throw new UnsupportedOperationException("unhandled state=" + state);
  }

  @Override
  protected ServerCrashState getState(int stateId) {
    return ServerCrashState.forNumber(stateId);
  }

  @Override
  protected int getStateId(ServerCrashState state) {
    return state.getNumber();
  }

  @Override
  protected ServerCrashState getInitialState() {
    return ServerCrashState.SERVER_CRASH_START;
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // TODO
    return false;
  }

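  /**
   * Take the exclusive lock on the crashed server so no other server procedure can run against
   * it concurrently; the lock is released in {@link #releaseLock(MasterProcedureEnv)}.
   */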
  @Override
  protected LockState acquireLock(final MasterProcedureEnv env) {
    if (env.getProcedureScheduler().waitServerExclusiveLock(this, getServerName())) {
      return LockState.LOCK_EVENT_WAIT;
    }
    return LockState.LOCK_ACQUIRED;
  }

  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureScheduler().wakeServerExclusiveLock(this, getServerName());
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getProcName());
    sb.append(", splitWal=");
    sb.append(shouldSplitWal);
    sb.append(", meta=");
    sb.append(carryingMeta);
  }

  @Override
  public String getProcName() {
    return getClass().getSimpleName() + " " + this.serverName;
  }

  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.serializeStateData(serializer);

    MasterProcedureProtos.ServerCrashStateData.Builder state =
      MasterProcedureProtos.ServerCrashStateData.newBuilder()
        .setServerName(ProtobufUtil.toServerName(this.serverName))
        .setCarryingMeta(this.carryingMeta)
        .setShouldSplitWal(this.shouldSplitWal);
    if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
      for (RegionInfo hri : this.regionsOnCrashedServer) {
        state.addRegionsOnCrashedServer(ProtobufUtil.toRegionInfo(hri));
      }
    }
    serializer.serialize(state.build());
  }

  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.deserializeStateData(serializer);

    MasterProcedureProtos.ServerCrashStateData state =
        serializer.deserialize(MasterProcedureProtos.ServerCrashStateData.class);
    this.serverName = ProtobufUtil.toServerName(state.getServerName());
    this.carryingMeta = state.hasCarryingMeta() ? state.getCarryingMeta() : false;
    // shouldSplitWal has a default in the protobuf definition, so this invocation always works.
    this.shouldSplitWal = state.getShouldSplitWal();
    int size = state.getRegionsOnCrashedServerCount();
    if (size > 0) {
      this.regionsOnCrashedServer = new ArrayList<>(size);
      for (org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo ri :
          state.getRegionsOnCrashedServerList()) {
        this.regionsOnCrashedServer.add(ProtobufUtil.toRegionInfo(ri));
      }
    }
    updateProgress(false);
  }

  @Override
  public ServerName getServerName() {
    return this.serverName;
  }

  @Override
  public boolean hasMetaTableRegion() {
    return this.carryingMeta;
  }

  @Override
  public ServerOperationType getServerOperationType() {
    return ServerOperationType.CRASH_HANDLER;
  }

  @Override
  protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
    // The operation is triggered internally on the server side;
    // the client does not know about this procedure.
    return false;
  }

  /**
   * Moved out here so it can be overridden by the HBCK fix-up SCP, which is less strict about
   * what it will tolerate as a 'match'.
   * @return True if the region location in <code>rsn</code> matches that of this crashed server.
   */
  protected boolean isMatchingRegionLocation(RegionStateNode rsn) {
    return this.serverName.equals(rsn.getRegionLocation());
  }

  /**
   * Assign the regions on the crashed RS to other RSes.
   * <p/>
   * In this method we go through all the RegionStateNodes of the given regions to find out
   * whether there is already a TRSP for the region. If so, we interrupt it and let it retry on
   * another server; otherwise we schedule a TRSP to bring the region online.
   * <p/>
   * We also check whether the table for a region is enabled; if not, we skip assigning it.
   */
  private void assignRegions(MasterProcedureEnv env, List<RegionInfo> regions) throws IOException {
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    for (RegionInfo region : regions) {
      RegionStateNode regionNode = am.getRegionStates().getOrCreateRegionStateNode(region);
      regionNode.lock();
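      // Hold the RegionStateNode lock while we examine and possibly mutate the node so we do
      // not race with a concurrently running TRSP for the same region.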
      try {
        // This is possible: when a server is dead, a TRSP will fail to schedule a RemoteProcedure
        // and then try to assign the region to a new RS. Before it has updated the region
        // location to the new RS, we may have already called am.getRegionsOnServer, so we would
        // consider the region to still be on this crashed server. Then, before we arrive here,
        // the TRSP could have updated the region location, or even finished itself, so the region
        // is no longer on this crashed server. We should not try to assign it again. Please see
        // HBASE-23594 for more details.
        // UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get
        // in the way of our clearing out 'Unknown Servers'.
        if (!isMatchingRegionLocation(regionNode)) {
          // See HBASE-24117. Though we have already changed the shutdown order, it is still worth
          // double checking here to confirm that we do not skip assignment incorrectly.
          if (!am.isRunning()) {
            throw new DoNotRetryIOException(
              "AssignmentManager has been stopped, can not process assignment any more");
          }
          LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...",
            this, regionNode, serverName);
          continue;
        }
        if (regionNode.getProcedure() != null) {
          LOG.info("{} found RIT {}; {}", this, regionNode.getProcedure(), regionNode);
          regionNode.getProcedure().serverCrashed(env, regionNode, getServerName());
          continue;
        }
        if (env.getMasterServices().getTableStateManager()
          .isTableState(regionNode.getTable(), TableState.State.DISABLING)) {
          // We need to change the state here; otherwise the TRSP scheduled by DTP will try to
          // close the region on a dead server and will never succeed. Please see HBASE-23636
          // for more details.
          env.getAssignmentManager().regionClosedAbnormally(regionNode);
          LOG.info("{} found table disabling for region {}, setting its state to"
            + " ABNORMALLY_CLOSED.", this, regionNode);
          continue;
        }
        if (env.getMasterServices().getTableStateManager()
          .isTableState(regionNode.getTable(), TableState.State.DISABLED)) {
          // This should not happen: the table is disabled but still has regions on a server.
          LOG.warn("Found table disabled for region {}, procDetails: {}", regionNode, this);
          continue;
        }
        // Force the assign to pick a new candidate server; see HBASE-23035 for more details.
        TransitRegionStateProcedure proc =
          TransitRegionStateProcedure.assign(env, region, true, null);
        regionNode.setProcedure(proc);
        addChildProcedure(proc);
      } finally {
        regionNode.unlock();
      }
    }
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getMasterServices().getMasterMetrics().getServerCrashProcMetrics();
  }

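  /**
   * Hold the exclusive server lock for the whole life of this procedure rather than releasing
   * and reacquiring it between steps.
   */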
  @Override
  protected boolean holdLock(MasterProcedureEnv env) {
    return true;
  }

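  /**
   * Called from a child procedure to refresh the parent ServerCrashProcedure's MonitoredTask.
   * A no-op when there is no parent (NO_PROC_ID) or the parent is not a ServerCrashProcedure.
   */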
  public static void updateProgress(MasterProcedureEnv env, long parentId) {
    if (parentId == NO_PROC_ID) {
      return;
    }
    Procedure<?> parentProcedure =
        env.getMasterServices().getMasterProcedureExecutor().getProcedure(parentId);
    if (parentProcedure instanceof ServerCrashProcedure) {
      ((ServerCrashProcedure) parentProcedure).updateProgress(false);
    }
  }
}