/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK;
import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.MasterWalManager;
import org.apache.hadoop.hbase.master.SplitWALManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ServerCrashState;

/**
 * Handle crashed server. This is a port to ProcedureV2 of what used to be euphemistically called
 * ServerShutdownHandler.
 *
 * <p>The procedure flow varies depending on whether meta is assigned and whether we are to split
 * logs.
 *
 * <p>We come in here after ServerManager has noticed a server has expired. Procedures queued on
 * the rpc should have been notified of the failure and should be concurrently getting themselves
 * ready to assign elsewhere.
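 *
 * <p>For orientation, the happy path through the state machine in
 * {@link #executeFromState(MasterProcedureEnv, ServerCrashState)} is roughly the following (a
 * sketch only; the meta states are entered only when the crashed server was carrying hbase:meta):
 * <pre>
 * SERVER_CRASH_START
 *   -&gt; SERVER_CRASH_SPLIT_META_LOGS -&gt; SERVER_CRASH_ASSIGN_META   (carryingMeta only)
 *   -&gt; SERVER_CRASH_GET_REGIONS
 *   -&gt; SERVER_CRASH_SPLIT_LOGS                                    (shouldSplitWal only)
 *   -&gt; SERVER_CRASH_ASSIGN -&gt; SERVER_CRASH_FINISH
 * </pre>
 * When WAL splitting is not coordinated by ZooKeeper, the two SPLIT states instead hand off to
 * child procedures and loop through the corresponding DELETE_SPLIT_*_WALS_DIR states until
 * splitting is done.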
 */
@InterfaceAudience.Private
public class ServerCrashProcedure
    extends StateMachineProcedure<MasterProcedureEnv, ServerCrashState>
    implements ServerProcedureInterface {
  private static final Logger LOG = LoggerFactory.getLogger(ServerCrashProcedure.class);

  /**
   * Name of the crashed server to process.
   */
  private ServerName serverName;

  /**
   * Whether DeadServer knows that we are processing it.
   */
  private boolean notifiedDeadServer = false;

  /**
   * Regions that were on the crashed server.
   */
  private List<RegionInfo> regionsOnCrashedServer;

  private boolean carryingMeta = false;
  private boolean shouldSplitWal;
  private MonitoredTask status;
  // currentRunningState is updated when the ServerCrashProcedure gets scheduled; progress updates
  // from child procedures do not change it, because the actual state would otherwise be
  // overwritten by its next state.
  private ServerCrashState currentRunningState = getInitialState();

  /**
   * Call this constructor to queue up a Procedure.
   * @param serverName Name of the crashed server.
   * @param shouldSplitWal True if we should split WALs as part of crashed server processing.
   * @param carryingMeta True if carrying hbase:meta table region.
   */
  public ServerCrashProcedure(final MasterProcedureEnv env, final ServerName serverName,
      final boolean shouldSplitWal, final boolean carryingMeta) {
    this.serverName = serverName;
    this.shouldSplitWal = shouldSplitWal;
    this.carryingMeta = carryingMeta;
    this.setOwner(env.getRequestUser());
  }

  /**
   * Used when deserializing from a procedure store; we'll construct one of these then call
   * #deserializeStateData(InputStream). Do not use directly.
   */
  public ServerCrashProcedure() {
  }

  public boolean isInRecoverMetaState() {
    return getCurrentState() == ServerCrashState.SERVER_CRASH_PROCESS_META;
  }

  @Override
  protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
      throws ProcedureSuspendedException, ProcedureYieldException {
    final MasterServices services = env.getMasterServices();
    final AssignmentManager am = env.getAssignmentManager();
    updateProgress(true);
    // HBASE-14802 If we have not yet notified that we are processing a dead server, do so now.
    // This adds the server to the DeadServer processing list but not to the DeadServers list.
    // The server gets removed from the processing list below on successful procedure finish.
    if (!notifiedDeadServer) {
      services.getServerManager().getDeadServers().processing(serverName);
      notifiedDeadServer = true;
    }

    switch (state) {
      case SERVER_CRASH_START:
      case SERVER_CRASH_SPLIT_META_LOGS:
      case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
      case SERVER_CRASH_ASSIGN_META:
        break;
      default:
        // If hbase:meta is not assigned, yield.
        if (env.getAssignmentManager().waitMetaLoaded(this)) {
          throw new ProcedureSuspendedException();
        }
    }
    try {
      switch (state) {
        case SERVER_CRASH_START:
          LOG.info("Start " + this);
          // If carrying meta, process it first. Else, get the list of regions on the crashed
          // server.
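          // Meta WALs are split and meta is reassigned before anything else; the default branch
          // of the switch above suspends this procedure until hbase:meta has been loaded.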
          if (this.carryingMeta) {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
          }
          break;
        case SERVER_CRASH_SPLIT_META_LOGS:
          if (env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
            DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)) {
            splitMetaLogs(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
          } else {
            am.getRegionStates().metaLogSplitting(serverName);
            addChildProcedure(createSplittingWalProcedures(env, true));
            setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR);
          }
          break;
        case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
          if (isSplittingDone(env, true)) {
            cleanupSplitDir(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
            am.getRegionStates().metaLogSplit(serverName);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
          }
          break;
        case SERVER_CRASH_ASSIGN_META:
          assignRegions(env, Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO));
          setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
          break;
        case SERVER_CRASH_GET_REGIONS:
          this.regionsOnCrashedServer = getRegionsOnCrashedServer(env);
          // Where to go next? Depends on whether we should split logs at all or
          // if we should do distributed log splitting.
          if (regionsOnCrashedServer != null) {
            LOG.info("{} had {} regions", serverName, regionsOnCrashedServer.size());
            if (LOG.isTraceEnabled()) {
              this.regionsOnCrashedServer.stream()
                .forEach(ri -> LOG.trace(ri.getShortNameToLog()));
            }
          }
          if (!this.shouldSplitWal) {
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
          }
          break;
        case SERVER_CRASH_SPLIT_LOGS:
          if (env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
            DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)) {
            splitLogs(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
          } else {
            am.getRegionStates().logSplitting(this.serverName);
            addChildProcedure(createSplittingWalProcedures(env, false));
            setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_WALS_DIR);
          }
          break;
        case SERVER_CRASH_DELETE_SPLIT_WALS_DIR:
          if (isSplittingDone(env, false)) {
            cleanupSplitDir(env);
            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
            am.getRegionStates().logSplit(this.serverName);
          } else {
            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
          }
          break;
        case SERVER_CRASH_ASSIGN:
          // If no regions to assign, skip assign and skip to the finish.
          // Filter out meta regions. Those are handled elsewhere in this procedure.
          // Filter changes this.regionsOnCrashedServer.
          if (filterDefaultMetaRegions()) {
            if (LOG.isTraceEnabled()) {
              LOG.trace("Assigning regions " + RegionInfo.getShortNameToLog(regionsOnCrashedServer)
                + ", " + this + "; cycles=" + getCycles());
            }
            assignRegions(env, regionsOnCrashedServer);
          }
          setNextState(ServerCrashState.SERVER_CRASH_FINISH);
          break;
        case SERVER_CRASH_HANDLE_RIT2:
          // Noop. Left in place because we used to call handleRIT here for a second time,
          // but that is no longer necessary since HBASE-20634.
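          // The SERVER_CRASH_HANDLE_RIT2 value itself must stay so that procedures persisted in
          // this state by older masters can still be deserialized and driven to completion.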
          setNextState(ServerCrashState.SERVER_CRASH_FINISH);
          break;
        case SERVER_CRASH_FINISH:
          LOG.info("removed crashed server {} after splitting done", serverName);
          services.getAssignmentManager().getRegionStates().removeServer(serverName);
          services.getServerManager().getDeadServers().finish(serverName);
          updateProgress(true);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException("unhandled state=" + state);
      }
    } catch (IOException e) {
      LOG.warn("Failed state=" + state + ", retry " + this + "; cycles=" + getCycles(), e);
    }
    return Flow.HAS_MORE_STATE;
  }

  /**
   * @return List of Regions on crashed server.
   */
  List<RegionInfo> getRegionsOnCrashedServer(MasterProcedureEnv env) {
    return env.getMasterServices().getAssignmentManager().getRegionsOnServer(serverName);
  }

  private void cleanupSplitDir(MasterProcedureEnv env) {
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    try {
      splitWALManager.deleteWALDir(serverName);
    } catch (IOException e) {
      LOG.warn("Failed to remove WAL directory of server {}, ignoring...", serverName, e);
    }
  }

  private boolean isSplittingDone(MasterProcedureEnv env, boolean splitMeta) {
    LOG.debug("Checking if WAL splitting of {} is done, isMeta: {}", serverName, splitMeta);
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    try {
      return splitWALManager.getWALsToSplit(serverName, splitMeta).size() == 0;
    } catch (IOException e) {
      LOG.warn("Failed to get WAL list of server {}, retrying...", serverName, e);
      return false;
    }
  }

  private Procedure[] createSplittingWalProcedures(MasterProcedureEnv env, boolean splitMeta)
      throws IOException {
    LOG.info("Splitting WALs {}, isMeta: {}", this, splitMeta);
    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
    List<Procedure> procedures = splitWALManager.splitWALs(serverName, splitMeta);
    return procedures.toArray(new Procedure[procedures.size()]);
  }

  private boolean filterDefaultMetaRegions() {
    if (regionsOnCrashedServer == null) {
      return false;
    }
    regionsOnCrashedServer.removeIf(this::isDefaultMetaRegion);
    return !regionsOnCrashedServer.isEmpty();
  }

  private boolean isDefaultMetaRegion(RegionInfo hri) {
    return hri.isMetaRegion() && RegionReplicaUtil.isDefaultReplica(hri);
  }

  private void splitMetaLogs(MasterProcedureEnv env) throws IOException {
    LOG.debug("Splitting meta WALs {}", this);
    MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    am.getRegionStates().metaLogSplitting(serverName);
    mwm.splitMetaLog(serverName);
    am.getRegionStates().metaLogSplit(serverName);
    LOG.debug("Done splitting meta WALs {}", this);
  }

  private void splitLogs(final MasterProcedureEnv env) throws IOException {
    LOG.debug("Splitting WALs {}", this);
    MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    // TODO: For Matteo. Below BLOCKs!!!! Redo so can relinquish executor while it is running.
    // PROBLEM!!! WE BLOCK HERE. Can block for hours if hundreds of WALs to split and hundreds
    // of SCPs running because big cluster crashed down.
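    // Note: this blocking path is only taken when WAL splitting is coordinated by ZooKeeper
    // (HBASE_SPLIT_WAL_COORDINATED_BY_ZK). Otherwise, SERVER_CRASH_SPLIT_LOGS schedules child
    // WAL-splitting procedures via createSplittingWalProcedures and does not block here.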
    am.getRegionStates().logSplitting(this.serverName);
    mwm.splitLog(this.serverName);
    if (!carryingMeta) {
      mwm.archiveMetaLog(this.serverName);
    }
    am.getRegionStates().logSplit(this.serverName);
    LOG.debug("Done splitting WALs {}", this);
  }

  void updateProgress(boolean updateState) {
    String msg = "Processing ServerCrashProcedure of " + serverName;
    if (status == null) {
      status = TaskMonitor.get().createStatus(msg);
      return;
    }
    if (currentRunningState == ServerCrashState.SERVER_CRASH_FINISH) {
      status.markComplete(msg + " done");
      return;
    }
    if (updateState) {
      currentRunningState = getCurrentState();
    }
    int childrenLatch = getChildrenLatch();
    status.setStatus(msg + " current State " + currentRunningState
      + (childrenLatch > 0 ? "; remaining num of running child procedures = " + childrenLatch
        : ""));
  }

  @Override
  protected void rollbackState(MasterProcedureEnv env, ServerCrashState state) throws IOException {
    // Can't rollback.
    throw new UnsupportedOperationException("unhandled state=" + state);
  }

  @Override
  protected ServerCrashState getState(int stateId) {
    return ServerCrashState.forNumber(stateId);
  }

  @Override
  protected int getStateId(ServerCrashState state) {
    return state.getNumber();
  }

  @Override
  protected ServerCrashState getInitialState() {
    return ServerCrashState.SERVER_CRASH_START;
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // TODO
    return false;
  }

  @Override
  protected LockState acquireLock(final MasterProcedureEnv env) {
    if (env.getProcedureScheduler().waitServerExclusiveLock(this, getServerName())) {
      return LockState.LOCK_EVENT_WAIT;
    }
    return LockState.LOCK_ACQUIRED;
  }

  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureScheduler().wakeServerExclusiveLock(this, getServerName());
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getProcName());
    sb.append(", splitWal=");
    sb.append(shouldSplitWal);
    sb.append(", meta=");
    sb.append(carryingMeta);
  }

  @Override
  public String getProcName() {
    return getClass().getSimpleName() + " " + this.serverName;
  }

  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.serializeStateData(serializer);

    MasterProcedureProtos.ServerCrashStateData.Builder state =
      MasterProcedureProtos.ServerCrashStateData.newBuilder()
        .setServerName(ProtobufUtil.toServerName(this.serverName))
        .setCarryingMeta(this.carryingMeta)
        .setShouldSplitWal(this.shouldSplitWal);
    if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
      for (RegionInfo hri : this.regionsOnCrashedServer) {
        state.addRegionsOnCrashedServer(ProtobufUtil.toRegionInfo(hri));
      }
    }
    serializer.serialize(state.build());
  }
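
  /**
   * Restores the fields written by {@link #serializeStateData(ProcedureStateSerializer)} and
   * re-creates the {@link MonitoredTask} via {@code updateProgress(false)} so progress is
   * visible again after the procedure is reloaded from the store.
   */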
  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.deserializeStateData(serializer);

    MasterProcedureProtos.ServerCrashStateData state =
      serializer.deserialize(MasterProcedureProtos.ServerCrashStateData.class);
    this.serverName = ProtobufUtil.toServerName(state.getServerName());
    this.carryingMeta = state.hasCarryingMeta() ? state.getCarryingMeta() : false;
    // shouldSplitWal has a default over in pb so this invocation will always work.
    this.shouldSplitWal = state.getShouldSplitWal();
    int size = state.getRegionsOnCrashedServerCount();
    if (size > 0) {
      this.regionsOnCrashedServer = new ArrayList<>(size);
      for (org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo ri : state
        .getRegionsOnCrashedServerList()) {
        this.regionsOnCrashedServer.add(ProtobufUtil.toRegionInfo(ri));
      }
    }
    updateProgress(false);
  }

  @Override
  public ServerName getServerName() {
    return this.serverName;
  }

  @Override
  public boolean hasMetaTableRegion() {
    return this.carryingMeta;
  }

  @Override
  public ServerOperationType getServerOperationType() {
    return ServerOperationType.CRASH_HANDLER;
  }

  @Override
  protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
    // The operation is triggered internally on the server;
    // the client does not know about this procedure.
    return false;
  }

  /**
   * Moved out here so it can be overridden by the HBCK fix-up SCP, which is less strict about
   * what it will tolerate as a 'match'.
   * @return True if the region location in <code>rsn</code> matches that of this crashed server.
   */
  protected boolean isMatchingRegionLocation(RegionStateNode rsn) {
    return this.serverName.equals(rsn.getRegionLocation());
  }

  /**
   * Assign the regions on the crashed RS to other RSes.
   * <p/>
   * In this method we go through all the RegionStateNodes of the given regions to find out
   * whether there is already a TRSP for the region. If so, we interrupt it and let it retry on
   * another server; otherwise we schedule a TRSP to bring the region online.
   * <p/>
   * We also check whether the table for a region is enabled; if not, we skip assigning it.
   */
  private void assignRegions(MasterProcedureEnv env, List<RegionInfo> regions) throws IOException {
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    for (RegionInfo region : regions) {
      RegionStateNode regionNode = am.getRegionStates().getOrCreateRegionStateNode(region);
      regionNode.lock();
      try {
        // This is possible: when a server is dead, TRSP will fail to schedule a RemoteProcedure
        // and then try to assign the region to a new RS. Before it has updated the region
        // location to the new RS, we may have already called am.getRegionsOnServer, so we will
        // consider the region still on this crashed server. Then, before we arrive here, the
        // TRSP could have updated the region location, or even finished itself, so the region is
        // no longer on this crashed server any more. We should not try to assign it again. Please
        // see HBASE-23594 for more details.
        // UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get
        // in the way of our clearing out 'Unknown Servers'.
        if (!isMatchingRegionLocation(regionNode)) {
          // See HBASE-24117: though we have already changed the shutdown order, it is still worth
          // double checking here to confirm that we do not skip assignment incorrectly.
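          // If the AssignmentManager has stopped, the master is shutting down; fail the procedure
          // rather than silently skipping the assignment.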
          if (!am.isRunning()) {
            throw new DoNotRetryIOException(
              "AssignmentManager has been stopped, can not process assignment any more");
          }
          LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...",
            this, regionNode, serverName);
          continue;
        }
        if (regionNode.getProcedure() != null) {
          LOG.info("{} found RIT {}; {}", this, regionNode.getProcedure(), regionNode);
          regionNode.getProcedure().serverCrashed(env, regionNode, getServerName());
          continue;
        }
        if (env.getMasterServices().getTableStateManager()
          .isTableState(regionNode.getTable(), TableState.State.DISABLING)) {
          // We need to change the state here, otherwise the TRSP scheduled by DTP will try to
          // close the region from a dead server and will never succeed. Please see HBASE-23636
          // for more details.
          env.getAssignmentManager().regionClosedAbnormally(regionNode);
          LOG.info("{} found table disabling for region {}, set its state to ABNORMALLY_CLOSED.",
            this, regionNode);
          continue;
        }
        if (env.getMasterServices().getTableStateManager()
          .isTableState(regionNode.getTable(), TableState.State.DISABLED)) {
          // This should not happen: table disabled but still has regions on the server.
          LOG.warn("Found table disabled for region {}, procDetails: {}", regionNode, this);
          continue;
        }
        // Force the region onto a new candidate server; see HBASE-23035 for more details.
        TransitRegionStateProcedure proc =
          TransitRegionStateProcedure.assign(env, region, true, null);
        regionNode.setProcedure(proc);
        addChildProcedure(proc);
      } finally {
        regionNode.unlock();
      }
    }
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getMasterServices().getMasterMetrics().getServerCrashProcMetrics();
  }

  @Override
  protected boolean holdLock(MasterProcedureEnv env) {
    return true;
  }

  public static void updateProgress(MasterProcedureEnv env, long parentId) {
    if (parentId == NO_PROC_ID) {
      return;
    }
    Procedure parentProcedure =
      env.getMasterServices().getMasterProcedureExecutor().getProcedure(parentId);
    // instanceof is false for null, so no separate null check is needed.
    if (parentProcedure instanceof ServerCrashProcedure) {
      ((ServerCrashProcedure) parentProcedure).updateProgress(false);
    }
  }
}