1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.procedure;
19  
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.io.InterruptedIOException;
23  import java.io.OutputStream;
24  import java.util.ArrayList;
25  import java.util.Collection;
26  import java.util.HashSet;
27  import java.util.List;
28  import java.util.Set;
29  import java.util.concurrent.locks.Lock;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.HRegionInfo;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.client.ClusterConnection;
37  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
38  import org.apache.hadoop.hbase.master.AssignmentManager;
39  import org.apache.hadoop.hbase.master.MasterFileSystem;
40  import org.apache.hadoop.hbase.master.MasterServices;
41  import org.apache.hadoop.hbase.master.RegionState;
42  import org.apache.hadoop.hbase.master.RegionStates;
43  import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
44  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
45  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
46  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo;
47  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
48  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashState;
49  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
50  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
51  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
52  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
53  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
54  import org.apache.hadoop.util.StringUtils;
55  import org.apache.zookeeper.KeeperException;
56  
57  /**
58   * Handle crashed server. This is a port to ProcedureV2 of what used to be euphemistically called
59   * ServerShutdownHandler.
60   *
61   * <p>The procedure flow varies depending on whether meta is assigned, whether we are
62   * doing distributed log replay versus distributed log splitting, and whether we are to split
63   * logs at all.
64   *
65   * <p>This procedure asks that all crashed servers get processed equally; we yield after the
66   * completion of each successful flow step. We do this so that we do not 'deadlock' waiting on
67   * a region assignment in order to replay its edits, which could happen if a region has moved
68   * and there are edits for it on two different servers.
69   *
70   * <p>TODO: ASSIGN and WAIT_ON_ASSIGN (at least) are not idempotent. Revisit when assign is pv2.
71   * TODO: We do not have special handling for system tables.
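 *
 * <p>Illustrative only: a rough sketch of how the master typically queues this procedure when a
 * region server is expired. Names such as {@code master.getMasterProcedureExecutor()} reflect the
 * surrounding master code but are assumptions here, not the authoritative call site.
 * <pre>{@code
 * ProcedureExecutor<MasterProcedureEnv> executor = master.getMasterProcedureExecutor();
 * executor.submitProcedure(new ServerCrashProcedure(
 *     executor.getEnvironment(), crashedServerName, shouldSplitWal, carryingMeta));
 * }</pre>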
72   */
73  public class ServerCrashProcedure
74  extends StateMachineProcedure<MasterProcedureEnv, ServerCrashState>
75  implements ServerProcedureInterface {
76    private static final Log LOG = LogFactory.getLog(ServerCrashProcedure.class);
77  
78    /**
79     * Configuration key to set how long to wait in ms doing a quick check on meta state.
80     */
81    public static final String KEY_SHORT_WAIT_ON_META =
82        "hbase.master.servercrash.short.wait.on.meta.ms";
83  
84    public static final int DEFAULT_SHORT_WAIT_ON_META = 1000;
85  
86    /**
87     * Configuration key to set how many retries to cycle before we give up on meta.
88     * Each attempt will wait at least {@link #KEY_SHORT_WAIT_ON_META} milliseconds.
89     */
90    public static final String KEY_RETRIES_ON_META =
91        "hbase.master.servercrash.meta.retries";
92  
93    public static final int DEFAULT_RETRIES_ON_META = 10;
94  
95    /**
96     * Configuration key to set how long to wait in ms on regions in transition.
97     */
98    public static final String KEY_WAIT_ON_RIT =
99        "hbase.master.servercrash.wait.on.rit.ms";
100 
101   public static final int DEFAULT_WAIT_ON_RIT = 30000;
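  // The defaults above are in milliseconds (1 second for the quick meta check, 30 seconds for
  // regions in transition) except for the retry count. They can be overridden like any other
  // HBase configuration on the master, e.g. (illustrative only)
  // conf.setLong(KEY_WAIT_ON_RIT, 60000).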
102 
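  // Singleton set holding only the hbase:meta region; used when preparing log replay for meta
  // alone (see processMeta below).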
103   private static final Set<HRegionInfo> META_REGION_SET = new HashSet<HRegionInfo>();
104   static {
105     META_REGION_SET.add(HRegionInfo.FIRST_META_REGIONINFO);
106   }
107 
108   /**
109    * Name of the crashed server to process.
110    */
111   private ServerName serverName;
112 
113   /**
114    * Whether DeadServer knows that we are processing it.
115    */
116   private boolean notifiedDeadServer = false;
117 
118   /**
119    * Regions that were on the crashed server.
120    */
121   private Set<HRegionInfo> regionsOnCrashedServer;
122 
123   /**
124    * Regions assigned. Usually some subset of {@link #regionsOnCrashedServer}.
125    */
126   private List<HRegionInfo> regionsAssigned;
127 
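  // True if the cluster is in distributed log replay (DLR) mode rather than distributed log
  // splitting (DLS); determined in start() from the master filesystem's log recovery mode.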
128   private boolean distributedLogReplay = false;
129   private boolean carryingMeta = false;
130   private boolean shouldSplitWal;
131 
132   /**
133    * Cycles on same state. Good for figuring if we are stuck.
134    */
135   private int cycles = 0;
136 
137   /**
138    * Ordinal of the previous state. So we can tell if we are progressing or not. TODO: if useful,
139    * move this back up into StateMachineProcedure
140    */
141   private int previousState;
142 
143   /**
144    * Call this constructor when queuing up a Procedure.
145    * @param serverName Name of the crashed server.
146    * @param shouldSplitWal True if we should split WALs as part of crashed server processing.
147    * @param carryingMeta True if carrying hbase:meta table region.
148    */
149   public ServerCrashProcedure(
150       final MasterProcedureEnv env,
151       final ServerName serverName,
152       final boolean shouldSplitWal,
153       final boolean carryingMeta) {
154     this.serverName = serverName;
155     this.shouldSplitWal = shouldSplitWal;
156     this.carryingMeta = carryingMeta;
157     this.setOwner(env.getRequestUser().getShortName());
158   }
159 
160   /**
161    * Used when deserializing from a procedure store; we'll construct one of these and then call
162    * {@link #deserializeStateData(InputStream)}. Do not use directly.
163    */
164   public ServerCrashProcedure() {
165     super();
166   }
167 
168   private void throwProcedureYieldException(final String msg) throws ProcedureYieldException {
169     String logMsg = msg + "; cycle=" + this.cycles + ", running for " +
170         StringUtils.formatTimeDiff(System.currentTimeMillis(), getStartTime());
171     // The procedure executor logs ProcedureYieldException at trace level. For now, log these
172     // yields for server crash processing at DEBUG. Revisit when stable.
173     if (LOG.isDebugEnabled()) LOG.debug(logMsg);
174     throw new ProcedureYieldException(logMsg);
175   }
176 
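  /*
   * Summary of the state flow implemented by the switch in executeFromState below (derived from
   * the code; the protobuf-defined ServerCrashState enum is the authoritative list of states):
   *
   *   START              -> PROCESS_META (if carrying hbase:meta) | GET_REGIONS
   *   PROCESS_META       -> GET_REGIONS
   *   GET_REGIONS        -> ASSIGN (no WAL split) | PREPARE_LOG_REPLAY (DLR) | SPLIT_LOGS (DLS)
   *   PREPARE_LOG_REPLAY -> ASSIGN
   *   SPLIT_LOGS         -> FINISH (DLR) | ASSIGN (DLS)
   *   ASSIGN             -> WAIT_ON_ASSIGN (DLR and WAL splitting) | FINISH
   *   WAIT_ON_ASSIGN     -> SPLIT_LOGS
   */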
177   @Override
178   protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
179       throws ProcedureYieldException {
180     if (LOG.isTraceEnabled()) {
181       LOG.trace(state);
182     }
183     // Keep running count of cycles
184     if (state.ordinal() != this.previousState) {
185       this.previousState = state.ordinal();
186       this.cycles = 0;
187     } else {
188       this.cycles++;
189     }
190     MasterServices services = env.getMasterServices();
191     // Is master fully online? If not, yield. No processing of servers unless master is up
192     if (!services.getAssignmentManager().isFailoverCleanupDone()) {
193       throwProcedureYieldException("Waiting on master failover to complete");
194     }
195     // HBASE-14802
196     // If we have not yet notified that we are processing a dead server, we should do so now.
197     if (!notifiedDeadServer) {
198       services.getServerManager().getDeadServers().notifyServer(serverName);
199       notifiedDeadServer = true;
200     }
201 
202     try {
203       switch (state) {
204       case SERVER_CRASH_START:
205         LOG.info("Start processing crashed " + this.serverName);
206         start(env);
207         // If carrying meta, process it first. Else, get list of regions on crashed server.
208         if (this.carryingMeta) setNextState(ServerCrashState.SERVER_CRASH_PROCESS_META);
209         else setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
210         break;
211 
212       case SERVER_CRASH_GET_REGIONS:
213         // If hbase:meta is not assigned, yield.
214         if (!isMetaAssignedQuickTest(env)) {
215           // isMetaAssignedQuickTest does not really wait. Let's delay a little before
216           // another round of execution.
217           long wait =
218               env.getMasterConfiguration().getLong(KEY_SHORT_WAIT_ON_META,
219                 DEFAULT_SHORT_WAIT_ON_META);
220           wait = wait / 10;
221           Thread.sleep(wait);
222           throwProcedureYieldException("Waiting on hbase:meta assignment");
223         }
224         this.regionsOnCrashedServer =
225             services.getAssignmentManager().getRegionStates().getServerRegions(this.serverName);
226         // Where to go next? Depends on whether we should split logs at all or if we should do
227         // distributed log splitting (DLS) vs distributed log replay (DLR).
228         if (!this.shouldSplitWal) {
229           setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
230         } else if (this.distributedLogReplay) {
231           setNextState(ServerCrashState.SERVER_CRASH_PREPARE_LOG_REPLAY);
232         } else {
233           setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
234         }
235         break;
236 
237       case SERVER_CRASH_PROCESS_META:
238         // If we fail processing hbase:meta, yield.
239         if (!processMeta(env)) {
240           throwProcedureYieldException("Waiting on regions-in-transition to clear");
241         }
242         setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
243         break;
244 
245       case SERVER_CRASH_PREPARE_LOG_REPLAY:
246         prepareLogReplay(env, this.regionsOnCrashedServer);
247         setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
248         break;
249 
250       case SERVER_CRASH_SPLIT_LOGS:
251         splitLogs(env);
252         // If DLR, go to FINISH. Otherwise, if DLS, go to SERVER_CRASH_ASSIGN
253         if (this.distributedLogReplay) setNextState(ServerCrashState.SERVER_CRASH_FINISH);
254         else setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
255         break;
256 
257       case SERVER_CRASH_ASSIGN:
258         List<HRegionInfo> regionsToAssign = calcRegionsToAssign(env);
259 
260         // Assign may not be idempotent. SSH used to requeue itself if we got an IOE while
261         // assigning, which is what we mimic here, but it looks prone to double assignment if
262         // assign fails midway. TODO: Test.
263 
264         // If no regions to assign, skip assign and skip to the finish.
265         boolean regions = regionsToAssign != null && !regionsToAssign.isEmpty();
266         if (regions) {
267           this.regionsAssigned = regionsToAssign;
268           if (!assign(env, regionsToAssign)) {
269             throwProcedureYieldException("Failed assign; will retry");
270           }
271         }
272         if (this.shouldSplitWal && distributedLogReplay) {
273           // Take this route even if there are apparently no regions assigned. This may be our
274           // second time through here; i.e. we assigned and crashed just about here. On second
275           // time through, there will be no regions because we assigned them in the previous step.
276           // Even though no regions, we need to go through here to clean up the DLR zk markers.
277           setNextState(ServerCrashState.SERVER_CRASH_WAIT_ON_ASSIGN);
278         } else {
279           setNextState(ServerCrashState.SERVER_CRASH_FINISH);
280         }
281         break;
282 
283       case SERVER_CRASH_WAIT_ON_ASSIGN:
284         // TODO: The list of regionsAssigned may be more than we actually assigned. See down in
285         // AM #1629 around 'if (regionStates.wasRegionOnDeadServer(encodedName)) {' where we
286         // will skip assigning a region because it is/was on a dead server. Should never happen!
287         // It was on this server. Worst comes to worst, we'll still wait here till the other
288         // server is processed.
289 
290         // If the wait on assign failed, yield -- if we have regions to assign.
291         if (this.regionsAssigned != null && !this.regionsAssigned.isEmpty()) {
292           if (!waitOnAssign(env, this.regionsAssigned)) {
293             throwProcedureYieldException("Waiting on region assign");
294           }
295         }
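        // We only reach WAIT_ON_ASSIGN on the distributed log replay path; once regions are back
        // online (so their edits can be replayed), loop back to SPLIT_LOGS to clean up the
        // recovering-region markers before finishing.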
296         setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
297         break;
298 
299       case SERVER_CRASH_FINISH:
300         LOG.info("Finished processing of crashed " + serverName);
301         services.getServerManager().getDeadServers().finish(serverName);
302         return Flow.NO_MORE_STATE;
303 
304       default:
305         throw new UnsupportedOperationException("unhandled state=" + state);
306       }
307     } catch (ProcedureYieldException e) {
308       LOG.warn("Failed serverName=" + this.serverName + ", state=" + state + "; retry "
309           + e.getMessage());
310       throw e;
311     } catch (IOException e) {
312       LOG.warn("Failed serverName=" + this.serverName + ", state=" + state + "; retry", e);
313     } catch (InterruptedException e) {
314       // TODO: Make executor allow IEs coming up out of execute.
315       LOG.warn("Interrupted serverName=" + this.serverName + ", state=" + state + "; retry", e);
316       Thread.currentThread().interrupt();
317     }
318     return Flow.HAS_MORE_STATE;
319   }
320 
321   /**
322    * Start processing of the crashed server. In here we'll just set configs and return.
323    * @param env
324    * @throws IOException
325    */
326   private void start(final MasterProcedureEnv env) throws IOException {
327     MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
328     // Set recovery mode late. This is what the old ServerShutdownHandler used to do.
329     mfs.setLogRecoveryMode();
330     this.distributedLogReplay = mfs.getLogRecoveryMode() == RecoveryMode.LOG_REPLAY;
331   }
332 
333   /**
334    * Split hbase:meta WALs if requested, and reassign hbase:meta if this server was carrying it.
335    * @param env
336    * @return False if we fail to assign and split logs on meta ('process').
337    * @throws IOException
338    */
339   private boolean processMeta(final MasterProcedureEnv env)
340   throws IOException {
341     if (LOG.isDebugEnabled()) LOG.debug("Processing hbase:meta that was on " + this.serverName);
342     MasterServices services = env.getMasterServices();
343     MasterFileSystem mfs = services.getMasterFileSystem();
344     AssignmentManager am = services.getAssignmentManager();
345     HRegionInfo metaHRI = HRegionInfo.FIRST_META_REGIONINFO;
346     if (this.shouldSplitWal) {
347       if (this.distributedLogReplay) {
348         prepareLogReplay(env, META_REGION_SET);
349       } else {
350         // TODO: Matteo. We BLOCK here, but this is the most important work at this moment.
351         mfs.splitMetaLog(serverName);
352         am.getRegionStates().logSplit(metaHRI);
353       }
354     }
355 
356     // Assign meta if still carrying it. Check again: region may be assigned because of RIT timeout
357     boolean processed = true;
358     boolean shouldAssignMeta = false;
359     AssignmentManager.ServerHostRegion rsCarryingMetaRegion = am.isCarryingMeta(serverName);
360     switch (rsCarryingMetaRegion) {
361     case HOSTING_REGION:
362       LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
363       am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
364       shouldAssignMeta = true;
365       break;
366     case UNKNOWN:
367       if (!services.getMetaTableLocator().isLocationAvailable(services.getZooKeeper())) {
368         // The meta location as per the master is null. This could happen if meta assignment
369         // in a previous run failed while the meta znode was updated to null.
370         // We should try to assign meta again.
371         shouldAssignMeta = true;
372         break;
373       }
374       // fall through
375     case NOT_HOSTING_REGION:
376       LOG.info("META has been assigned elsewhere, skip assigning.");
377       break;
378     default:
379       throw new IOException("Unsupported action in MetaServerShutdownHandler");
380     }
381     if (shouldAssignMeta) {
382       // TODO: May block here if hard time figuring state of meta.
383       verifyAndAssignMetaWithRetries(env);
384       if (this.shouldSplitWal && distributedLogReplay) {
385         int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT);
386         if (!waitOnRegionToClearRegionsInTransition(am, metaHRI, timeout)) {
387           processed = false;
388         } else {
389           // TODO: Matteo. We BLOCK here, but this is the most important work at this moment.
390           mfs.splitMetaLog(serverName);
391         }
392       }
393     }
394     return processed;
395   }
396 
397   /**
398    * @return True if region cleared RIT, else false if we timed out waiting.
399    * @throws InterruptedIOException
400    */
401   private boolean waitOnRegionToClearRegionsInTransition(AssignmentManager am,
402       final HRegionInfo hri, final int timeout)
403   throws InterruptedIOException {
404     try {
405       if (!am.waitOnRegionToClearRegionsInTransition(hri, timeout)) {
406         // We wait here to avoid log replay hitting the current dead server and incurring an RPC
407         // timeout when replay happens before region assignment completes.
408         LOG.warn("Region " + hri.getEncodedName() + " didn't complete assignment in time");
409         return false;
410       }
411     } catch (InterruptedException ie) {
412       throw new InterruptedIOException("Caught " + ie +
413         " during waitOnRegionToClearRegionsInTransition for " + hri);
414     }
415     return true;
416   }
417 
418   private void prepareLogReplay(final MasterProcedureEnv env, final Set<HRegionInfo> regions)
419   throws IOException {
420     if (LOG.isDebugEnabled()) {
421       LOG.debug("Mark " + size(regions) + " regions-in-recovery from " +
422         this.serverName);
423     }
424     MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
425     AssignmentManager am = env.getMasterServices().getAssignmentManager();
426     mfs.prepareLogReplay(this.serverName, regions);
427     am.getRegionStates().logSplit(this.serverName);
428   }
429 
430   private void splitLogs(final MasterProcedureEnv env) throws IOException {
431     if (LOG.isDebugEnabled()) {
432       LOG.debug("Splitting logs from " + serverName + "; region count=" +
433         size(this.regionsOnCrashedServer));
434     }
435     MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
436     AssignmentManager am = env.getMasterServices().getAssignmentManager();
437     // TODO: For Matteo. Below BLOCKs!!!! Redo so can relinquish executor while it is running.
438     mfs.splitLog(this.serverName);
439     am.getRegionStates().logSplit(this.serverName);
440   }
441 
442   static int size(final Collection<HRegionInfo> hris) {
443     return hris == null? 0: hris.size();
444   }
445 
446   /**
447    * Figure out what we need to assign. Should be idempotent.
448    * @param env
449    * @return List of calculated regions to assign; may be empty or null.
450    * @throws IOException
451    */
452   private List<HRegionInfo> calcRegionsToAssign(final MasterProcedureEnv env)
453   throws IOException {
454     AssignmentManager am = env.getMasterServices().getAssignmentManager();
455     List<HRegionInfo> regionsToAssignAggregator = new ArrayList<HRegionInfo>();
456     int replicaCount = env.getMasterConfiguration().getInt(HConstants.META_REPLICAS_NUM,
457       HConstants.DEFAULT_META_REPLICA_NUM);
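    // Replica 0 is the primary hbase:meta region and is handled by processMeta(); here we only
    // pick up secondary meta replicas (replica id >= 1) that were on the crashed server.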
458     for (int i = 1; i < replicaCount; i++) {
459       HRegionInfo metaHri =
460           RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i);
461       if (am.isCarryingMetaReplica(this.serverName, metaHri) ==
462           AssignmentManager.ServerHostRegion.HOSTING_REGION) {
463         if (LOG.isDebugEnabled()) {
464           LOG.debug("Reassigning meta replica " + metaHri + " that was on " + this.serverName);
465         }
466         regionsToAssignAggregator.add(metaHri);
467       }
468     }
469     // Clean out anything in regions in transition.
470     List<HRegionInfo> regionsInTransition = am.cleanOutCrashedServerReferences(serverName);
471     if (LOG.isDebugEnabled()) {
472       LOG.debug("Reassigning " + size(this.regionsOnCrashedServer) +
473         " region(s) that " + (serverName == null? "null": serverName) +
474         " was carrying (and " + regionsInTransition.size() +
475         " region(s) that were opening on this server)");
476     }
477     regionsToAssignAggregator.addAll(regionsInTransition);
478 
479     // Iterate regions that were on this server and figure which of these we need to reassign
480     if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
481       RegionStates regionStates = am.getRegionStates();
482       for (HRegionInfo hri: this.regionsOnCrashedServer) {
483         if (regionsInTransition.contains(hri)) continue;
484         String encodedName = hri.getEncodedName();
485         Lock lock = am.acquireRegionLock(encodedName);
486         try {
487           RegionState rit = regionStates.getRegionTransitionState(hri);
488           if (processDeadRegion(hri, am)) {
489             ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
490             if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
491               // If this region is in transition on the dead server, it must be
492               // opening or pending_open, which should have been covered by
493               // AM#cleanOutCrashedServerReferences
494               LOG.info("Skip assigning " + hri.getRegionNameAsString()
495                 + " because opened on " + addressFromAM.getServerName());
496               continue;
497             }
498             if (rit != null) {
499               if (rit.getServerName() != null && !rit.isOnServer(this.serverName)) {
500                 // Skip regions that are in transition on other server
501                 LOG.info("Skip assigning region in transition on other server " + rit);
502                 continue;
503               }
504               LOG.info("Reassigning region " + rit + " and clearing zknode if exists");
505               try {
506                 // This clears out any RIT that might be sticking around.
507                 ZKAssign.deleteNodeFailSilent(env.getMasterServices().getZooKeeper(), hri);
508               } catch (KeeperException e) {
509                 // TODO: FIX!!!! ABORTING SERVER BECAUSE COULDN'T PURGE ZNODE. This is what we
510                 // used to do but that doesn't make it right!!!
511                 env.getMasterServices().abort("Unexpected error deleting RIT " + hri, e);
512                 throw new IOException(e);
513               }
514               regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
515             } else if (regionStates.isRegionInState(
516                 hri, RegionState.State.SPLITTING_NEW, RegionState.State.MERGING_NEW)) {
517               regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
518             }
519             regionsToAssignAggregator.add(hri);
520           // TODO: The else-if below differs between branch-1 and the master branch.
521           } else if (rit != null) {
522             if ((rit.isPendingCloseOrClosing() || rit.isOffline())
523                 && am.getTableStateManager().isTableState(hri.getTable(),
524                 ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) ||
525                 am.getReplicasToClose().contains(hri)) {
526               // If the table was partially disabled and the RS went down, we should clear the
527               // RIT and remove the node for the region.
528               // The RIT we use may be stale if the table was in DISABLING state: even though we
529               // did assign, we will not be clearing the znode in CLOSING state. Doing this does
530               // no harm. See HBASE-5927.
531               regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
532               am.deleteClosingOrClosedNode(hri, rit.getServerName());
533               am.offlineDisabledRegion(hri);
534             } else {
535               LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
536                 + rit + " not to be assigned by SSH of server " + serverName);
537             }
538           }
539         } finally {
540           lock.unlock();
541         }
542       }
543     }
544     return regionsToAssignAggregator;
545   }
546 
547   private boolean assign(final MasterProcedureEnv env, final List<HRegionInfo> hris)
548   throws InterruptedIOException {
549     AssignmentManager am = env.getMasterServices().getAssignmentManager();
550     try {
551       am.assign(hris);
552     } catch (InterruptedException ie) {
553       LOG.error("Caught " + ie + " during round-robin assignment");
554       throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
555     } catch (IOException ioe) {
556       LOG.info("Caught " + ioe + " during region assignment, will retry");
557       return false;
558     }
559     return true;
560   }
561 
562   private boolean waitOnAssign(final MasterProcedureEnv env, final List<HRegionInfo> hris)
563   throws InterruptedIOException {
564     int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT);
565     for (HRegionInfo hri: hris) {
566       // TODO: Blocks here.
567       if (!waitOnRegionToClearRegionsInTransition(env.getMasterServices().getAssignmentManager(),
568           hri, timeout)) {
569         return false;
570       }
571     }
572     return true;
573   }
574 
575   @Override
576   protected void rollbackState(MasterProcedureEnv env, ServerCrashState state)
577   throws IOException {
578     // Can't rollback.
579     throw new UnsupportedOperationException("unhandled state=" + state);
580   }
581 
582   @Override
583   protected ServerCrashState getState(int stateId) {
584     return ServerCrashState.valueOf(stateId);
585   }
586 
587   @Override
588   protected int getStateId(ServerCrashState state) {
589     return state.getNumber();
590   }
591 
592   @Override
593   protected ServerCrashState getInitialState() {
594     return ServerCrashState.SERVER_CRASH_START;
595   }
596 
597   @Override
598   protected boolean abort(MasterProcedureEnv env) {
599     // TODO
600     return false;
601   }
602 
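  // Defer (return false) while server crash processing is still disabled (e.g. while the master
  // is still initializing), then take the per-server exclusive lock so that only one crash
  // procedure runs against this server at a time.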
603   @Override
604   protected boolean acquireLock(final MasterProcedureEnv env) {
605     if (env.waitServerCrashProcessingEnabled(this)) return false;
606     return env.getProcedureQueue().tryAcquireServerExclusiveLock(this, getServerName());
607   }
608 
609   @Override
610   protected void releaseLock(final MasterProcedureEnv env) {
611     env.getProcedureQueue().releaseServerExclusiveLock(this, getServerName());
612   }
613 
614   @Override
615   public void toStringClassDetails(StringBuilder sb) {
616     sb.append(getClass().getSimpleName());
617     sb.append(" serverName=");
618     sb.append(this.serverName);
619     sb.append(", shouldSplitWal=");
620     sb.append(shouldSplitWal);
621     sb.append(", carryingMeta=");
622     sb.append(carryingMeta);
623   }
624 
625   @Override
626   public void serializeStateData(final OutputStream stream) throws IOException {
627     super.serializeStateData(stream);
628 
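    // The entire crash-procedure state is written as a single delimited ServerCrashStateData
    // message; deserializeStateData below reads it back with parseDelimitedFrom.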
629     MasterProcedureProtos.ServerCrashStateData.Builder state =
630       MasterProcedureProtos.ServerCrashStateData.newBuilder().
631       setServerName(ProtobufUtil.toServerName(this.serverName)).
632       setDistributedLogReplay(this.distributedLogReplay).
633       setCarryingMeta(this.carryingMeta).
634       setShouldSplitWal(this.shouldSplitWal);
635     if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
636       for (HRegionInfo hri: this.regionsOnCrashedServer) {
637         state.addRegionsOnCrashedServer(HRegionInfo.convert(hri));
638       }
639     }
640     if (this.regionsAssigned != null && !this.regionsAssigned.isEmpty()) {
641       for (HRegionInfo hri: this.regionsAssigned) {
642         state.addRegionsAssigned(HRegionInfo.convert(hri));
643       }
644     }
645     state.build().writeDelimitedTo(stream);
646   }
647 
648   @Override
649   public void deserializeStateData(final InputStream stream) throws IOException {
650     super.deserializeStateData(stream);
651 
652     MasterProcedureProtos.ServerCrashStateData state =
653       MasterProcedureProtos.ServerCrashStateData.parseDelimitedFrom(stream);
654     this.serverName = ProtobufUtil.toServerName(state.getServerName());
655     this.distributedLogReplay = state.hasDistributedLogReplay()?
656       state.getDistributedLogReplay(): false;
657     this.carryingMeta = state.hasCarryingMeta()? state.getCarryingMeta(): false;
658     // shouldSplitWAL has a default over in pb so this invocation will always work.
659     this.shouldSplitWal = state.getShouldSplitWal();
660     int size = state.getRegionsOnCrashedServerCount();
661     if (size > 0) {
662       this.regionsOnCrashedServer = new HashSet<HRegionInfo>(size);
663       for (RegionInfo ri: state.getRegionsOnCrashedServerList()) {
664         this.regionsOnCrashedServer.add(HRegionInfo.convert(ri));
665       }
666     }
667     size = state.getRegionsAssignedCount();
668     if (size > 0) {
669       this.regionsAssigned = new ArrayList<HRegionInfo>(size);
670       for (RegionInfo ri: state.getRegionsAssignedList()) {
671         this.regionsAssigned.add(HRegionInfo.convert(ri));
672       }
673     }
674   }
675 
676   /**
677    * Process a dead region from a dead RS. Checks if the region is disabled or
678    * disabling or if the region has a partially completed split.
679    * @param hri
680    * @param assignmentManager
681    * @return Returns true if specified region should be assigned, false if not.
682    * @throws IOException
683    */
684   private static boolean processDeadRegion(HRegionInfo hri, AssignmentManager assignmentManager)
685   throws IOException {
686     boolean tablePresent = assignmentManager.getTableStateManager().isTablePresent(hri.getTable());
687     if (!tablePresent) {
688       LOG.info("The table " + hri.getTable() + " was deleted.  Hence not proceeding.");
689       return false;
690     }
691     // If the table is disabled, there is no need to assign the region.
692     boolean disabled = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
693       ZooKeeperProtos.Table.State.DISABLED);
694     if (disabled){
695       LOG.info("The table " + hri.getTable() + " was disabled.  Hence not proceeding.");
696       return false;
697     }
698     if (hri.isOffline() && hri.isSplit()) {
699       // HBASE-7721: Split parent and daughters are inserted into hbase:meta as an atomic operation.
700       // If the meta scanner saw the parent split, then it should see the daughters as assigned
701       // to the dead server. We don't have to do anything.
702       return false;
703     }
704     boolean disabling = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
705       ZooKeeperProtos.Table.State.DISABLING);
706     if (disabling) {
707       LOG.info("The table " + hri.getTable() + " is disabled.  Hence not assigning region " +
708         hri.getEncodedName());
709       return false;
710     }
711     return true;
712   }
713 
714   /**
715    * If hbase:meta is not assigned already, assign.
716    * @throws IOException
717    */
718   private void verifyAndAssignMetaWithRetries(final MasterProcedureEnv env) throws IOException {
719     MasterServices services = env.getMasterServices();
720     int iTimes = services.getConfiguration().getInt(KEY_RETRIES_ON_META, DEFAULT_RETRIES_ON_META);
721     // Just reuse same time as we have for short wait on meta. Adding another config is overkill.
722     long waitTime =
723       services.getConfiguration().getLong(KEY_SHORT_WAIT_ON_META, DEFAULT_SHORT_WAIT_ON_META);
724     int iFlag = 0;
725     while (true) {
726       try {
727         verifyAndAssignMeta(env);
728         break;
729       } catch (KeeperException e) {
730         services.abort("In server shutdown processing, assigning meta", e);
731         throw new IOException("Aborting", e);
732       } catch (Exception e) {
733         if (iFlag >= iTimes) {
734           services.abort("verifyAndAssignMeta failed after " + iTimes + " retries, aborting", e);
735           throw new IOException("Aborting", e);
736         }
737         try {
738           Thread.sleep(waitTime);
739         } catch (InterruptedException e1) {
740           LOG.warn("Interrupted while sleeping between meta assignment retries", e1);
741           Thread.currentThread().interrupt();
742           throw (InterruptedIOException)new InterruptedIOException().initCause(e1);
743         }
744         iFlag++;
745       }
746     }
747   }
748 
749   /**
750    * If hbase:meta is not assigned already, assign.
751    * @throws InterruptedException
752    * @throws IOException
753    * @throws KeeperException
754    */
755   private void verifyAndAssignMeta(final MasterProcedureEnv env)
756       throws InterruptedException, IOException, KeeperException {
757     MasterServices services = env.getMasterServices();
758     if (!isMetaAssignedQuickTest(env)) {
759       services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
760     } else if (serverName.equals(services.getMetaTableLocator().
761         getMetaRegionLocation(services.getZooKeeper()))) {
762       // hbase:meta seems to be still alive on the server that the master is expiring
763       // and thinks is dying. Let's reassign hbase:meta anyway.
764       services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
765     } else {
766       LOG.info("Skip assigning hbase:meta because it is online at "
767           + services.getMetaTableLocator().getMetaRegionLocation(services.getZooKeeper()));
768     }
769   }
770 
771   /**
772    * A quick test that hbase:meta is assigned; blocks only for a short time.
773    * @return True if hbase:meta location is available and verified as good.
774    * @throws InterruptedException
775    * @throws IOException
776    */
777   private boolean isMetaAssignedQuickTest(final MasterProcedureEnv env)
778   throws InterruptedException, IOException {
779     ZooKeeperWatcher zkw = env.getMasterServices().getZooKeeper();
780     MetaTableLocator mtl = env.getMasterServices().getMetaTableLocator();
781     boolean metaAssigned = false;
782     // Is hbase:meta location available yet?
783     if (mtl.isLocationAvailable(zkw)) {
784       ClusterConnection connection = env.getMasterServices().getConnection();
785       // Is hbase:meta location good yet?
786       long timeout =
787         env.getMasterConfiguration().getLong(KEY_SHORT_WAIT_ON_META, DEFAULT_SHORT_WAIT_ON_META);
788       if (mtl.verifyMetaRegionLocation(connection, zkw, timeout)) {
789         metaAssigned = true;
790       }
791     }
792     return metaAssigned;
793   }
794 
795   @Override
796   public ServerName getServerName() {
797     return this.serverName;
798   }
799 
800   @Override
801   public boolean hasMetaTableRegion() {
802     return this.carryingMeta;
803   }
804 
805   @Override
806   public ServerOperationType getServerOperationType() {
807     return ServerOperationType.CRASH_HANDLER;
808   }
809 
810   /**
811    * For this procedure, yield at the end of each successful flow step so that all crashed
812    * servers can make progress rather than do the default, which has each procedure running to
813    * completion before we move to the next. For crashed servers, especially if running with
814    * distributed log replay, we will want all servers to come along; we do not want the scenario
815    * where a server is stuck waiting for regions to come online so it can replay edits.
816    */
817   @Override
818   protected boolean isYieldBeforeExecuteFromState(MasterProcedureEnv env, ServerCrashState state) {
819     return true;
820   }
821 
822   @Override
823   protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
824     // The operation is triggered internally on the server
825     // the client does not know about this procedure.
826     return false;
827   }
828 }