/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.Lock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo;
import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashState;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;

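/**
 * Handle crashed server. This is a port to ProcedureV2 of what used to be euphemistically called
 * ServerShutdownHandler (SSH).
 *
 * <p>The procedure flow varies depending on whether meta is assigned, whether we are doing
 * distributed log splitting or distributed log replay, and whether we are to split WALs at all.
 *
 * <p>The procedure yields after each flow step (see
 * {@link #isYieldBeforeExecuteFromState(MasterProcedureEnv, ServerCrashState)}) so that all
 * crashed servers can make progress rather than one procedure running to completion first.
 */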
public class ServerCrashProcedure
    extends StateMachineProcedure<MasterProcedureEnv, ServerCrashState>
    implements ServerProcedureInterface {
  private static final Log LOG = LogFactory.getLog(ServerCrashProcedure.class);

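  /**
   * Configuration key for a short wait on hbase:meta, in ms: how long the quick check of meta
   * state may block, and how long to sleep between meta assign retries.
   */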
  public static final String KEY_SHORT_WAIT_ON_META =
      "hbase.master.servercrash.short.wait.on.meta.ms";

  public static final int DEFAULT_SHORT_WAIT_ON_META = 1000;

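  /**
   * Configuration key for how many times to retry verify-and-assign of hbase:meta before
   * aborting the master.
   */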
  public static final String KEY_RETRIES_ON_META =
      "hbase.master.servercrash.meta.retries";

  public static final int DEFAULT_RETRIES_ON_META = 10;

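  /**
   * Configuration key for how long to wait, in ms, on regions in transition to clear.
   */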
  public static final String KEY_WAIT_ON_RIT =
      "hbase.master.servercrash.wait.on.rit.ms";

  public static final int DEFAULT_WAIT_ON_RIT = 30000;

  private static final Set<HRegionInfo> META_REGION_SET = new HashSet<HRegionInfo>();
  static {
    META_REGION_SET.add(HRegionInfo.FIRST_META_REGIONINFO);
  }

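  /** Name of the crashed server to process. */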
  private ServerName serverName;

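  /** Whether DeadServer knows that we are processing it. */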
  private boolean notifiedDeadServer = false;

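  /** Regions that were on the crashed server. */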
  private Set<HRegionInfo> regionsOnCrashedServer;

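  /**
   * Regions assigned by this procedure: regions that were on the crashed server plus any that
   * were in transition onto it. Waited on in the WAIT_ON_ASSIGN step when doing log replay.
   */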
  private List<HRegionInfo> regionsAssigned;

  private boolean distributedLogReplay = false;
  private boolean carryingMeta = false;
  private boolean shouldSplitWal;

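  /** Number of times the current state has been re-run without a state change; for logging. */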
  private int cycles = 0;

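  /**
   * Ordinal of the last state executed. Lets us spot whether we are re-running the same state
   * (in which case {@link #cycles} is incremented) or have moved on (cycles resets).
   */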
  private int previousState;

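  /**
   * Call this constructor to queue up a new procedure.
   * @param serverName name of the crashed server
   * @param shouldSplitWal true if we should split the crashed server's WALs
   * @param carryingMeta true if the crashed server was hosting hbase:meta
   */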
  public ServerCrashProcedure(
      final MasterProcedureEnv env,
      final ServerName serverName,
      final boolean shouldSplitWal,
      final boolean carryingMeta) {
    this.serverName = serverName;
    this.shouldSplitWal = shouldSplitWal;
    this.carryingMeta = carryingMeta;
    this.setOwner(env.getRequestUser().getShortName());
  }

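  /**
   * Used when deserializing from a procedure store; we will construct one of these empty
   * instances and then call {@link #deserializeStateData(InputStream)} to fill it out.
   */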
  public ServerCrashProcedure() {
    super();
  }

  private void throwProcedureYieldException(final String msg) throws ProcedureYieldException {
    String logMsg = msg + "; cycle=" + this.cycles + ", running for " +
        StringUtils.formatTimeDiff(System.currentTimeMillis(), getStartTime());
    // These yields are routine; log at DEBUG to avoid noise.
    if (LOG.isDebugEnabled()) LOG.debug(logMsg);
    throw new ProcedureYieldException(logMsg);
  }

  @Override
  protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
      throws ProcedureYieldException {
    if (LOG.isTraceEnabled()) {
      LOG.trace(state);
    }
    // Keep a running count of cycles spent in the same state; reset it when the state changes.
    if (state.ordinal() != this.previousState) {
      this.previousState = state.ordinal();
      this.cycles = 0;
    } else {
      this.cycles++;
    }
    MasterServices services = env.getMasterServices();
    // Is master fully online? If not, yield; no processing of crashed servers until it is.
    if (!services.getAssignmentManager().isFailoverCleanupDone()) {
      throwProcedureYieldException("Waiting on master failover to complete");
    }
    // Tell the DeadServer accounting that we are processing this server, once only.
    if (!notifiedDeadServer) {
      services.getServerManager().getDeadServers().notifyServer(serverName);
      notifiedDeadServer = true;
    }

    try {
      switch (state) {
      case SERVER_CRASH_START:
        LOG.info("Start processing crashed " + this.serverName);
        start(env);
        // If carrying meta, process it first; else get the list of regions on the crashed server.
        if (this.carryingMeta) setNextState(ServerCrashState.SERVER_CRASH_PROCESS_META);
        else setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
        break;

      case SERVER_CRASH_GET_REGIONS:
        // If hbase:meta is not assigned, yield; we cannot read region states until it is up.
        if (!isMetaAssignedQuickTest(env)) {
          // isMetaAssignedQuickTest does not really wait, so sleep a little (a tenth of the
          // configured short wait on meta) before we yield and come around again.
          long wait =
              env.getMasterConfiguration().getLong(KEY_SHORT_WAIT_ON_META,
                DEFAULT_SHORT_WAIT_ON_META);
          wait = wait / 10;
          Thread.sleep(wait);
          throwProcedureYieldException("Waiting on hbase:meta assignment");
        }
        this.regionsOnCrashedServer =
            services.getAssignmentManager().getRegionStates().getServerRegions(this.serverName);
        // Where to go next? Depends on whether we should split WALs at all and, if so, whether
        // we are doing distributed log splitting or distributed log replay.
        if (!this.shouldSplitWal) {
          setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
        } else if (this.distributedLogReplay) {
          setNextState(ServerCrashState.SERVER_CRASH_PREPARE_LOG_REPLAY);
        } else {
          setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
        }
        break;

      case SERVER_CRASH_PROCESS_META:
        // If we fail processing hbase:meta, yield and retry this state.
        if (!processMeta(env)) {
          throwProcedureYieldException("Waiting on regions-in-transition to clear");
        }
        setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
        break;

      case SERVER_CRASH_PREPARE_LOG_REPLAY:
        prepareLogReplay(env, this.regionsOnCrashedServer);
        setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
        break;

      case SERVER_CRASH_SPLIT_LOGS:
        splitLogs(env);
        // In distributed log replay, assignment happened before this step, so we are done;
        // otherwise assign now that the logs are split.
        if (this.distributedLogReplay) setNextState(ServerCrashState.SERVER_CRASH_FINISH);
        else setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
        break;

      case SERVER_CRASH_ASSIGN:
        List<HRegionInfo> regionsToAssign = calcRegionsToAssign(env);
        // Assign may not be idempotent. The old ServerShutdownHandler requeued itself when an
        // assign failed; we mimic that here by yielding and retrying this state.
        boolean hasRegionsToAssign = regionsToAssign != null && !regionsToAssign.isEmpty();
        if (hasRegionsToAssign) {
          this.regionsAssigned = regionsToAssign;
          if (!assign(env, regionsToAssign)) {
            throwProcedureYieldException("Failed assign; will retry");
          }
        }
        if (this.shouldSplitWal && distributedLogReplay) {
          // With distributed log replay, regions are assigned first and their WAL edits are
          // replayed after, so wait on the assigns before moving to the split (replay) step.
          setNextState(ServerCrashState.SERVER_CRASH_WAIT_ON_ASSIGN);
        } else {
          setNextState(ServerCrashState.SERVER_CRASH_FINISH);
        }
        break;

      case SERVER_CRASH_WAIT_ON_ASSIGN:
        // Wait on the regions assigned in the previous state to come online before replaying
        // their WAL edits; replay against a region that is not yet open can hit RPC timeouts.
        if (this.regionsAssigned != null && !this.regionsAssigned.isEmpty()) {
          if (!waitOnAssign(env, this.regionsAssigned)) {
            throwProcedureYieldException("Waiting on region assign");
          }
        }
        setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
        break;

      case SERVER_CRASH_FINISH:
        LOG.info("Finished processing of crashed " + serverName);
        services.getServerManager().getDeadServers().finish(serverName);
        return Flow.NO_MORE_STATE;

      default:
        throw new UnsupportedOperationException("unhandled state=" + state);
      }
    } catch (ProcedureYieldException e) {
      LOG.warn("Failed serverName=" + this.serverName + ", state=" + state + "; retry "
          + e.getMessage());
      throw e;
    } catch (IOException e) {
      LOG.warn("Failed serverName=" + this.serverName + ", state=" + state + "; retry", e);
    } catch (InterruptedException e) {
      // Restore the interrupt status and fall through to retry the state.
      LOG.warn("Interrupted serverName=" + this.serverName + ", state=" + state + "; retry", e);
      Thread.currentThread().interrupt();
    }
    return Flow.HAS_MORE_STATE;
  }
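  /**
   * Start the processing of a crashed server: resolve and record the log recovery mode so that
   * subsequent states know whether this is distributed log replay or distributed log splitting.
   */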
  private void start(final MasterProcedureEnv env) throws IOException {
    MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    // Resolve the recovery mode now; later states branch on whether it is LOG_REPLAY.
    mfs.setLogRecoveryMode();
    this.distributedLogReplay = mfs.getLogRecoveryMode() == RecoveryMode.LOG_REPLAY;
  }
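  /**
   * Process the crashed server's hbase:meta: split or prepare-replay its meta WAL and reassign
   * the meta region if this server was hosting it.
   * @return false if we failed and this meta processing step must be retried
   */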
  private boolean processMeta(final MasterProcedureEnv env) throws IOException {
    if (LOG.isDebugEnabled()) LOG.debug("Processing hbase:meta that was on " + this.serverName);
    MasterServices services = env.getMasterServices();
    MasterFileSystem mfs = services.getMasterFileSystem();
    AssignmentManager am = services.getAssignmentManager();
    HRegionInfo metaHRI = HRegionInfo.FIRST_META_REGIONINFO;
    if (this.shouldSplitWal) {
      if (this.distributedLogReplay) {
        prepareLogReplay(env, META_REGION_SET);
      } else {
        // This blocks until the meta WAL split completes.
        mfs.splitMetaLog(serverName);
        am.getRegionStates().logSplit(metaHRI);
      }
    }

    // Assign meta if we were carrying it. Check again: meta may already have been reassigned
    // in the meantime (for instance after a regions-in-transition timeout).
    boolean processed = true;
    boolean shouldAssignMeta = false;
    AssignmentManager.ServerHostRegion rsCarryingMetaRegion = am.isCarryingMeta(serverName);
    switch (rsCarryingMetaRegion) {
    case HOSTING_REGION:
      LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
      am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
      shouldAssignMeta = true;
      break;
    case UNKNOWN:
      if (!services.getMetaTableLocator().isLocationAvailable(services.getZooKeeper())) {
        // The meta location in ZooKeeper is null, e.g. because a previous meta assignment
        // failed partway through. Try to assign meta again.
        shouldAssignMeta = true;
        break;
      }
      // A location is available, so meta is elsewhere: fall through to NOT_HOSTING_REGION.
    case NOT_HOSTING_REGION:
      LOG.info("hbase:meta has been assigned elsewhere; skipping assign.");
      break;
    default:
      throw new IOException("Unsupported action in MetaServerShutdownHandler");
    }
    if (shouldAssignMeta) {
      // Retries internally; aborts the master if hbase:meta cannot be assigned.
      verifyAndAssignMetaWithRetries(env);
      if (this.shouldSplitWal && distributedLogReplay) {
        int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT);
        if (!waitOnRegionToClearRegionsInTransition(am, metaHRI, timeout)) {
          processed = false;
        } else {
          // In distributed log replay, meta is assigned first and its WAL is replayed after.
          mfs.splitMetaLog(serverName);
        }
      }
    }
    return processed;
  }
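  /**
   * @return true if the region cleared regions-in-transition within the timeout, else false
   */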
  private boolean waitOnRegionToClearRegionsInTransition(AssignmentManager am,
      final HRegionInfo hri, final int timeout) throws InterruptedIOException {
    try {
      if (!am.waitOnRegionToClearRegionsInTransition(hri, timeout)) {
        // Waiting here avoids log replay starting before region assignment completes, which
        // could otherwise run against a not-yet-open region and incur RPC timeouts.
        LOG.warn("Region " + hri.getEncodedName() + " didn't complete assignment in time");
        return false;
      }
    } catch (InterruptedException ie) {
      throw new InterruptedIOException("Caught " + ie +
          " during waitOnRegionToClearRegionsInTransition for " + hri);
    }
    return true;
  }

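  /**
   * Mark the given regions as recovering so that distributed log replay can begin.
   */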
  private void prepareLogReplay(final MasterProcedureEnv env, final Set<HRegionInfo> regions)
      throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Mark " + size(regions) + " regions-in-recovery from " + this.serverName);
    }
    MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    mfs.prepareLogReplay(this.serverName, regions);
    am.getRegionStates().logSplit(this.serverName);
  }

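  /**
   * Split the WALs of the crashed server (distributed log splitting).
   */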
  private void splitLogs(final MasterProcedureEnv env) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Splitting logs from " + serverName + "; region count=" +
          size(this.regionsOnCrashedServer));
    }
    MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    // This blocks while the distributed log split runs.
    mfs.splitLog(this.serverName);
    am.getRegionStates().logSplit(this.serverName);
  }

  /** @return 0 if the passed collection is null, else its size. */
  static int size(final Collection<HRegionInfo> hris) {
    return hris == null ? 0 : hris.size();
  }
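  /**
   * Figure out what we need to assign. Should be idempotent.
   * @return list of calculated regions to assign; may be empty or null
   */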
  private List<HRegionInfo> calcRegionsToAssign(final MasterProcedureEnv env)
      throws IOException {
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    List<HRegionInfo> regionsToAssignAggregator = new ArrayList<HRegionInfo>();
    // Reassign any meta replicas (replica id > 0) that were on the crashed server.
    int replicaCount = env.getMasterConfiguration().getInt(HConstants.META_REPLICAS_NUM,
      HConstants.DEFAULT_META_REPLICA_NUM);
    for (int i = 1; i < replicaCount; i++) {
      HRegionInfo metaHri =
          RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i);
      if (am.isCarryingMetaReplica(this.serverName, metaHri) ==
          AssignmentManager.ServerHostRegion.HOSTING_REGION) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Reassigning meta replica " + metaHri + " that was on " + this.serverName);
        }
        regionsToAssignAggregator.add(metaHri);
      }
    }
    // Clean out regions-in-transition references to the crashed server; these need reassigning.
    List<HRegionInfo> regionsInTransition = am.cleanOutCrashedServerReferences(serverName);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Reassigning " + size(this.regionsOnCrashedServer) +
          " region(s) that " + (serverName == null ? "null" : serverName) +
          " was carrying (and " + regionsInTransition.size() +
          " region(s) that were opening on this server)");
    }
    regionsToAssignAggregator.addAll(regionsInTransition);

    // Iterate regions that were on this server and figure out which of them we need to reassign.
    if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
      RegionStates regionStates = am.getRegionStates();
      for (HRegionInfo hri : this.regionsOnCrashedServer) {
        if (regionsInTransition.contains(hri)) continue;
        String encodedName = hri.getEncodedName();
        Lock lock = am.acquireRegionLock(encodedName);
        try {
          RegionState rit = regionStates.getRegionTransitionState(hri);
          if (processDeadRegion(hri, am)) {
            ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
            if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
              // The region is already open on another server; if it had been in transition on
              // the dead server, cleanOutCrashedServerReferences above should have covered it.
              LOG.info("Skip assigning " + hri.getRegionNameAsString()
                  + " because it has been opened on " + addressFromAM.getServerName());
              continue;
            }
            if (rit != null) {
              if (rit.getServerName() != null && !rit.isOnServer(this.serverName)) {
                // Skip regions that are in transition on another server.
                LOG.info("Skip assigning region in transition on other server " + rit);
                continue;
              }
              LOG.info("Reassigning region " + rit + " and clearing zknode if exists");
              try {
                // This call is idempotent; it fails silently if the znode is absent.
                ZKAssign.deleteNodeFailSilent(env.getMasterServices().getZooKeeper(), hri);
              } catch (KeeperException e) {
                // Failing to delete the RIT znode leaves region state inconsistent; abort the
                // master rather than carry on.
                env.getMasterServices().abort("Unexpected error deleting RIT " + hri, e);
                throw new IOException(e);
              }
              regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
            } else if (regionStates.isRegionInState(
                hri, RegionState.State.SPLITTING_NEW, RegionState.State.MERGING_NEW)) {
              regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
            }
            regionsToAssignAggregator.add(hri);
          } else if (rit != null) {
            if ((rit.isPendingCloseOrClosing() || rit.isOffline())
                && am.getTableStateManager().isTableState(hri.getTable(),
                  ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) ||
                am.getReplicasToClose().contains(hri)) {
              // The table was being disabled when the regionserver went down: clear the RIT and
              // offline the region rather than reassign it. The rit we hold may be stale, but
              // clearing here does no harm. See HBASE-5927.
              regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
              am.deleteClosingOrClosedNode(hri, rit.getServerName());
              am.offlineDisabledRegion(hri);
            } else {
              LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
                  + rit + " not to be assigned by SSH of server " + serverName);
            }
          }
        } finally {
          lock.unlock();
        }
      }
    }
    return regionsToAssignAggregator;
  }

  private boolean assign(final MasterProcedureEnv env, final List<HRegionInfo> hris)
      throws InterruptedIOException {
    MasterServices masterServices = env.getMasterServices();
    AssignmentManager am = masterServices.getAssignmentManager();
    // Retain the old assignments if configured to do so and a server with the same
    // hostname:port has come back online; otherwise assign round-robin.
    boolean retainAssignment =
        masterServices.getConfiguration().getBoolean("hbase.master.retain.assignment", true) &&
        masterServices.getServerManager().isServerWithSameHostnamePortOnline(serverName);
    try {
      if (retainAssignment) {
        Map<HRegionInfo, ServerName> hriServerMap =
            new HashMap<HRegionInfo, ServerName>(hris.size());
        for (HRegionInfo hri : hris) {
          hriServerMap.put(hri, serverName);
        }
        LOG.info("Best effort in SSH to retain assignment of " + hris.size()
            + " regions from the dead server " + serverName);
        am.assign(hriServerMap);
      } else {
        LOG.info("Using round robin in SSH to assign " + hris.size()
            + " regions from the dead server " + serverName);
        am.assign(hris);
      }
    } catch (InterruptedException ie) {
      LOG.error("Caught " + ie + " during " + (retainAssignment ? "retaining" : "round-robin")
          + " assignment");
      throw (InterruptedIOException) new InterruptedIOException().initCause(ie);
    } catch (IOException ioe) {
      LOG.warn("Caught " + ioe + " during region assignment, will retry");
      return false;
    }
    return true;
  }

  private boolean waitOnAssign(final MasterProcedureEnv env, final List<HRegionInfo> hris)
      throws InterruptedIOException {
    int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT);
    for (HRegionInfo hri : hris) {
      // Blocks for up to the configured timeout per region.
      if (!waitOnRegionToClearRegionsInTransition(env.getMasterServices().getAssignmentManager(),
          hri, timeout)) {
        return false;
      }
    }
    return true;
  }

  @Override
  protected void rollbackState(MasterProcedureEnv env, ServerCrashState state)
      throws IOException {
    // Server crash processing cannot be rolled back.
    throw new UnsupportedOperationException("unhandled state=" + state);
  }

  @Override
  protected ServerCrashState getState(int stateId) {
    return ServerCrashState.valueOf(stateId);
  }

  @Override
  protected int getStateId(ServerCrashState state) {
    return state.getNumber();
  }

  @Override
  protected ServerCrashState getInitialState() {
    return ServerCrashState.SERVER_CRASH_START;
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // Abort is not supported.
    return false;
  }

  @Override
  protected boolean acquireLock(final MasterProcedureEnv env) {
    // If server crash processing is not yet enabled (master still initializing), do not run.
    if (env.waitServerCrashProcessingEnabled(this)) return false;
    return env.getProcedureQueue().tryAcquireServerExclusiveLock(this, getServerName());
  }

  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureQueue().releaseServerExclusiveLock(this, getServerName());
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getClass().getSimpleName());
    sb.append(" serverName=");
    sb.append(this.serverName);
    sb.append(", shouldSplitWal=");
    sb.append(shouldSplitWal);
    sb.append(", carryingMeta=");
    sb.append(carryingMeta);
  }

  @Override
  public void serializeStateData(final OutputStream stream) throws IOException {
    super.serializeStateData(stream);

    MasterProcedureProtos.ServerCrashStateData.Builder state =
        MasterProcedureProtos.ServerCrashStateData.newBuilder().
        setServerName(ProtobufUtil.toServerName(this.serverName)).
        setDistributedLogReplay(this.distributedLogReplay).
        setCarryingMeta(this.carryingMeta).
        setShouldSplitWal(this.shouldSplitWal);
    if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
      for (HRegionInfo hri : this.regionsOnCrashedServer) {
        state.addRegionsOnCrashedServer(HRegionInfo.convert(hri));
      }
    }
    if (this.regionsAssigned != null && !this.regionsAssigned.isEmpty()) {
      for (HRegionInfo hri : this.regionsAssigned) {
        state.addRegionsAssigned(HRegionInfo.convert(hri));
      }
    }
    state.build().writeDelimitedTo(stream);
  }

  @Override
  public void deserializeStateData(final InputStream stream) throws IOException {
    super.deserializeStateData(stream);

    MasterProcedureProtos.ServerCrashStateData state =
        MasterProcedureProtos.ServerCrashStateData.parseDelimitedFrom(stream);
    this.serverName = ProtobufUtil.toServerName(state.getServerName());
    this.distributedLogReplay = state.hasDistributedLogReplay() ?
        state.getDistributedLogReplay() : false;
    this.carryingMeta = state.hasCarryingMeta() ? state.getCarryingMeta() : false;
    // shouldSplitWal has a default in the .proto, so this read always succeeds.
    this.shouldSplitWal = state.getShouldSplitWal();
    int size = state.getRegionsOnCrashedServerCount();
    if (size > 0) {
      this.regionsOnCrashedServer = new HashSet<HRegionInfo>(size);
      for (RegionInfo ri : state.getRegionsOnCrashedServerList()) {
        this.regionsOnCrashedServer.add(HRegionInfo.convert(ri));
      }
    }
    size = state.getRegionsAssignedCount();
    if (size > 0) {
      this.regionsAssigned = new ArrayList<HRegionInfo>(size);
      // Read from the regions-assigned list, not the regions-on-crashed-server list.
      for (RegionInfo ri : state.getRegionsAssignedList()) {
        this.regionsAssigned.add(HRegionInfo.convert(ri));
      }
    }
  }
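  /**
   * Process a dead region from a dead regionserver. Checks if the region's table is deleted,
   * disabled, or disabling, or if the region is a split parent that no longer needs assigning.
   * @return true if the specified region should be assigned, false if not
   */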
  private static boolean processDeadRegion(HRegionInfo hri, AssignmentManager assignmentManager)
      throws IOException {
    boolean tablePresent = assignmentManager.getTableStateManager().isTablePresent(hri.getTable());
    if (!tablePresent) {
      LOG.info("The table " + hri.getTable() + " was deleted. Hence not proceeding.");
      return false;
    }
    // If the table is disabled, the region should not be reassigned.
    boolean disabled = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
      ZooKeeperProtos.Table.State.DISABLED);
    if (disabled) {
      LOG.info("The table " + hri.getTable() + " was disabled. Hence not proceeding.");
      return false;
    }
    if (hri.isOffline() && hri.isSplit()) {
      // HBASE-7721: Split parent and daughters are inserted into hbase:meta as an atomic
      // operation. If the meta scanner saw the parent split, then it should see the daughters
      // as assigned to the dead server; we do not have to do anything.
      return false;
    }
    boolean disabling = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
      ZooKeeperProtos.Table.State.DISABLING);
    if (disabling) {
      LOG.info("The table " + hri.getTable() + " is disabled. Hence not assigning region " +
          hri.getEncodedName());
      return false;
    }
    return true;
  }
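  /**
   * If hbase:meta is not assigned already, assign it, retrying up to the configured
   * {@link #KEY_RETRIES_ON_META} count before aborting the master.
   */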
  private void verifyAndAssignMetaWithRetries(final MasterProcedureEnv env) throws IOException {
    MasterServices services = env.getMasterServices();
    int iTimes = services.getConfiguration().getInt(KEY_RETRIES_ON_META, DEFAULT_RETRIES_ON_META);
    // Reuse the short wait on meta as the sleep interval between retries.
    long waitTime =
        services.getConfiguration().getLong(KEY_SHORT_WAIT_ON_META, DEFAULT_SHORT_WAIT_ON_META);
    int iFlag = 0;
    while (true) {
      try {
        verifyAndAssignMeta(env);
        break;
      } catch (KeeperException e) {
        services.abort("In server shutdown processing, assigning meta", e);
        throw new IOException("Aborting", e);
      } catch (Exception e) {
        if (iFlag >= iTimes) {
          services.abort("verifyAndAssignMeta failed after " + iTimes + " retries, aborting", e);
          throw new IOException("Aborting", e);
        }
        try {
          Thread.sleep(waitTime);
        } catch (InterruptedException e1) {
          LOG.warn("Interrupted while sleeping between meta assign retries", e1);
          Thread.currentThread().interrupt();
          throw (InterruptedIOException) new InterruptedIOException().initCause(e1);
        }
        iFlag++;
      }
    }
  }
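  /**
   * Verify that hbase:meta is deployed and accessible; if not, or if it is still located on the
   * crashed server, assign it.
   */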
  private void verifyAndAssignMeta(final MasterProcedureEnv env)
      throws InterruptedException, IOException, KeeperException {
    MasterServices services = env.getMasterServices();
    if (!isMetaAssignedQuickTest(env)) {
      services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
    } else if (serverName.equals(services.getMetaTableLocator().
        getMetaRegionLocation(services.getZooKeeper()))) {
      // The quick test passed, but the location in ZooKeeper still points at the crashed
      // server; the location is stale, so assign meta elsewhere.
      services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
    } else {
      LOG.info("Skip assigning hbase:meta because it is online at "
          + services.getMetaTableLocator().getMetaRegionLocation(services.getZooKeeper()));
    }
  }
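  /**
   * A quick test that hbase:meta is assigned; blocks for a short time only.
   * @return true if the hbase:meta location is available and verified as good
   */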
  private boolean isMetaAssignedQuickTest(final MasterProcedureEnv env)
      throws InterruptedException, IOException {
    ZooKeeperWatcher zkw = env.getMasterServices().getZooKeeper();
    MetaTableLocator mtl = env.getMasterServices().getMetaTableLocator();
    boolean metaAssigned = false;
    // Is hbase:meta location available yet?
    if (mtl.isLocationAvailable(zkw)) {
      ClusterConnection connection = env.getMasterServices().getConnection();
      // Is hbase:meta location good yet?
      long timeout =
          env.getMasterConfiguration().getLong(KEY_SHORT_WAIT_ON_META, DEFAULT_SHORT_WAIT_ON_META);
      if (mtl.verifyMetaRegionLocation(connection, zkw, timeout)) {
        metaAssigned = true;
      }
    }
    return metaAssigned;
  }

  @Override
  public ServerName getServerName() {
    return this.serverName;
  }

  @Override
  public boolean hasMetaTableRegion() {
    return this.carryingMeta;
  }

  @Override
  public ServerOperationType getServerOperationType() {
    return ServerOperationType.CRASH_HANDLER;
  }
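  /**
   * For this procedure, yield at the end of each successful flow step so that all crashed
   * servers can make progress, rather than having one procedure run to completion before the
   * next is executed.
   */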
  @Override
  protected boolean isYieldBeforeExecuteFromState(MasterProcedureEnv env, ServerCrashState state) {
    return true;
  }

  @Override
  protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
    // The operation is triggered internally on the server; no client knows about this
    // procedure, so there is no client acknowledgement to wait on.
    return false;
  }
}