View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.HashMap;
25  import java.util.HashSet;
26  import java.util.Iterator;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Set;
30  import java.util.TreeMap;
31  
32  import com.google.common.annotations.VisibleForTesting;
33  import com.google.common.base.Preconditions;
34  
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.hbase.classification.InterfaceAudience;
38  import org.apache.hadoop.conf.Configuration;
39  import org.apache.hadoop.hbase.HConstants;
40  import org.apache.hadoop.hbase.HRegionInfo;
41  import org.apache.hadoop.hbase.MetaTableAccessor;
42  import org.apache.hadoop.hbase.Server;
43  import org.apache.hadoop.hbase.ServerLoad;
44  import org.apache.hadoop.hbase.ServerName;
45  import org.apache.hadoop.hbase.TableName;
46  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
47  import org.apache.hadoop.hbase.master.RegionState.State;
48  import org.apache.hadoop.hbase.client.TableState;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.FSUtils;
51  import org.apache.hadoop.hbase.util.Pair;
52  
53  /**
54   * Region state accountant. It holds the states of all regions in memory.
55   * Under normal conditions, it should match the meta table and the true region states.
56   *
57   * This map is used by AssignmentManager to track region states.
58   */
59  @InterfaceAudience.Private
60  public class RegionStates {
61    private static final Log LOG = LogFactory.getLog(RegionStates.class);
62  
63    /**
64     * Regions currently in transition, keyed by encoded region name.
65     */
66    final HashMap<String, RegionState> regionsInTransition =
67      new HashMap<String, RegionState>();
68  
69    /**
70     * Encoded region name to state map.
71     * All the regions should be in this map.
72     */
73    private final Map<String, RegionState> regionStates =
74      new HashMap<String, RegionState>();
75  
76    /**
77     * Server to regions assignment map.
78     * Contains the set of regions currently assigned to a given server.
79     */
80    private final Map<ServerName, Set<HRegionInfo>> serverHoldings =
81      new HashMap<ServerName, Set<HRegionInfo>>();
82  
83    /**
84     * Maps the default replica of a region to the replicas recorded for it (itself included).
85     */
86    private final Map<HRegionInfo, Set<HRegionInfo>> defaultReplicaToOtherReplicas =
87      new HashMap<HRegionInfo, Set<HRegionInfo>>();
88  
89    /**
90     * Region to server assignment map.
91     * Contains the server a given region is currently assigned to.
92     */
93    private final TreeMap<HRegionInfo, ServerName> regionAssignments =
94      new TreeMap<HRegionInfo, ServerName>();
95  
96    /**
97     * Encoded region name to server assignment map for re-assignment
98     * purposes. Contains the server a given region was last known to be
99     * assigned to, which has not completed log splitting, so not assignable.
100    * If a region is currently assigned, the server info in this
101    * map should be the same as that in regionAssignments.
102    * However the info in regionAssignments is cleared when the region
103    * is offline while the info in lastAssignments is cleared when
104    * the region is closed or the server is dead and processed.
105    */
106   private final HashMap<String, ServerName> lastAssignments =
107     new HashMap<String, ServerName>();
108 
109   /**
110    * Encoded region name to previous server map, used to
111    * clean up serverHoldings when a region comes online
112    * on a new server. When the region goes offline from the previous
113    * server, we clean up regionAssignments so that it has the
114    * latest assignment map, but we don't clean up serverHoldings
115    * to match the meta. We need this map to find the old server
116    * whose serverHoldings needs cleanup, given a moved region.
117    */
118   private final HashMap<String, ServerName> oldAssignments =
119     new HashMap<String, ServerName>();
120 
121   /**
122    * Maps a host:port pair string to the latest start code
123    * of a region server which is known to be dead. It is dead
124    * to us, but server manager may not know it yet.
125    */
126   private final HashMap<String, Long> deadServers =
127     new HashMap<String, Long>();
128 
129   /**
130    * Maps a dead server to the time when its log split was done.
131    * Since log splitting is not ordered, we have to remember
132    * all processed instances. The map is cleaned up based
133    * on a configured time. By default, we assume a dead
134    * server should be done with log splitting in two hours.
135    */
136   private final HashMap<ServerName, Long> processedServers =
137     new HashMap<ServerName, Long>();
138   private long lastProcessedServerCleanTime; // last time (ms) processedServers was purged
139 
140   private final TableStateManager tableStateManager;
141   private final RegionStateStore regionStateStore;
142   private final ServerManager serverManager;
143   private final Server server;
144 
145   // Config key: maximum time (ms) to keep a dead server's log split info in processedServers
146   static final String LOG_SPLIT_TIME = "hbase.master.maximum.logsplit.keeptime";
147   static final long DEFAULT_LOG_SPLIT_TIME = 7200000L; // 2 hours
148 
149   RegionStates(final Server master, final TableStateManager tableStateManager,
150       final ServerManager serverManager, final RegionStateStore regionStateStore) {
151     this.tableStateManager = tableStateManager;
152     this.regionStateStore = regionStateStore;
153     this.serverManager = serverManager;
154     this.server = master;
155   }
156 
157   /**
158    * @return an unmodifiable the region assignment map
159    */
160   public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
161     return Collections.unmodifiableMap(regionAssignments);
162   }
163 
164   /**
165    * Return the replicas (including default) for the regions grouped by ServerName
166    * @param regions
167    * @return a pair containing the groupings as a map
168    */
169   synchronized Map<ServerName, List<HRegionInfo>> getRegionAssignments(
170     Collection<HRegionInfo> regions) {
171     Map<ServerName, List<HRegionInfo>> map = new HashMap<ServerName, List<HRegionInfo>>();
172     for (HRegionInfo region : regions) {
173       HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(region);
174       Set<HRegionInfo> allReplicas = defaultReplicaToOtherReplicas.get(defaultReplica);
175       if (allReplicas != null) {
176         for (HRegionInfo hri : allReplicas) {
177           ServerName server = regionAssignments.get(hri);
178           if (server != null) {
179             List<HRegionInfo> regionsOnServer = map.get(server);
180             if (regionsOnServer == null) {
181               regionsOnServer = new ArrayList<HRegionInfo>(1);
182               map.put(server, regionsOnServer);
183             }
184             regionsOnServer.add(hri);
185           }
186         }
187       }
188     }
189     return map;
190   }
191 
192   public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
193     return regionAssignments.get(hri);
194   }
195 
196   /**
197    * Get regions in transition and their states
198    */
199   @SuppressWarnings("unchecked")
200   public synchronized Map<String, RegionState> getRegionsInTransition() {
201     return (Map<String, RegionState>)regionsInTransition.clone();
202   }
203 
204   /**
205    * @return True if specified region in transition.
206    */
207   public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
208     return regionsInTransition.containsKey(hri.getEncodedName());
209   }
210 
211   /**
212    * @return True if specified region in transition.
213    */
214   public synchronized boolean isRegionInTransition(final String encodedName) {
215     return regionsInTransition.containsKey(encodedName);
216   }
217 
218   /**
219    * @return True if any region in transition.
220    */
221   public synchronized boolean isRegionsInTransition() {
222     return !regionsInTransition.isEmpty();
223   }
224 
225   /**
226    * @return True if specified region assigned, and not in transition.
227    */
228   public synchronized boolean isRegionOnline(final HRegionInfo hri) {
229     return !isRegionInTransition(hri) && regionAssignments.containsKey(hri);
230   }
231 
232   /**
233    * @return True if specified region offline/closed, but not in transition.
234    * If the region is not in the map, it is offline to us too.
235    */
236   public synchronized boolean isRegionOffline(final HRegionInfo hri) {
237     return getRegionState(hri) == null || (!isRegionInTransition(hri)
238       && isRegionInState(hri, State.OFFLINE, State.CLOSED));
239   }
240 
241   /**
242    * @return True if specified region is in one of the specified states.
243    */
244   public boolean isRegionInState(
245       final HRegionInfo hri, final State... states) {
246     return isRegionInState(hri.getEncodedName(), states);
247   }
248 
249   /**
250    * @return True if specified region is in one of the specified states.
251    */
252   public boolean isRegionInState(
253       final String encodedName, final State... states) {
254     RegionState regionState = getRegionState(encodedName);
255     return isOneOfStates(regionState, states);
256   }
257 
258   /**
259    * Wait for the state map to be updated by assignment manager.
260    */
261   public synchronized void waitForUpdate(
262       final long timeout) throws InterruptedException {
263     this.wait(timeout);
264   }
265 
266   /**
267    * Get region transition state
268    */
269   public RegionState getRegionTransitionState(final HRegionInfo hri) {
270     return getRegionTransitionState(hri.getEncodedName());
271   }
272 
273   /**
274    * Get region transition state
275    */
276   public synchronized RegionState
277       getRegionTransitionState(final String encodedName) {
278     return regionsInTransition.get(encodedName);
279   }
280 
281   /**
282    * Add a list of regions to RegionStates. If a region is split
283    * and offline, its state will be SPLIT. Otherwise, its state will
284    * be OFFLINE. Region already in RegionStates will be skipped.
285    */
286   public void createRegionStates(
287       final List<HRegionInfo> hris) {
288     for (HRegionInfo hri: hris) {
289       createRegionState(hri);
290     }
291   }
292 
293   /**
294    * Add a region to RegionStates. If the region is split
295    * and offline, its state will be SPLIT. Otherwise, its state will
296    * be OFFLINE. If it is already in RegionStates, this call has
297    * no effect, and the original state is returned.
298    */
299   public RegionState createRegionState(final HRegionInfo hri) {
300     return createRegionState(hri, null, null, null);
301   }
302 
303   /**
304    * Add a region to RegionStates with the specified state.
305    * If the region is already in RegionStates, this call has
306    * no effect, and the original state is returned.
307    *
308    * @param hri the region info to create a state for
309    * @param newState the state to the region in set to
310    * @param serverName the server the region is transitioning on
311    * @param lastHost the last server that hosts the region
312    * @return the current state
313    */
314   public synchronized RegionState createRegionState(final HRegionInfo hri,
315       State newState, ServerName serverName, ServerName lastHost) {
316     if (newState == null || (newState == State.OPEN && serverName == null)) {
317       newState =  State.OFFLINE;
318     }
319     if (hri.isOffline() && hri.isSplit()) {
320       newState = State.SPLIT;
321       serverName = null;
322     }
323     String encodedName = hri.getEncodedName();
324     RegionState regionState = regionStates.get(encodedName);
325     if (regionState != null) {
326       LOG.warn("Tried to create a state for a region already in RegionStates, "
327         + "used existing: " + regionState + ", ignored new: " + newState);
328     } else {
329       regionState = new RegionState(hri, newState, serverName);
330       regionStates.put(encodedName, regionState);
331       if (newState == State.OPEN) {
332         if (!serverName.equals(lastHost)) {
333           LOG.warn("Open region's last host " + lastHost
334             + " should be the same as the current one " + serverName
335             + ", ignored the last and used the current one");
336           lastHost = serverName;
337         }
338         lastAssignments.put(encodedName, lastHost);
339         regionAssignments.put(hri, lastHost);
340       } else if (!isOneOfStates(regionState, State.MERGED, State.SPLIT, State.OFFLINE)) {
341         regionsInTransition.put(encodedName, regionState);
342       }
343       if (lastHost != null && newState != State.SPLIT) {
344         addToServerHoldings(lastHost, hri);
345         if (newState != State.OPEN) {
346           oldAssignments.put(encodedName, lastHost);
347         }
348       }
349     }
350     return regionState;
351   }
352 
353   /**
354    * Update a region state. It will be put in transition if not already there.
355    */
356   public RegionState updateRegionState(
357       final HRegionInfo hri, final State state) {
358     RegionState regionState = getRegionState(hri.getEncodedName());
359     return updateRegionState(hri, state,
360       regionState == null ? null : regionState.getServerName());
361   }
362 
363   /**
364    * Update a region state. It will be put in transition if not already there.
365    */
366   public RegionState updateRegionState(
367       final HRegionInfo hri, final State state, final ServerName serverName) {
368     return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM);
369   }
370 
371   public void regionOnline(
372       final HRegionInfo hri, final ServerName serverName) {
373     regionOnline(hri, serverName, HConstants.NO_SEQNUM);
374   }
375 
376   /**
377    * A region is online, won't be in transition any more.
378    * We can't confirm it is really online on specified region server
379    * because it hasn't been put in region server's online region list yet.
380    */
381   public void regionOnline(final HRegionInfo hri,
382       final ServerName serverName, long openSeqNum) {
383     String encodedName = hri.getEncodedName();
384     if (!serverManager.isServerOnline(serverName)) {
385       // This is possible if the region server dies before master gets a
386       // chance to handle ZK event in time. At this time, if the dead server
387       // is already processed by SSH, we should ignore this event.
388       // If not processed yet, ignore and let SSH deal with it.
389       LOG.warn("Ignored, " + encodedName
390         + " was opened on a dead server: " + serverName);
391       return;
392     }
393     updateRegionState(hri, State.OPEN, serverName, openSeqNum);
394 
395     synchronized (this) {
396       regionsInTransition.remove(encodedName);
397       ServerName oldServerName = regionAssignments.put(hri, serverName);
398       if (!serverName.equals(oldServerName)) {
399         if (LOG.isDebugEnabled()) {
400           LOG.debug("Onlined " + hri.getShortNameToLog() + " on " + serverName + " " + hri);
401         } else {
402           LOG.debug("Onlined " + hri.getShortNameToLog() + " on " + serverName);
403         }
404         addToServerHoldings(serverName, hri);
405         addToReplicaMapping(hri);
406         if (oldServerName == null) {
407           oldServerName = oldAssignments.remove(encodedName);
408         }
409         if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
410           LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
411           removeFromServerHoldings(oldServerName, hri);
412         }
413       }
414     }
415   }
416 
417   private void addToServerHoldings(ServerName serverName, HRegionInfo hri) {
418     Set<HRegionInfo> regions = serverHoldings.get(serverName);
419     if (regions == null) {
420       regions = new HashSet<HRegionInfo>();
421       serverHoldings.put(serverName, regions);
422     }
423     regions.add(hri);
424   }
425 
426   private void addToReplicaMapping(HRegionInfo hri) {
427     HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
428     Set<HRegionInfo> replicas =
429         defaultReplicaToOtherReplicas.get(defaultReplica);
430     if (replicas == null) {
431       replicas = new HashSet<HRegionInfo>();
432       defaultReplicaToOtherReplicas.put(defaultReplica, replicas);
433     }
434     replicas.add(hri);
435   }
436 
437   private void removeFromServerHoldings(ServerName serverName, HRegionInfo hri) {
438     Set<HRegionInfo> oldRegions = serverHoldings.get(serverName);
439     oldRegions.remove(hri);
440     if (oldRegions.isEmpty()) {
441       serverHoldings.remove(serverName);
442     }
443   }
444 
445   private void removeFromReplicaMapping(HRegionInfo hri) {
446     HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
447     Set<HRegionInfo> replicas = defaultReplicaToOtherReplicas.get(defaultReplica);
448     if (replicas != null) {
449       replicas.remove(hri);
450       if (replicas.isEmpty()) {
451         defaultReplicaToOtherReplicas.remove(defaultReplica);
452       }
453     }
454   }
455 
456   /**
457    * A dead server's wals have been split so that all the regions
458    * used to be open on it can be safely assigned now. Mark them assignable.
459    */
460   public synchronized void logSplit(final ServerName serverName) {
461     for (Iterator<Map.Entry<String, ServerName>> it
462         = lastAssignments.entrySet().iterator(); it.hasNext();) {
463       Map.Entry<String, ServerName> e = it.next();
464       if (e.getValue().equals(serverName)) {
465         it.remove();
466       }
467     }
468     long now = System.currentTimeMillis();
469     if (LOG.isDebugEnabled()) {
470       LOG.debug("Adding to processed servers " + serverName);
471     }
472     processedServers.put(serverName, Long.valueOf(now));
473     Configuration conf = server.getConfiguration();
474     long obsoleteTime = conf.getLong(LOG_SPLIT_TIME, DEFAULT_LOG_SPLIT_TIME);
475     // Doesn't have to be very accurate about the clean up time
476     if (now > lastProcessedServerCleanTime + obsoleteTime) {
477       lastProcessedServerCleanTime = now;
478       long cutoff = now - obsoleteTime;
479       for (Iterator<Map.Entry<ServerName, Long>> it
480           = processedServers.entrySet().iterator(); it.hasNext();) {
481         Map.Entry<ServerName, Long> e = it.next();
482         if (e.getValue().longValue() < cutoff) {
483           if (LOG.isDebugEnabled()) {
484             LOG.debug("Removed from processed servers " + e.getKey());
485           }
486           it.remove();
487         }
488       }
489     }
490   }
491 
492   /**
493    * Log split is done for a given region, so it is assignable now.
494    */
495   public void logSplit(final HRegionInfo region) {
496     clearLastAssignment(region);
497   }
498 
499   public synchronized void clearLastAssignment(final HRegionInfo region) {
500     lastAssignments.remove(region.getEncodedName());
501   }
502 
503   /**
504    * A region is offline, won't be in transition any more.
505    */
506   public void regionOffline(final HRegionInfo hri) {
507     regionOffline(hri, null);
508   }
509 
510   /**
511    * A region is offline, won't be in transition any more. Its state
512    * should be the specified expected state, which can only be
513    * Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
514    */
515   public void regionOffline(
516       final HRegionInfo hri, final State expectedState) {
517     Preconditions.checkArgument(expectedState == null
518       || RegionState.isUnassignable(expectedState),
519         "Offlined region should not be " + expectedState);
520     if (isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
521       // Remove it from all region maps
522       deleteRegion(hri);
523       return;
524     }
525     State newState =
526       expectedState == null ? State.OFFLINE : expectedState;
527     updateRegionState(hri, newState);
528     String encodedName = hri.getEncodedName();
529     synchronized (this) {
530       regionsInTransition.remove(encodedName);
531       ServerName oldServerName = regionAssignments.remove(hri);
532       if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
533         if (newState == State.MERGED || newState == State.SPLIT
534             || hri.isMetaRegion() || tableStateManager.isTableState(hri.getTable(),
535               TableState.State.DISABLED, TableState.State.DISABLING)) {
536           // Offline the region only if it's merged/split, or the table is disabled/disabling.
537           // Otherwise, offline it from this server only when it is online on a different server.
538           LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
539           removeFromServerHoldings(oldServerName, hri);
540           removeFromReplicaMapping(hri);
541         } else {
542           // Need to remember it so that we can offline it from this
543           // server when it is online on a different server.
544           oldAssignments.put(encodedName, oldServerName);
545         }
546       }
547     }
548   }
549 
550   /**
551    * A server is offline, all regions on it are dead.
552    */
553   public synchronized List<HRegionInfo> serverOffline(final ServerName sn) {
554     // Offline all regions on this server not already in transition.
555     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
556     Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
557     if (assignedRegions == null) {
558       assignedRegions = new HashSet<HRegionInfo>();
559     }
560 
561     // Offline regions outside the loop to avoid ConcurrentModificationException
562     Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
563     for (HRegionInfo region : assignedRegions) {
564       // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
565       if (isRegionOnline(region)) {
566         regionsToOffline.add(region);
567       } else if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
568         LOG.debug("Offline splitting/merging region " + getRegionState(region));
569         regionsToOffline.add(region);
570       }
571     }
572 
573     for (RegionState state : regionsInTransition.values()) {
574       HRegionInfo hri = state.getRegion();
575       if (assignedRegions.contains(hri)) {
576         // Region is open on this region server, but in transition.
577         // This region must be moving away from this server, or splitting/merging.
578         // SSH will handle it, either skip assigning, or re-assign.
579         LOG.info("Transitioning " + state + " will be handled by SSH for " + sn);
580       } else if (sn.equals(state.getServerName())) {
581         // Region is in transition on this region server, and this
582         // region is not open on this server. So the region must be
583         // moving to this server from another one (i.e. opening or
584         // pending open on this server, was open on another one.
585         // Offline state is also kind of pending open if the region is in
586         // transition. The region could be in failed_close state too if we have
587         // tried several times to open it while this region server is not reachable)
588         if (isOneOfStates(state, State.OPENING, State.PENDING_OPEN,
589             State.FAILED_OPEN, State.FAILED_CLOSE, State.OFFLINE)) {
590           LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
591           rits.add(hri);
592         } else if (isOneOfStates(state, State.SPLITTING_NEW)) {
593           try {
594             if (MetaTableAccessor.getRegion(server.getConnection(), state.getRegion()
595                 .getEncodedNameAsBytes()) == null) {
596               regionsToOffline.add(state.getRegion());
597               FSUtils.deleteRegionDir(server.getConfiguration(), state.getRegion());
598             }
599           } catch (IOException e) {
600             LOG.warn("Got exception while deleting " + state.getRegion()
601                 + " directories from file system.", e);
602           }
603         } else {
604           LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
605         }
606       }
607     }
608 
609     for (HRegionInfo hri : regionsToOffline) {
610       regionOffline(hri);
611     }
612 
613     this.notifyAll();
614     return rits;
615   }
616 
617   /**
618    * Gets the online regions of the specified table.
619    * This method looks at the in-memory state.  It does not go to <code>hbase:meta</code>.
620    * Only returns <em>online</em> regions.  If a region on this table has been
621    * closed during a disable, etc., it will be included in the returned list.
622    * So, the returned list may not necessarily be ALL regions in this table, its
623    * all the ONLINE regions in the table.
624    * @param tableName
625    * @return Online regions from <code>tableName</code>
626    */
627   public synchronized List<HRegionInfo> getRegionsOfTable(TableName tableName) {
628     List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
629     // boundary needs to have table's name but regionID 0 so that it is sorted
630     // before all table's regions.
631     HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
632     for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
633       if(!hri.getTable().equals(tableName)) break;
634       tableRegions.add(hri);
635     }
636     return tableRegions;
637   }
638 
639 
640   /**
641    * Wait on region to clear regions-in-transition.
642    * <p>
643    * If the region isn't in transition, returns immediately.  Otherwise, method
644    * blocks until the region is out of transition.
645    */
646   public synchronized void waitOnRegionToClearRegionsInTransition(
647       final HRegionInfo hri) throws InterruptedException {
648     if (!isRegionInTransition(hri)) return;
649 
650     while(!server.isStopped() && isRegionInTransition(hri)) {
651       RegionState rs = getRegionState(hri);
652       LOG.info("Waiting on " + rs + " to clear regions-in-transition");
653       waitForUpdate(100);
654     }
655 
656     if (server.isStopped()) {
657       LOG.info("Giving up wait on region in " +
658         "transition because stoppable.isStopped is set");
659     }
660   }
661 
662   /**
663    * A table is deleted. Remove its regions from all internal maps.
664    * We loop through all regions assuming we don't delete tables too much.
665    */
666   public void tableDeleted(final TableName tableName) {
667     Set<HRegionInfo> regionsToDelete = new HashSet<HRegionInfo>();
668     synchronized (this) {
669       for (RegionState state: regionStates.values()) {
670         HRegionInfo region = state.getRegion();
671         if (region.getTable().equals(tableName)) {
672           regionsToDelete.add(region);
673         }
674       }
675     }
676     for (HRegionInfo region: regionsToDelete) {
677       deleteRegion(region);
678     }
679   }
680 
681   /**
682    * Get a copy of all regions assigned to a server
683    */
684   public synchronized Set<HRegionInfo> getServerRegions(ServerName serverName) {
685     Set<HRegionInfo> regions = serverHoldings.get(serverName);
686     if (regions == null) return null;
687     return new HashSet<HRegionInfo>(regions);
688   }
689 
690   /**
691    * Remove a region from all state maps.
692    */
693   @VisibleForTesting
694   public synchronized void deleteRegion(final HRegionInfo hri) {
695     String encodedName = hri.getEncodedName();
696     regionsInTransition.remove(encodedName);
697     regionStates.remove(encodedName);
698     lastAssignments.remove(encodedName);
699     ServerName sn = regionAssignments.remove(hri);
700     if (sn != null) {
701       Set<HRegionInfo> regions = serverHoldings.get(sn);
702       regions.remove(hri);
703     }
704   }
705 
706   /**
707    * Checking if a region was assigned to a server which is not online now.
708    * If so, we should hold re-assign this region till SSH has split its wals.
709    * Once logs are split, the last assignment of this region will be reset,
710    * which means a null last assignment server is ok for re-assigning.
711    *
712    * A region server could be dead but we don't know it yet. We may
713    * think it's online falsely. Therefore if a server is online, we still
714    * need to confirm it reachable and having the expected start code.
715    */
716   synchronized boolean wasRegionOnDeadServer(final String encodedName) {
717     ServerName server = lastAssignments.get(encodedName);
718     return isServerDeadAndNotProcessed(server);
719   }
720 
721   synchronized boolean isServerDeadAndNotProcessed(ServerName server) {
722     if (server == null) return false;
723     if (serverManager.isServerOnline(server)) {
724       String hostAndPort = server.getHostAndPort();
725       long startCode = server.getStartcode();
726       Long deadCode = deadServers.get(hostAndPort);
727       if (deadCode == null || startCode > deadCode.longValue()) {
728         if (serverManager.isServerReachable(server)) {
729           return false;
730         }
731         // The size of deadServers won't grow unbounded.
732         deadServers.put(hostAndPort, Long.valueOf(startCode));
733       }
734       // Watch out! If the server is not dead, the region could
735       // remain unassigned. That's why ServerManager#isServerReachable
736       // should use some retry.
737       //
738       // We cache this info since it is very unlikely for that
739       // instance to come back up later on. We don't want to expire
740       // the server since we prefer to let it die naturally.
741       LOG.warn("Couldn't reach online server " + server);
742     }
743     // Now, we know it's dead. Check if it's processed
744     return !processedServers.containsKey(server);
745   }
746 
747  /**
748    * Get the last region server a region was on for purpose of re-assignment,
749    * i.e. should the re-assignment be held back till log split is done?
750    */
751   synchronized ServerName getLastRegionServerOfRegion(final String encodedName) {
752     return lastAssignments.get(encodedName);
753   }
754 
755   synchronized void setLastRegionServerOfRegions(
756       final ServerName serverName, final List<HRegionInfo> regionInfos) {
757     for (HRegionInfo hri: regionInfos) {
758       setLastRegionServerOfRegion(serverName, hri.getEncodedName());
759     }
760   }
761 
762   synchronized void setLastRegionServerOfRegion(
763       final ServerName serverName, final String encodedName) {
764     lastAssignments.put(encodedName, serverName);
765   }
766 
767   synchronized boolean isRegionOnServer(
768       final HRegionInfo hri, final ServerName serverName) {
769     Set<HRegionInfo> regions = serverHoldings.get(serverName);
770     return regions == null ? false : regions.contains(hri);
771   }
772 
773   void splitRegion(HRegionInfo p,
774       HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException {
775     regionStateStore.splitRegion(p, a, b, sn);
776     synchronized (this) {
777       // After PONR, split is considered to be done.
778       // Update server holdings to be aligned with the meta.
779       Set<HRegionInfo> regions = serverHoldings.get(sn);
780       if (regions == null) {
781         throw new IllegalStateException(sn + " should host some regions");
782       }
783       regions.remove(p);
784       regions.add(a);
785       regions.add(b);
786     }
787   }
788 
789   void mergeRegions(HRegionInfo p,
790       HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException {
791     regionStateStore.mergeRegions(p, a, b, sn);
792     synchronized (this) {
793       // After PONR, merge is considered to be done.
794       // Update server holdings to be aligned with the meta.
795       Set<HRegionInfo> regions = serverHoldings.get(sn);
796       if (regions == null) {
797         throw new IllegalStateException(sn + " should host some regions");
798       }
799       regions.remove(a);
800       regions.remove(b);
801       regions.add(p);
802     }
803   }
804 
805   /**
806    * At cluster clean re/start, mark all user regions closed except those of tables
807    * that are excluded, such as disabled/disabling/enabling tables. All user regions
808    * and their previous locations are returned.
809    */
810   synchronized Map<HRegionInfo, ServerName> closeAllUserRegions(Set<TableName> excludedTables) {
811     boolean noExcludeTables = excludedTables == null || excludedTables.isEmpty();
812     Set<HRegionInfo> toBeClosed = new HashSet<HRegionInfo>(regionStates.size());
813     for(RegionState state: regionStates.values()) {
814       HRegionInfo hri = state.getRegion();
815       if (state.isSplit() || hri.isSplit()) {
816         continue;
817       }
818       TableName tableName = hri.getTable();
819       if (!TableName.META_TABLE_NAME.equals(tableName)
820           && (noExcludeTables || !excludedTables.contains(tableName))) {
821         toBeClosed.add(hri);
822       }
823     }
824     Map<HRegionInfo, ServerName> allUserRegions =
825       new HashMap<HRegionInfo, ServerName>(toBeClosed.size());
826     for (HRegionInfo hri: toBeClosed) {
827       RegionState regionState = updateRegionState(hri, State.CLOSED);
828       allUserRegions.put(hri, regionState.getServerName());
829     }
830     return allUserRegions;
831   }
832 
833   /**
834    * Compute the average load across all region servers.
835    * Currently, this uses a very naive computation - just uses the number of
836    * regions being served, ignoring stats about number of requests.
837    * @return the average load
838    */
839   protected synchronized double getAverageLoad() {
840     int numServers = 0, totalLoad = 0;
841     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
842       Set<HRegionInfo> regions = e.getValue();
843       ServerName serverName = e.getKey();
844       int regionCount = regions.size();
845       if (serverManager.isServerOnline(serverName)) {
846         totalLoad += regionCount;
847         numServers++;
848       }
849     }
850     if (numServers > 1) {
851       // The master region server holds only a couple regions.
852       // Don't consider this server in calculating the average load
853       // if there are other region servers to avoid possible confusion.
854       Set<HRegionInfo> hris = serverHoldings.get(server.getServerName());
855       if (hris != null) {
856         totalLoad -= hris.size();
857         numServers--;
858       }
859     }
860     return numServers == 0 ? 0.0 :
861       (double)totalLoad / (double)numServers;
862   }
863 
864   /**
865    * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
866    * Can't let out original since it can change and at least the load balancer
867    * wants to iterate this exported list.  We need to synchronize on regions
868    * since all access to this.servers is under a lock on this.regions.
869    *
870    * @return A clone of current assignments by table.
871    */
872   protected Map<TableName, Map<ServerName, List<HRegionInfo>>>
873       getAssignmentsByTable() {
874     Map<TableName, Map<ServerName, List<HRegionInfo>>> result =
875       new HashMap<TableName, Map<ServerName,List<HRegionInfo>>>();
876     synchronized (this) {
877       if (!server.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false)) {
878         Map<ServerName, List<HRegionInfo>> svrToRegions =
879           new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
880         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
881           svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
882         }
883         result.put(TableName.valueOf("ensemble"), svrToRegions);
884       } else {
885         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
886           for (HRegionInfo hri: e.getValue()) {
887             if (hri.isMetaRegion()) continue;
888             TableName tablename = hri.getTable();
889             Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
890             if (svrToRegions == null) {
891               svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
892               result.put(tablename, svrToRegions);
893             }
894             List<HRegionInfo> regions = svrToRegions.get(e.getKey());
895             if (regions == null) {
896               regions = new ArrayList<HRegionInfo>();
897               svrToRegions.put(e.getKey(), regions);
898             }
899             regions.add(hri);
900           }
901         }
902       }
903     }
904 
905     Map<ServerName, ServerLoad>
906       onlineSvrs = serverManager.getOnlineServers();
907     // Take care of servers w/o assignments, and remove servers in draining mode
908     List<ServerName> drainingServers = this.serverManager.getDrainingServersList();
909     for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
910       for (ServerName svr: onlineSvrs.keySet()) {
911         if (!map.containsKey(svr)) {
912           map.put(svr, new ArrayList<HRegionInfo>());
913         }
914       }
915       map.keySet().removeAll(drainingServers);
916     }
917     return result;
918   }
919 
920   protected RegionState getRegionState(final HRegionInfo hri) {
921     return getRegionState(hri.getEncodedName());
922   }
923 
924   /**
925    * Returns a clone of region assignments per server
926    * @return a Map of ServerName to a List of HRegionInfo's
927    */
928   protected synchronized Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer() {
929     Map<ServerName, List<HRegionInfo>> regionsByServer =
930         new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
931     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
932       regionsByServer.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
933     }
934     return regionsByServer;
935   }
936 
937   protected synchronized RegionState getRegionState(final String encodedName) {
938     return regionStates.get(encodedName);
939   }
940 
941   /**
942    * Get the HRegionInfo from cache, if not there, from the hbase:meta table
943    * @param  regionName
944    * @return HRegionInfo for the region
945    */
946   @SuppressWarnings("deprecation")
947   protected HRegionInfo getRegionInfo(final byte [] regionName) {
948     String encodedName = HRegionInfo.encodeRegionName(regionName);
949     RegionState regionState = getRegionState(encodedName);
950     if (regionState != null) {
951       return regionState.getRegion();
952     }
953 
954     try {
955       Pair<HRegionInfo, ServerName> p =
956         MetaTableAccessor.getRegion(server.getConnection(), regionName);
957       HRegionInfo hri = p == null ? null : p.getFirst();
958       if (hri != null) {
959         createRegionState(hri);
960       }
961       return hri;
962     } catch (IOException e) {
963       server.abort("Aborting because error occoured while reading "
964         + Bytes.toStringBinary(regionName) + " from hbase:meta", e);
965       return null;
966     }
967   }
968 
969   static boolean isOneOfStates(RegionState regionState, State... states) {
970     State s = regionState != null ? regionState.getState() : null;
971     for (State state: states) {
972       if (s == state) return true;
973     }
974     return false;
975   }
976 
977   /**
978    * Update a region state. It will be put in transition if not already there.
979    */
980   private RegionState updateRegionState(final HRegionInfo hri,
981       final RegionState.State state, final ServerName serverName, long openSeqNum) {
982     if (state == RegionState.State.FAILED_CLOSE || state == RegionState.State.FAILED_OPEN) {
983       LOG.warn("Failed to open/close " + hri.getShortNameToLog()
984         + " on " + serverName + ", set to " + state);
985     }
986 
987     String encodedName = hri.getEncodedName();
988     RegionState regionState = new RegionState(
989       hri, state, System.currentTimeMillis(), serverName);
990     RegionState oldState = getRegionState(encodedName);
991     if (!regionState.equals(oldState)) {
992       LOG.info("Transition " + oldState + " to " + regionState);
993       // Persist region state before updating in-memory info, if needed
994       regionStateStore.updateRegionState(openSeqNum, regionState, oldState);
995     }
996 
997     synchronized (this) {
998       regionsInTransition.put(encodedName, regionState);
999       regionStates.put(encodedName, regionState);
1000 
1001       // For these states, region should be properly closed.
1002       // There should be no log splitting issue.
1003       if ((state == State.CLOSED || state == State.MERGED
1004           || state == State.SPLIT) && lastAssignments.containsKey(encodedName)) {
1005         ServerName last = lastAssignments.get(encodedName);
1006         if (last.equals(serverName)) {
1007           lastAssignments.remove(encodedName);
1008         } else {
1009           LOG.warn(encodedName + " moved to " + state + " on "
1010             + serverName + ", expected " + last);
1011         }
1012       }
1013 
1014       // Once a region is opened, record its last assignment right away.
1015       if (serverName != null && state == State.OPEN) {
1016         ServerName last = lastAssignments.get(encodedName);
1017         if (!serverName.equals(last)) {
1018           lastAssignments.put(encodedName, serverName);
1019           if (last != null && isServerDeadAndNotProcessed(last)) {
1020             LOG.warn(encodedName + " moved to " + serverName
1021               + ", while it's previous host " + last
1022               + " is dead but not processed yet");
1023           }
1024         }
1025       }
1026 
1027       // notify the change
1028       this.notifyAll();
1029     }
1030     return regionState;
1031   }
1032 }