/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;

import com.google.common.base.Preconditions;

/**
 * Region state accountant. It holds the states of all regions in memory.
 * In a normal scenario, it should match the meta table and the true region states.
 *
 * This map is used by the AssignmentManager to track region states.
 */
@InterfaceAudience.Private
public class RegionStates {
  private static final Log LOG = LogFactory.getLog(RegionStates.class);

  /**
   * Regions currently in transition.
   */
  final HashMap<String, RegionState> regionsInTransition;

  /**
   * Region encoded name to state map.
   * All the regions should be in this map.
   */
  private final Map<String, RegionState> regionStates;

  /**
   * Server to regions assignment map.
   * Contains the set of regions currently assigned to a given server.
   */
  private final Map<ServerName, Set<HRegionInfo>> serverHoldings;

  /**
   * Region to server assignment map.
   * Contains the server a given region is currently assigned to.
   */
  private final TreeMap<HRegionInfo, ServerName> regionAssignments;

  /**
   * Encoded region name to server assignment map, used for re-assignment
   * purposes. Contains the server a given region was last known to be
   * assigned to; that server has not completed log splitting, so the
   * region is not assignable.
   * If a region is currently assigned, the server info in this
   * map should be the same as that in regionAssignments.
   * However, the info in regionAssignments is cleared when the region
   * goes offline, while the info in lastAssignments is cleared when
   * the region is closed or the server is dead and processed.
   */
  private final HashMap<String, ServerName> lastAssignments;

  /**
   * Maps a host:port pair string to the latest start code
   * of a region server which is known to be dead. It is dead
   * to us, but the server manager may not know it yet.
   */
  private final HashMap<String, Long> deadServers;

  /**
   * Maps a dead server to the time when its log splitting is done.
   * Since log splitting is not ordered, we have to remember
   * all processed instances. The map is cleaned up based
   * on a configured time. By default, we assume a dead
   * server should be done with log splitting in two hours.
   */
  private final HashMap<ServerName, Long> processedServers;
  private long lastProcessedServerCleanTime;

  private final RegionStateStore regionStateStore;
  private final ServerManager serverManager;
  private final Server server;

  // The maximum time to keep log split info in the region states map
  static final String LOG_SPLIT_TIME = "hbase.master.maximum.logsplit.keeptime";
  static final long DEFAULT_LOG_SPLIT_TIME = 7200000L; // 2 hours

  RegionStates(final Server master,
      final ServerManager serverManager, final RegionStateStore regionStateStore) {
    regionStates = new HashMap<String, RegionState>();
    regionsInTransition = new HashMap<String, RegionState>();
    serverHoldings = new HashMap<ServerName, Set<HRegionInfo>>();
    regionAssignments = new TreeMap<HRegionInfo, ServerName>();
    lastAssignments = new HashMap<String, ServerName>();
    processedServers = new HashMap<ServerName, Long>();
    deadServers = new HashMap<String, Long>();
    this.regionStateStore = regionStateStore;
    this.serverManager = serverManager;
    this.server = master;
  }

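  // Illustrative usage sketch (not part of the original source): how a caller
  // such as the AssignmentManager might seed and query this accountant. The
  // surrounding wiring (where the Server, ServerManager and RegionStateStore
  // come from, and the regionsFromMeta list) is assumed.
  //
  //   RegionStates regionStates = new RegionStates(master, serverManager, regionStateStore);
  //   regionStates.createRegionStates(regionsFromMeta);  // regions start as OFFLINE or SPLIT
  //   if (regionStates.isRegionOnline(hri)) {
  //     ServerName location = regionStates.getRegionServerOfRegion(hri);
  //   }
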
  /**
   * @return a copy of the region assignment map
   */
  @SuppressWarnings("unchecked")
  public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
    return (Map<HRegionInfo, ServerName>)regionAssignments.clone();
  }

  public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
    return regionAssignments.get(hri);
  }

  /**
   * Get regions in transition and their states
   */
  @SuppressWarnings("unchecked")
  public synchronized Map<String, RegionState> getRegionsInTransition() {
    return (Map<String, RegionState>)regionsInTransition.clone();
  }

  /**
   * @return True if the specified region is in transition.
   */
  public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
    return regionsInTransition.containsKey(hri.getEncodedName());
  }

  /**
   * @return True if the specified region is in transition.
   */
  public synchronized boolean isRegionInTransition(final String encodedName) {
    return regionsInTransition.containsKey(encodedName);
  }

  /**
   * @return True if any region is in transition.
   */
  public synchronized boolean isRegionsInTransition() {
    return !regionsInTransition.isEmpty();
  }

  /**
   * @return True if the specified region is assigned, and not in transition.
   */
  public synchronized boolean isRegionOnline(final HRegionInfo hri) {
    return !isRegionInTransition(hri) && regionAssignments.containsKey(hri);
  }

  /**
   * @return True if the specified region is offline/closed, but not in transition.
   * If the region is not in the map, it is offline to us too.
   */
  public synchronized boolean isRegionOffline(final HRegionInfo hri) {
    return getRegionState(hri) == null || (!isRegionInTransition(hri)
      && isRegionInState(hri, State.OFFLINE, State.CLOSED));
  }

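  // Minimal sketch of how these queries relate (illustrative only; the calling
  // context is assumed): a region is "online" only when it is assigned and not
  // in transition, while "offline" covers unknown, OFFLINE and CLOSED regions
  // that are not in transition.
  //
  //   if (regionStates.isRegionInTransition(hri)) {
  //     RegionState rit = regionStates.getRegionTransitionState(hri);
  //     LOG.debug("Still moving: " + rit);
  //   } else if (regionStates.isRegionOnline(hri)) {
  //     LOG.debug("Served by " + regionStates.getRegionServerOfRegion(hri));
  //   } else if (regionStates.isRegionOffline(hri)) {
  //     LOG.debug("Safe to (re)assign " + hri.getShortNameToLog());
  //   }
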
  /**
   * @return True if the specified region is in one of the specified states.
   */
  public boolean isRegionInState(
      final HRegionInfo hri, final State... states) {
    return isRegionInState(hri.getEncodedName(), states);
  }

  /**
   * @return True if the specified region is in one of the specified states.
   */
  public boolean isRegionInState(
      final String encodedName, final State... states) {
    RegionState regionState = getRegionState(encodedName);
    return isOneOfStates(regionState, states);
  }

  /**
   * Wait for the state map to be updated by the assignment manager.
   */
  public synchronized void waitForUpdate(
      final long timeout) throws InterruptedException {
    this.wait(timeout);
  }

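  // Hedged sketch of the intended wait/notify pattern (illustrative; the stop
  // condition and caller are assumed): waitForUpdate() blocks on this monitor,
  // and state changes end with notifyAll() in updateRegionState() and
  // serverOffline(), so callers poll their condition in a loop, as
  // waitOnRegionToClearRegionsInTransition() below does.
  //
  //   while (!done() && regionStates.isRegionInTransition(hri)) {
  //     regionStates.waitForUpdate(100);  // wakes up early on any state change
  //   }
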
  /**
   * Get region transition state
   */
  public RegionState getRegionTransitionState(final HRegionInfo hri) {
    return getRegionTransitionState(hri.getEncodedName());
  }

  /**
   * Get region transition state
   */
  public synchronized RegionState
      getRegionTransitionState(final String encodedName) {
    return regionsInTransition.get(encodedName);
  }

  /**
   * Add a list of regions to RegionStates. If a region is split
   * and offline, its state will be SPLIT. Otherwise, its state will
   * be OFFLINE. Regions already in RegionStates will be skipped.
   */
  public void createRegionStates(
      final List<HRegionInfo> hris) {
    for (HRegionInfo hri: hris) {
      createRegionState(hri);
    }
  }

  /**
   * Add a region to RegionStates. If the region is split
   * and offline, its state will be SPLIT. Otherwise, its state will
   * be OFFLINE. If it is already in RegionStates, this call has
   * no effect, and the original state is returned.
   */
  public RegionState createRegionState(final HRegionInfo hri) {
    return createRegionState(hri, null, null);
  }

  /**
   * Add a region to RegionStates with the specified state.
   * If the region is already in RegionStates, this call has
   * no effect, and the original state is returned.
   */
  public synchronized RegionState createRegionState(
      final HRegionInfo hri, State newState, ServerName serverName) {
    if (newState == null || (newState == State.OPEN && serverName == null)) {
      newState = State.OFFLINE;
    }
    if (hri.isOffline() && hri.isSplit()) {
      newState = State.SPLIT;
      serverName = null;
    }
    String encodedName = hri.getEncodedName();
    RegionState regionState = regionStates.get(encodedName);
    if (regionState != null) {
      LOG.warn("Tried to create a state for a region already in RegionStates, "
        + "used existing: " + regionState + ", ignored new: " + newState);
    } else {
      regionState = new RegionState(hri, newState, serverName);
      regionStates.put(encodedName, regionState);
      if (newState == State.OPEN) {
        regionAssignments.put(hri, serverName);
        lastAssignments.put(encodedName, serverName);
        Set<HRegionInfo> regions = serverHoldings.get(serverName);
        if (regions == null) {
          regions = new HashSet<HRegionInfo>();
          serverHoldings.put(serverName, regions);
        }
        regions.add(hri);
      } else if (!regionState.isUnassignable()) {
        regionsInTransition.put(encodedName, regionState);
      }
    }
    return regionState;
  }

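  // Illustrative sketch of createRegionState() semantics (the HRegionInfo and
  // ServerName variables are assumed): a split-and-offline parent is recorded
  // as SPLIT, an OPEN state with a known server goes straight into the
  // assignment maps, and a repeated call for the same region keeps and returns
  // the existing state while logging a warning.
  //
  //   RegionState s1 = regionStates.createRegionState(splitParent);          // SPLIT
  //   RegionState s2 = regionStates.createRegionState(hri, State.OPEN, sn);  // OPEN on sn
  //   RegionState s3 = regionStates.createRegionState(hri);                  // same as s2, warning logged
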
  /**
   * Update a region state. It will be put in transition if not already there.
   */
  public RegionState updateRegionState(
      final HRegionInfo hri, final State state) {
    RegionState regionState = getRegionState(hri.getEncodedName());
    return updateRegionState(hri, state,
      regionState == null ? null : regionState.getServerName());
  }

  /**
   * Update a region state. It will be put in transition if not already there.
   *
   * If we can't find the region info based on the region name in
   * the transition, log a warning and return null.
   */
  public RegionState updateRegionState(
      final RegionTransition transition, final State state) {
    byte [] regionName = transition.getRegionName();
    HRegionInfo regionInfo = getRegionInfo(regionName);
    if (regionInfo == null) {
      String prettyRegionName = HRegionInfo.prettyPrint(
        HRegionInfo.encodeRegionName(regionName));
      LOG.warn("Failed to find region " + prettyRegionName
        + " in updating its state to " + state
        + " based on region transition " + transition);
      return null;
    }
    return updateRegionState(regionInfo, state,
      transition.getServerName());
  }

  /**
   * Update a region state. It will be put in transition if not already there.
   */
  public RegionState updateRegionState(
      final HRegionInfo hri, final State state, final ServerName serverName) {
    return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM);
  }

  public void regionOnline(
      final HRegionInfo hri, final ServerName serverName) {
    regionOnline(hri, serverName, HConstants.NO_SEQNUM);
  }

  /**
   * A region is online; it won't be in transition any more.
   * We can't confirm it is really online on the specified region server
   * because it hasn't been put in the region server's online region list yet.
   */
  public void regionOnline(final HRegionInfo hri,
      final ServerName serverName, long openSeqNum) {
    if (!serverManager.isServerOnline(serverName)) {
      // This is possible if the region server dies before the master gets a
      // chance to handle the ZK event in time. At this point, if the dead server
      // has already been processed by SSH, we should ignore this event.
      // If not processed yet, ignore it and let SSH deal with it.
      LOG.warn("Ignored, " + hri.getEncodedName()
        + " was opened on a dead server: " + serverName);
      return;
    }
    updateRegionState(hri, State.OPEN, serverName, openSeqNum);

    synchronized (this) {
      regionsInTransition.remove(hri.getEncodedName());
      ServerName oldServerName = regionAssignments.put(hri, serverName);
      if (!serverName.equals(oldServerName)) {
        LOG.info("Onlined " + hri.getShortNameToLog() + " on " + serverName);
        Set<HRegionInfo> regions = serverHoldings.get(serverName);
        if (regions == null) {
          regions = new HashSet<HRegionInfo>();
          serverHoldings.put(serverName, regions);
        }
        regions.add(hri);
        if (oldServerName != null) {
          LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
          Set<HRegionInfo> oldRegions = serverHoldings.get(oldServerName);
          oldRegions.remove(hri);
          if (oldRegions.isEmpty()) {
            serverHoldings.remove(oldServerName);
          }
        }
      }
    }
  }

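  // Hedged sketch (the caller and variable names are assumed, not from this
  // file): a handler for a successful OPENED transition would report the new
  // location here, which removes the region from regionsInTransition and moves
  // it between the per-server holdings of the old and new hosts.
  //
  //   regionStates.regionOnline(hri, openingServer, openSeqNum);
  //   // the region is now reported by isRegionOnline(hri) and getRegionServerOfRegion(hri)
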
  /**
   * A dead server's hlogs have been split so that all the regions
   * that used to be open on it can be safely assigned now. Mark them assignable.
   */
  public synchronized void logSplit(final ServerName serverName) {
    for (Iterator<Map.Entry<String, ServerName>> it
        = lastAssignments.entrySet().iterator(); it.hasNext();) {
      Map.Entry<String, ServerName> e = it.next();
      if (e.getValue().equals(serverName)) {
        it.remove();
      }
    }
    long now = System.currentTimeMillis();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Adding to processed servers " + serverName);
    }
    processedServers.put(serverName, Long.valueOf(now));
    Configuration conf = server.getConfiguration();
    long obsoleteTime = conf.getLong(LOG_SPLIT_TIME, DEFAULT_LOG_SPLIT_TIME);
    // Doesn't have to be very accurate about the cleanup time
    if (now > lastProcessedServerCleanTime + obsoleteTime) {
      lastProcessedServerCleanTime = now;
      long cutoff = now - obsoleteTime;
      for (Iterator<Map.Entry<ServerName, Long>> it
          = processedServers.entrySet().iterator(); it.hasNext();) {
        Map.Entry<ServerName, Long> e = it.next();
        if (e.getValue().longValue() < cutoff) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Removed from processed servers " + e.getKey());
          }
          it.remove();
        }
      }
    }
  }

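  // Illustrative flow (the ServerShutdownHandler invocation is assumed): once
  // log splitting for a dead server finishes, logSplit(serverName) clears every
  // lastAssignments entry pointing at it, so those regions no longer report a
  // last region server and can be re-assigned without further waiting.
  //
  //   regionStates.logSplit(deadServer);
  //   // regions last assigned to deadServer now return null from
  //   // getLastRegionServerOfRegion() and are assignable again
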
  /**
   * Log split is done for a given region, so it is assignable now.
   */
  public void logSplit(final HRegionInfo region) {
    clearLastAssignment(region);
  }

  public synchronized void clearLastAssignment(final HRegionInfo region) {
    lastAssignments.remove(region.getEncodedName());
  }

  /**
   * A region is offline; it won't be in transition any more.
   */
  public void regionOffline(final HRegionInfo hri) {
    regionOffline(hri, null);
  }

  /**
   * A region is offline; it won't be in transition any more. Its state
   * should be the specified expected state, which can only be
   * Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
   */
  public void regionOffline(
      final HRegionInfo hri, final State expectedState) {
    Preconditions.checkArgument(expectedState == null
      || RegionState.isUnassignable(expectedState),
        "Offlined region should not be " + expectedState);
    if (isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
      // Remove it from all region maps
      deleteRegion(hri);
      return;
    }
    State newState =
      expectedState == null ? State.OFFLINE : expectedState;
    updateRegionState(hri, newState);

    synchronized (this) {
      regionsInTransition.remove(hri.getEncodedName());
      ServerName oldServerName = regionAssignments.remove(hri);
      if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
        LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
        Set<HRegionInfo> oldRegions = serverHoldings.get(oldServerName);
        oldRegions.remove(hri);
        if (oldRegions.isEmpty()) {
          serverHoldings.remove(oldServerName);
        }
      }
    }
  }

  /**
   * A server is offline; all regions on it are dead.
   */
  public synchronized List<HRegionInfo> serverOffline(
      final ZooKeeperWatcher watcher, final ServerName sn) {
    // Offline all regions on this server not already in transition.
    List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
    Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
    if (assignedRegions == null) {
      assignedRegions = new HashSet<HRegionInfo>();
    }

    // Offline regions outside the loop to avoid ConcurrentModificationException
    Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
    for (HRegionInfo region : assignedRegions) {
      // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
      if (isRegionOnline(region)) {
        regionsToOffline.add(region);
      } else {
        if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
          LOG.debug("Offline splitting/merging region " + getRegionState(region));
          try {
            // Delete the ZNode if it exists
            ZKAssign.deleteNodeFailSilent(watcher, region);
            regionsToOffline.add(region);
          } catch (KeeperException ke) {
            server.abort("Unexpected ZK exception deleting node " + region, ke);
          }
        }
      }
    }

    for (HRegionInfo hri : regionsToOffline) {
      regionOffline(hri);
    }

    for (RegionState state : regionsInTransition.values()) {
      HRegionInfo hri = state.getRegion();
      if (assignedRegions.contains(hri)) {
        // Region is open on this region server, but in transition.
        // This region must be moving away from this server, or splitting/merging.
        // SSH will handle it, either skip assigning, or re-assign.
        LOG.info("Transitioning " + state + " will be handled by SSH for " + sn);
      } else if (sn.equals(state.getServerName())) {
        // Region is in transition on this region server, and this
        // region is not open on this server. So the region must be
        // moving to this server from another one (i.e. opening or
        // pending open on this server, and was open on another one).
        // The Offline state is also kind of pending open if the region is
        // in transition. The region could be in FAILED_CLOSE state too, if we
        // have tried several times to open it while this region server is not reachable.
        if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
          LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
          rits.add(hri);
        } else {
          LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
        }
      }
    }

    this.notifyAll();
    return rits;
  }

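  // Hedged sketch of the expected call site (the ServerShutdownHandler and its
  // ZooKeeper watcher are assumed): when a region server dies, SSH offlines the
  // regions it was serving and gets back the in-transition regions that now
  // need to be re-assigned by the handler itself.
  //
  //   List<HRegionInfo> toReassign = regionStates.serverOffline(zkWatcher, deadServer);
  //   for (HRegionInfo hri : toReassign) {
  //     // hand these back to the assignment manager once logs are split
  //   }
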
  /**
   * Gets the online regions of the specified table.
   * This method looks at the in-memory state.  It does not go to <code>hbase:meta</code>.
   * Only returns <em>online</em> regions.  If a region on this table has been
   * closed during a disable, etc., it will not be included in the returned list.
   * So, the returned list may not necessarily be ALL regions in this table, it's
   * all the ONLINE regions in the table.
   * @param tableName
   * @return Online regions from <code>tableName</code>
   */
  public synchronized List<HRegionInfo> getRegionsOfTable(TableName tableName) {
    List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
    // boundary needs to have table's name but regionID 0 so that it is sorted
    // before all table's regions.
    HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
    for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
      if (!hri.getTable().equals(tableName)) break;
      tableRegions.add(hri);
    }
    return tableRegions;
  }

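  // Worked example of the boundary trick above (table name is assumed): per the
  // comment in the method, the fake boundary region (regionId 0) sorts before
  // all of the table's regions in the sorted regionAssignments map, and the
  // tailMap walk stops at the first region belonging to a different table.
  //
  //   List<HRegionInfo> t1Regions =
  //     regionStates.getRegionsOfTable(TableName.valueOf("t1"));
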
  /**
   * Wait on a region to clear regions-in-transition.
   * <p>
   * If the region isn't in transition, returns immediately.  Otherwise, the
   * method blocks until the region is out of transition.
   */
  public synchronized void waitOnRegionToClearRegionsInTransition(
      final HRegionInfo hri) throws InterruptedException {
    if (!isRegionInTransition(hri)) return;

    while (!server.isStopped() && isRegionInTransition(hri)) {
      RegionState rs = getRegionState(hri);
      LOG.info("Waiting on " + rs + " to clear regions-in-transition");
      waitForUpdate(100);
    }

    if (server.isStopped()) {
      LOG.info("Giving up wait on region in " +
        "transition because stoppable.isStopped is set");
    }
  }

  /**
   * A table is deleted. Remove its regions from all internal maps.
   * We loop through all regions assuming we don't delete tables too often.
   */
  public void tableDeleted(final TableName tableName) {
    Set<HRegionInfo> regionsToDelete = new HashSet<HRegionInfo>();
    synchronized (this) {
      for (RegionState state: regionStates.values()) {
        HRegionInfo region = state.getRegion();
        if (region.getTable().equals(tableName)) {
          regionsToDelete.add(region);
        }
      }
    }
    for (HRegionInfo region: regionsToDelete) {
      deleteRegion(region);
    }
  }

  /**
   * Checks if a region was assigned to a server which is not online now.
   * If so, we should hold off re-assigning this region till SSH has split
   * its hlogs. Once logs are split, the last assignment of this region will
   * be reset, which means a null last-assignment server is ok for re-assigning.
   *
   * A region server could be dead but we don't know it yet. We may
   * falsely think it's online. Therefore, if a server is reported online,
   * we still need to confirm it is reachable and has the expected start code.
   */
  synchronized boolean wasRegionOnDeadServer(final String encodedName) {
    ServerName server = lastAssignments.get(encodedName);
    return isServerDeadAndNotProcessed(server);
  }

  synchronized boolean isServerDeadAndNotProcessed(ServerName server) {
    if (server == null) return false;
    if (serverManager.isServerOnline(server)) {
      String hostAndPort = server.getHostAndPort();
      long startCode = server.getStartcode();
      Long deadCode = deadServers.get(hostAndPort);
      if (deadCode == null || startCode > deadCode.longValue()) {
        if (serverManager.isServerReachable(server)) {
          return false;
        }
        // The size of deadServers won't grow unbounded.
        deadServers.put(hostAndPort, Long.valueOf(startCode));
      }
      // Watch out! If the server is not dead, the region could
      // remain unassigned. That's why ServerManager#isServerReachable
      // should use some retry.
      //
      // We cache this info since it is very unlikely for that
      // instance to come back up later on. We don't want to expire
      // the server since we prefer to let it die naturally.
      LOG.warn("Couldn't reach online server " + server);
    }
    // Now, we know it's dead. Check if it's processed
    return !processedServers.containsKey(server);
  }

  /**
   * Get the last region server a region was on, for the purpose of re-assignment,
   * i.e. should the re-assignment be held back till log split is done?
   */
  synchronized ServerName getLastRegionServerOfRegion(final String encodedName) {
    return lastAssignments.get(encodedName);
  }

  synchronized void setLastRegionServerOfRegions(
      final ServerName serverName, final List<HRegionInfo> regionInfos) {
    for (HRegionInfo hri: regionInfos) {
      setLastRegionServerOfRegion(serverName, hri.getEncodedName());
    }
  }

  synchronized void setLastRegionServerOfRegion(
      final ServerName serverName, final String encodedName) {
    lastAssignments.put(encodedName, serverName);
  }

  /**
   * At a clean cluster (re)start, mark all user regions closed except those of tables
   * that are excluded, such as disabled/disabling/enabling tables. All user regions
   * and their previous locations are returned.
   */
  synchronized Map<HRegionInfo, ServerName> closeAllUserRegions(Set<TableName> excludedTables) {
    boolean noExcludeTables = excludedTables == null || excludedTables.isEmpty();
    Set<HRegionInfo> toBeClosed = new HashSet<HRegionInfo>(regionStates.size());
    for (RegionState state: regionStates.values()) {
      HRegionInfo hri = state.getRegion();
      TableName tableName = hri.getTable();
      if (!TableName.META_TABLE_NAME.equals(tableName) && !hri.isSplit()
          && (noExcludeTables || !excludedTables.contains(tableName))) {
        toBeClosed.add(hri);
      }
    }
    Map<HRegionInfo, ServerName> allUserRegions =
      new HashMap<HRegionInfo, ServerName>(toBeClosed.size());
    for (HRegionInfo hri: toBeClosed) {
      RegionState regionState = updateRegionState(hri, State.CLOSED);
      allUserRegions.put(hri, regionState.getServerName());
    }
    return allUserRegions;
  }

  /**
   * Compute the average load across all region servers.
   * Currently, this uses a very naive computation: it just uses the number of
   * regions being served, ignoring stats about the number of requests.
   * @return the average load
   */
  protected synchronized double getAverageLoad() {
    int numServers = 0, totalLoad = 0;
    for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
      Set<HRegionInfo> regions = e.getValue();
      ServerName serverName = e.getKey();
      int regionCount = regions.size();
      if (regionCount > 0 || serverManager.isServerOnline(serverName)) {
        totalLoad += regionCount;
        numServers++;
      }
    }
    if (numServers > 1) {
      // The master region server holds only a couple of regions.
      // Don't consider this server in calculating the average load
      // if there are other region servers, to avoid possible confusion.
      Set<HRegionInfo> hris = serverHoldings.get(server.getServerName());
      if (hris != null) {
        totalLoad -= hris.size();
        numServers--;
      }
    }
    return numServers == 0 ? 0.0 :
      (double)totalLoad / (double)numServers;
  }

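  // Worked example (numbers are made up): two region servers holding 10 and 20
  // regions plus a master holding 2 give totalLoad = 32 and numServers = 3.
  // Since there is more than one server, the master's share is removed:
  // (32 - 2) / (3 - 1) = 15.0 is the reported average load.
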
  /**
   * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
   * We can't let the original out since it can change, and at least the load
   * balancer wants to iterate this exported list.  The copy is built while
   * synchronized on this instance so the holdings can't change underneath us.
   *
   * @return A clone of current assignments by table.
   */
  protected Map<TableName, Map<ServerName, List<HRegionInfo>>>
      getAssignmentsByTable() {
    Map<TableName, Map<ServerName, List<HRegionInfo>>> result =
      new HashMap<TableName, Map<ServerName,List<HRegionInfo>>>();
    synchronized (this) {
      if (!server.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false)) {
        Map<ServerName, List<HRegionInfo>> svrToRegions =
          new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
        for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
          svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
        }
        result.put(TableName.valueOf("ensemble"), svrToRegions);
      } else {
        for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
          for (HRegionInfo hri: e.getValue()) {
            if (hri.isMetaRegion()) continue;
            TableName tablename = hri.getTable();
            Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
            if (svrToRegions == null) {
              svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
              result.put(tablename, svrToRegions);
            }
            List<HRegionInfo> regions = svrToRegions.get(e.getKey());
            if (regions == null) {
              regions = new ArrayList<HRegionInfo>();
              svrToRegions.put(e.getKey(), regions);
            }
            regions.add(hri);
          }
        }
      }
    }

    Map<ServerName, ServerLoad>
      onlineSvrs = serverManager.getOnlineServers();
    // Take care of servers w/o assignments.
    for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
      for (ServerName svr: onlineSvrs.keySet()) {
        if (!map.containsKey(svr)) {
          map.put(svr, new ArrayList<HRegionInfo>());
        }
      }
    }
    return result;
  }

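  // Hedged sketch of the two shapes this snapshot can take (the balancer call
  // is assumed; "hbase.master.loadbalance.bytable" and "ensemble" come from the
  // method above): with the flag off, everything is grouped under the single
  // pseudo-table "ensemble"; with it on, there is one server-to-regions map per
  // user table.
  //
  //   Map<TableName, Map<ServerName, List<HRegionInfo>>> snapshot =
  //     regionStates.getAssignmentsByTable();
  //   for (Map.Entry<TableName, Map<ServerName, List<HRegionInfo>>> entry : snapshot.entrySet()) {
  //     // feed entry.getValue() to the load balancer for entry.getKey()
  //   }
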
  protected RegionState getRegionState(final HRegionInfo hri) {
    return getRegionState(hri.getEncodedName());
  }

  protected synchronized RegionState getRegionState(final String encodedName) {
    return regionStates.get(encodedName);
  }

  /**
   * Get the HRegionInfo from the cache; if it is not there, from the hbase:meta table
   * @param  regionName
   * @return HRegionInfo for the region
   */
  protected HRegionInfo getRegionInfo(final byte [] regionName) {
    String encodedName = HRegionInfo.encodeRegionName(regionName);
    RegionState regionState = getRegionState(encodedName);
    if (regionState != null) {
      return regionState.getRegion();
    }

    try {
      Pair<HRegionInfo, ServerName> p =
        MetaReader.getRegion(server.getCatalogTracker(), regionName);
      HRegionInfo hri = p == null ? null : p.getFirst();
      if (hri != null) {
        createRegionState(hri);
      }
      return hri;
    } catch (IOException e) {
      server.abort("Aborting because error occurred while reading "
        + Bytes.toStringBinary(regionName) + " from hbase:meta", e);
      return null;
    }
  }

  static boolean isOneOfStates(RegionState regionState, State... states) {
    State s = regionState != null ? regionState.getState() : null;
    for (State state: states) {
      if (s == state) return true;
    }
    return false;
  }

  /**
   * Update a region state. It will be put in transition if not already there.
   */
  private RegionState updateRegionState(final HRegionInfo hri,
      final State state, final ServerName serverName, long openSeqNum) {
    if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) {
      LOG.warn("Failed to open/close " + hri.getShortNameToLog()
        + " on " + serverName + ", set to " + state);
    }

    String encodedName = hri.getEncodedName();
    RegionState regionState = new RegionState(
      hri, state, System.currentTimeMillis(), serverName);
    RegionState oldState = getRegionState(encodedName);
    if (!regionState.equals(oldState)) {
      LOG.info("Transition " + oldState + " to " + regionState);
      // Persist region state before updating in-memory info, if needed
      regionStateStore.updateRegionState(openSeqNum, regionState, oldState);
    }

    synchronized (this) {
      regionsInTransition.put(encodedName, regionState);
      regionStates.put(encodedName, regionState);

      // For these states, the region should be properly closed.
      // There should be no log splitting issue.
      if ((state == State.CLOSED || state == State.MERGED
          || state == State.SPLIT) && lastAssignments.containsKey(encodedName)) {
        ServerName last = lastAssignments.get(encodedName);
        if (last.equals(serverName)) {
          lastAssignments.remove(encodedName);
        } else {
          LOG.warn(encodedName + " moved to " + state + " on "
            + serverName + ", expected " + last);
        }
      }

      // Once a region is opened, record its last assignment right away.
      if (serverName != null && state == State.OPEN) {
        ServerName last = lastAssignments.get(encodedName);
        if (!serverName.equals(last)) {
          lastAssignments.put(encodedName, serverName);
          if (last != null && isServerDeadAndNotProcessed(last)) {
            LOG.warn(encodedName + " moved to " + serverName
              + ", while its previous host " + last
              + " is dead but not processed yet");
          }
        }
      }

      // Notify the change
      this.notifyAll();
    }
    return regionState;
  }

  /**
   * Remove a region from all state maps.
   */
  private synchronized void deleteRegion(final HRegionInfo hri) {
    String encodedName = hri.getEncodedName();
    regionsInTransition.remove(encodedName);
    regionStates.remove(encodedName);
    lastAssignments.remove(encodedName);
    ServerName sn = regionAssignments.remove(hri);
    if (sn != null) {
      Set<HRegionInfo> regions = serverHoldings.get(sn);
      regions.remove(hri);
    }
  }
}