View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.Collection;
23  import java.util.HashMap;
24  import java.util.HashSet;
25  import java.util.Iterator;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Set;
29  import java.util.TreeMap;
30  
31  import com.google.common.annotations.VisibleForTesting;
32  import com.google.common.base.Preconditions;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.hbase.classification.InterfaceAudience;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.MetaTableAccessor;
42  import org.apache.hadoop.hbase.ServerLoad;
43  import org.apache.hadoop.hbase.ServerName;
44  import org.apache.hadoop.hbase.TableName;
45  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
46  import org.apache.hadoop.hbase.master.RegionState.State;
47  import org.apache.hadoop.hbase.client.TableState;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.apache.hadoop.hbase.util.FSUtils;
50  import org.apache.hadoop.hbase.util.Pair;
51  
52  /**
53   * Region state accountant. It holds the states of all regions in memory.
54   * Under normal conditions, it should match the meta table and the true region states.
55   *
56   * This map is used by AssignmentManager to track region states.
57   */
58  @InterfaceAudience.Private
59  public class RegionStates {
  /** Logger shared by all instances of this class. */
  private static final Log LOG = LogFactory.getLog(RegionStates.class);

  /**
   * Regions currently in transition, keyed by encoded region name.
   */
  final HashMap<String, RegionState> regionsInTransition =
    new HashMap<String, RegionState>();

  /**
   * Region encoded name to state map.
   * All the regions should be in this map.
   */
  private final Map<String, RegionState> regionStates =
    new HashMap<String, RegionState>();

  /**
   * Per-table index of region states: table name to
   * (encoded region name to region state).
   */
  private final Map<TableName, Map<String, RegionState>> regionStatesTableIndex =
      new HashMap<TableName, Map<String, RegionState>>();

  /**
   * Server to regions assignment map.
   * Contains the set of regions currently assigned to a given server.
   */
  private final Map<ServerName, Set<HRegionInfo>> serverHoldings =
    new HashMap<ServerName, Set<HRegionInfo>>();

  /**
   * Maintains the mapping from the default region to the replica regions.
   */
  private final Map<HRegionInfo, Set<HRegionInfo>> defaultReplicaToOtherReplicas =
    new HashMap<HRegionInfo, Set<HRegionInfo>>();

  /**
   * Region to server assignment map.
   * Contains the server a given region is currently assigned to.
   */
  private final TreeMap<HRegionInfo, ServerName> regionAssignments =
    new TreeMap<HRegionInfo, ServerName>();

  /**
   * Encoded region name to server assignment map, used for re-assignment.
   * Contains the server a given region was last known to be assigned
   * to, which has not completed log splitting, so the region is not
   * assignable. If a region is currently assigned, the server info in this
   * map should be the same as that in regionAssignments.
   * However, the info in regionAssignments is cleared when the region
   * goes offline, while the info in lastAssignments is cleared when
   * the region is closed or the server is dead and processed.
   */
  private final HashMap<String, ServerName> lastAssignments =
    new HashMap<String, ServerName>();

  /**
   * Encoded region name to server assignment map, used to clean up
   * serverHoldings when a region comes online on a new server.
   * When the region goes offline from the previous server, we clean up
   * regionAssignments so that it has the latest assignment map, but we
   * do not clean up serverHoldings to match the meta. We need this map
   * to find the old server whose serverHoldings needs cleanup, given a
   * moved region.
   */
  private final HashMap<String, ServerName> oldAssignments =
    new HashMap<String, ServerName>();

  /**
   * Maps a host:port pair string to the latest start code
   * of a region server which is known to be dead. It is dead
   * to us, but server manager may not know it yet.
   */
  private final HashMap<String, Long> deadServers =
    new HashMap<String, Long>();

  /**
   * Maps a dead server to the time when its log split was done.
   * Since log splitting is not ordered, we have to remember
   * all processed instances. The map is cleaned up based
   * on a configured time. By default, we assume a dead
   * server should be done with log splitting in two hours.
   */
  private final HashMap<ServerName, Long> processedServers =
    new HashMap<ServerName, Long>();
  // Timestamp (System.currentTimeMillis()) of the last purge of processedServers.
  private long lastProcessedServerCleanTime;

  // Collaborators handed in through the constructor.
  private final TableStateManager tableStateManager;
  private final RegionStateStore regionStateStore;
  private final ServerManager serverManager;
  private final MasterServices server;

  // Configuration key: the maximum time to keep log split info in the
  // processedServers map, in milliseconds.
  static final String LOG_SPLIT_TIME = "hbase.master.maximum.logsplit.keeptime";
  static final long DEFAULT_LOG_SPLIT_TIME = 7200000L; // 2 hours
153 
154   RegionStates(final MasterServices master, final TableStateManager tableStateManager,
155       final ServerManager serverManager, final RegionStateStore regionStateStore) {
156     this.tableStateManager = tableStateManager;
157     this.regionStateStore = regionStateStore;
158     this.serverManager = serverManager;
159     this.server = master;
160   }
161 
162   /**
163    * @return a copy of the region assignment map
164    */
165   public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
166     return new TreeMap<HRegionInfo, ServerName>(regionAssignments);
167   }
168 
169   /**
170    * Return the replicas (including default) for the regions grouped by ServerName
171    * @param regions
172    * @return a pair containing the groupings as a map
173    */
174   synchronized Map<ServerName, List<HRegionInfo>> getRegionAssignments(
175     Collection<HRegionInfo> regions) {
176     Map<ServerName, List<HRegionInfo>> map = new HashMap<ServerName, List<HRegionInfo>>();
177     for (HRegionInfo region : regions) {
178       HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(region);
179       Set<HRegionInfo> allReplicas = defaultReplicaToOtherReplicas.get(defaultReplica);
180       if (allReplicas != null) {
181         for (HRegionInfo hri : allReplicas) {
182           ServerName server = regionAssignments.get(hri);
183           if (server != null) {
184             List<HRegionInfo> regionsOnServer = map.get(server);
185             if (regionsOnServer == null) {
186               regionsOnServer = new ArrayList<HRegionInfo>(1);
187               map.put(server, regionsOnServer);
188             }
189             regionsOnServer.add(hri);
190           }
191         }
192       }
193     }
194     return map;
195   }
196 
197   public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
198     return regionAssignments.get(hri);
199   }
200 
201   /**
202    * Get regions in transition and their states
203    */
204   @SuppressWarnings("unchecked")
205   public synchronized Map<String, RegionState> getRegionsInTransition() {
206     return (Map<String, RegionState>)regionsInTransition.clone();
207   }
208 
209   /**
210    * @return True if specified region in transition.
211    */
212   public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
213     return regionsInTransition.containsKey(hri.getEncodedName());
214   }
215 
216   /**
217    * @return True if specified region in transition.
218    */
219   public synchronized boolean isRegionInTransition(final String encodedName) {
220     return regionsInTransition.containsKey(encodedName);
221   }
222 
223   /**
224    * @return True if any region in transition.
225    */
226   public synchronized boolean isRegionsInTransition() {
227     return !regionsInTransition.isEmpty();
228   }
229 
230   /**
231    * @return True if hbase:meta table region is in transition.
232    */
233   public synchronized boolean isMetaRegionInTransition() {
234     for (RegionState state : regionsInTransition.values()) {
235       if (state.getRegion().isMetaRegion()) return true;
236     }
237     return false;
238   }
239 
240   /**
241    * @return True if specified region assigned, and not in transition.
242    */
243   public synchronized boolean isRegionOnline(final HRegionInfo hri) {
244     return !isRegionInTransition(hri) && regionAssignments.containsKey(hri);
245   }
246 
247   /**
248    * @return True if specified region offline/closed, but not in transition.
249    * If the region is not in the map, it is offline to us too.
250    */
251   public synchronized boolean isRegionOffline(final HRegionInfo hri) {
252     return getRegionState(hri) == null || (!isRegionInTransition(hri)
253       && isRegionInState(hri, State.OFFLINE, State.CLOSED));
254   }
255 
256   /**
257    * @return True if specified region is in one of the specified states.
258    */
259   public boolean isRegionInState(
260       final HRegionInfo hri, final State... states) {
261     return isRegionInState(hri.getEncodedName(), states);
262   }
263 
264   /**
265    * @return True if specified region is in one of the specified states.
266    */
267   public boolean isRegionInState(
268       final String encodedName, final State... states) {
269     RegionState regionState = getRegionState(encodedName);
270     return isOneOfStates(regionState, states);
271   }
272 
273   /**
274    * Wait for the state map to be updated by assignment manager.
275    */
276   public synchronized void waitForUpdate(
277       final long timeout) throws InterruptedException {
278     this.wait(timeout);
279   }
280 
281   /**
282    * Get region transition state
283    */
284   public RegionState getRegionTransitionState(final HRegionInfo hri) {
285     return getRegionTransitionState(hri.getEncodedName());
286   }
287 
288   /**
289    * Get region transition state
290    */
291   public synchronized RegionState
292       getRegionTransitionState(final String encodedName) {
293     return regionsInTransition.get(encodedName);
294   }
295 
296   /**
297    * Add a list of regions to RegionStates. If a region is split
298    * and offline, its state will be SPLIT. Otherwise, its state will
299    * be OFFLINE. Region already in RegionStates will be skipped.
300    */
301   public void createRegionStates(
302       final List<HRegionInfo> hris) {
303     for (HRegionInfo hri: hris) {
304       createRegionState(hri);
305     }
306   }
307 
308   /**
309    * Add a region to RegionStates. If the region is split
310    * and offline, its state will be SPLIT. Otherwise, its state will
311    * be OFFLINE. If it is already in RegionStates, this call has
312    * no effect, and the original state is returned.
313    */
314   public RegionState createRegionState(final HRegionInfo hri) {
315     return createRegionState(hri, null, null, null);
316   }
317 
318   /**
319    * Add a region to RegionStates with the specified state.
320    * If the region is already in RegionStates, this call has
321    * no effect, and the original state is returned.
322    *
323    * @param hri the region info to create a state for
324    * @param newState the state to the region in set to
325    * @param serverName the server the region is transitioning on
326    * @param lastHost the last server that hosts the region
327    * @return the current state
328    */
329   public synchronized RegionState createRegionState(final HRegionInfo hri,
330       State newState, ServerName serverName, ServerName lastHost) {
331     if (newState == null || (newState == State.OPEN && serverName == null)) {
332       newState =  State.OFFLINE;
333     }
334     if (hri.isOffline() && hri.isSplit()) {
335       newState = State.SPLIT;
336       serverName = null;
337     }
338     String encodedName = hri.getEncodedName();
339     RegionState regionState = regionStates.get(encodedName);
340     if (regionState != null) {
341       LOG.warn("Tried to create a state for a region already in RegionStates, "
342         + "used existing: " + regionState + ", ignored new: " + newState);
343     } else {
344       regionState = new RegionState(hri, newState, serverName);
345       putRegionState(regionState);
346       if (newState == State.OPEN) {
347         if (!serverName.equals(lastHost)) {
348           LOG.warn("Open region's last host " + lastHost
349             + " should be the same as the current one " + serverName
350             + ", ignored the last and used the current one");
351           lastHost = serverName;
352         }
353         lastAssignments.put(encodedName, lastHost);
354         regionAssignments.put(hri, lastHost);
355       } else if (!isOneOfStates(regionState, State.MERGED, State.SPLIT, State.OFFLINE)) {
356         regionsInTransition.put(encodedName, regionState);
357       }
358       if (lastHost != null && newState != State.SPLIT) {
359         addToServerHoldings(lastHost, hri);
360         if (newState != State.OPEN) {
361           oldAssignments.put(encodedName, lastHost);
362         }
363       }
364     }
365     return regionState;
366   }
367 
368   private RegionState putRegionState(RegionState regionState) {
369     HRegionInfo hri = regionState.getRegion();
370     String encodedName = hri.getEncodedName();
371     TableName table = hri.getTable();
372     RegionState oldState = regionStates.put(encodedName, regionState);
373     Map<String, RegionState> map = regionStatesTableIndex.get(table);
374     if (map == null) {
375       map = new HashMap<String, RegionState>();
376       regionStatesTableIndex.put(table, map);
377     }
378     map.put(encodedName, regionState);
379     return oldState;
380   }
381 
382   /**
383    * Update a region state. It will be put in transition if not already there.
384    */
385   public RegionState updateRegionState(
386       final HRegionInfo hri, final State state) {
387     RegionState regionState = getRegionState(hri.getEncodedName());
388     return updateRegionState(hri, state,
389       regionState == null ? null : regionState.getServerName());
390   }
391 
392   /**
393    * Update a region state. It will be put in transition if not already there.
394    */
395   public RegionState updateRegionState(
396       final HRegionInfo hri, final State state, final ServerName serverName) {
397     return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM);
398   }
399 
400   public void regionOnline(final HRegionInfo hri, final ServerName serverName) {
401     regionOnline(hri, serverName, HConstants.NO_SEQNUM);
402   }
403 
404   /**
405    * A region is online, won't be in transition any more.
406    * We can't confirm it is really online on specified region server
407    * because it hasn't been put in region server's online region list yet.
408    */
409   public void regionOnline(final HRegionInfo hri, final ServerName serverName, long openSeqNum) {
410     String encodedName = hri.getEncodedName();
411     if (!serverManager.isServerOnline(serverName)) {
412       // This is possible if the region server dies before master gets a
413       // chance to handle ZK event in time. At this time, if the dead server
414       // is already processed by SSH, we should ignore this event.
415       // If not processed yet, ignore and let SSH deal with it.
416       LOG.warn("Ignored, " + encodedName + " was opened on a dead server: " + serverName);
417       return;
418     }
419     updateRegionState(hri, State.OPEN, serverName, openSeqNum);
420 
421     synchronized (this) {
422       regionsInTransition.remove(encodedName);
423       ServerName oldServerName = regionAssignments.put(hri, serverName);
424       if (!serverName.equals(oldServerName)) {
425         if (LOG.isDebugEnabled()) {
426           LOG.debug("Onlined " + hri.getShortNameToLog() + " on " + serverName);
427         }
428         addToServerHoldings(serverName, hri);
429         addToReplicaMapping(hri);
430         if (oldServerName == null) {
431           oldServerName = oldAssignments.remove(encodedName);
432         }
433         if (oldServerName != null
434             && !oldServerName.equals(serverName)
435             && serverHoldings.containsKey(oldServerName)) {
436           LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
437           removeFromServerHoldings(oldServerName, hri);
438         }
439       }
440     }
441   }
442 
443   private void addToServerHoldings(ServerName serverName, HRegionInfo hri) {
444     Set<HRegionInfo> regions = serverHoldings.get(serverName);
445     if (regions == null) {
446       regions = new HashSet<HRegionInfo>();
447       serverHoldings.put(serverName, regions);
448     }
449     regions.add(hri);
450   }
451 
452   private void addToReplicaMapping(HRegionInfo hri) {
453     HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
454     Set<HRegionInfo> replicas =
455         defaultReplicaToOtherReplicas.get(defaultReplica);
456     if (replicas == null) {
457       replicas = new HashSet<HRegionInfo>();
458       defaultReplicaToOtherReplicas.put(defaultReplica, replicas);
459     }
460     replicas.add(hri);
461   }
462 
463   private void removeFromServerHoldings(ServerName serverName, HRegionInfo hri) {
464     Set<HRegionInfo> oldRegions = serverHoldings.get(serverName);
465     oldRegions.remove(hri);
466     if (oldRegions.isEmpty()) {
467       serverHoldings.remove(serverName);
468     }
469   }
470 
471   private void removeFromReplicaMapping(HRegionInfo hri) {
472     HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
473     Set<HRegionInfo> replicas = defaultReplicaToOtherReplicas.get(defaultReplica);
474     if (replicas != null) {
475       replicas.remove(hri);
476       if (replicas.isEmpty()) {
477         defaultReplicaToOtherReplicas.remove(defaultReplica);
478       }
479     }
480   }
481 
482   /**
483    * A dead server's wals have been split so that all the regions
484    * used to be open on it can be safely assigned now. Mark them assignable.
485    */
486   public synchronized void logSplit(final ServerName serverName) {
487     for (Iterator<Map.Entry<String, ServerName>> it
488         = lastAssignments.entrySet().iterator(); it.hasNext();) {
489       Map.Entry<String, ServerName> e = it.next();
490       if (e.getValue().equals(serverName)) {
491         it.remove();
492       }
493     }
494     long now = System.currentTimeMillis();
495     if (LOG.isDebugEnabled()) {
496       LOG.debug("Adding to log splitting servers " + serverName);
497     }
498     processedServers.put(serverName, Long.valueOf(now));
499     Configuration conf = server.getConfiguration();
500     long obsoleteTime = conf.getLong(LOG_SPLIT_TIME, DEFAULT_LOG_SPLIT_TIME);
501     // Doesn't have to be very accurate about the clean up time
502     if (now > lastProcessedServerCleanTime + obsoleteTime) {
503       lastProcessedServerCleanTime = now;
504       long cutoff = now - obsoleteTime;
505       for (Iterator<Map.Entry<ServerName, Long>> it
506           = processedServers.entrySet().iterator(); it.hasNext();) {
507         Map.Entry<ServerName, Long> e = it.next();
508         if (e.getValue().longValue() < cutoff) {
509           if (LOG.isDebugEnabled()) {
510             LOG.debug("Removed from log splitting servers " + e.getKey());
511           }
512           it.remove();
513         }
514       }
515     }
516   }
517 
518   /**
519    * Log split is done for a given region, so it is assignable now.
520    */
521   public void logSplit(final HRegionInfo region) {
522     clearLastAssignment(region);
523   }
524 
525   public synchronized void clearLastAssignment(final HRegionInfo region) {
526     lastAssignments.remove(region.getEncodedName());
527   }
528 
529   /**
530    * A region is offline, won't be in transition any more.
531    */
532   public void regionOffline(final HRegionInfo hri) {
533     regionOffline(hri, null);
534   }
535 
536   /**
537    * A region is offline, won't be in transition any more. Its state
538    * should be the specified expected state, which can only be
539    * Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
540    */
541   public void regionOffline(
542       final HRegionInfo hri, final State expectedState) {
543     Preconditions.checkArgument(expectedState == null
544       || RegionState.isUnassignable(expectedState),
545         "Offlined region should not be " + expectedState);
546     if (isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
547       // Remove it from all region maps
548       deleteRegion(hri);
549       return;
550     }
551     State newState =
552       expectedState == null ? State.OFFLINE : expectedState;
553     updateRegionState(hri, newState);
554     String encodedName = hri.getEncodedName();
555     synchronized (this) {
556       regionsInTransition.remove(encodedName);
557       ServerName oldServerName = regionAssignments.remove(hri);
558       if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
559         if (newState == State.MERGED || newState == State.SPLIT
560             || hri.isMetaRegion() || tableStateManager.isTableState(hri.getTable(),
561               TableState.State.DISABLED, TableState.State.DISABLING)) {
562           // Offline the region only if it's merged/split, or the table is disabled/disabling.
563           // Otherwise, offline it from this server only when it is online on a different server.
564           LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
565           removeFromServerHoldings(oldServerName, hri);
566           removeFromReplicaMapping(hri);
567         } else {
568           // Need to remember it so that we can offline it from this
569           // server when it is online on a different server.
570           oldAssignments.put(encodedName, oldServerName);
571         }
572       }
573     }
574   }
575 
576   /**
577    * A server is offline, all regions on it are dead.
578    */
579   public List<HRegionInfo> serverOffline(final ServerName sn) {
580     // Offline all regions on this server not already in transition.
581     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
582     Set<HRegionInfo> regionsToCleanIfNoMetaEntry = new HashSet<HRegionInfo>();
583     // Offline regions outside the loop and synchronized block to avoid
584     // ConcurrentModificationException and deadlock in case of meta anassigned,
585     // but RegionState a blocked.
586     Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
587     synchronized (this) {
588       Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
589       if (assignedRegions == null) {
590         assignedRegions = new HashSet<HRegionInfo>();
591       }
592 
593       for (HRegionInfo region : assignedRegions) {
594         // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
595         if (isRegionOnline(region)) {
596           regionsToOffline.add(region);
597         } else if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
598           LOG.debug("Offline splitting/merging region " + getRegionState(region));
599           regionsToOffline.add(region);
600         }
601       }
602 
603       for (RegionState state : regionsInTransition.values()) {
604         HRegionInfo hri = state.getRegion();
605         if (assignedRegions.contains(hri)) {
606           // Region is open on this region server, but in transition.
607           // This region must be moving away from this server, or splitting/merging.
608           // SSH will handle it, either skip assigning, or re-assign.
609           LOG.info("Transitioning " + state + " will be handled by ServerCrashProcedure for " + sn);
610         } else if (sn.equals(state.getServerName())) {
611           // Region is in transition on this region server, and this
612           // region is not open on this server. So the region must be
613           // moving to this server from another one (i.e. opening or
614           // pending open on this server, was open on another one.
615           // Offline state is also kind of pending open if the region is in
616           // transition. The region could be in failed_close state too if we have
617           // tried several times to open it while this region server is not reachable)
618           if (isOneOfStates(state, State.OPENING, State.PENDING_OPEN,
619               State.FAILED_OPEN, State.FAILED_CLOSE, State.OFFLINE)) {
620             LOG.info("Found region in " + state +
621               " to be reassigned by ServerCrashProcedure for " + sn);
622             rits.add(hri);
623           } else if (isOneOfStates(state, State.SPLITTING_NEW)) {
624             regionsToCleanIfNoMetaEntry.add(state.getRegion());
625           } else {
626             LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
627           }
628         }
629       }
630       this.notifyAll();
631     }
632 
633     for (HRegionInfo hri : regionsToOffline) {
634       regionOffline(hri);
635     }
636 
637     cleanIfNoMetaEntry(regionsToCleanIfNoMetaEntry);
638     return rits;
639   }
640 
641   /**
642    * This method does an RPC to hbase:meta. Do not call this method with a lock/synchronize held.
643    * @param hris The hris to check if empty in hbase:meta and if so, clean them up.
644    */
645   private void cleanIfNoMetaEntry(Set<HRegionInfo> hris) {
646     if (hris.isEmpty()) return;
647     for (HRegionInfo hri: hris) {
648       try {
649         // This is RPC to meta table. It is done while we have a synchronize on
650         // regionstates. No progress will be made if meta is not available at this time.
651         // This is a cleanup task. Not critical.
652         if (MetaTableAccessor.getRegion(server.getConnection(), hri.getEncodedNameAsBytes()) ==
653             null) {
654           regionOffline(hri);
655           FSUtils.deleteRegionDir(server.getConfiguration(), hri);
656         }
657       } catch (IOException e) {
658         LOG.warn("Got exception while deleting " + hri + " directories from file system.", e);
659       }
660     }
661   }
662 
663   /**
664    * Gets the online regions of the specified table.
665    * This method looks at the in-memory state.  It does not go to <code>hbase:meta</code>.
666    * Only returns <em>online</em> regions.  If a region on this table has been
667    * closed during a disable, etc., it will be included in the returned list.
668    * So, the returned list may not necessarily be ALL regions in this table, its
669    * all the ONLINE regions in the table.
670    * @param tableName
671    * @return Online regions from <code>tableName</code>
672    */
673   public synchronized List<HRegionInfo> getRegionsOfTable(TableName tableName) {
674     List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
675     // boundary needs to have table's name but regionID 0 so that it is sorted
676     // before all table's regions.
677     HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
678     for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
679       if(!hri.getTable().equals(tableName)) break;
680       tableRegions.add(hri);
681     }
682     return tableRegions;
683   }
684 
685   /**
686    * Gets current state of all regions of the table.
687    * This method looks at the in-memory state.  It does not go to <code>hbase:meta</code>.
688    * Method guaranteed to return keys for all states
689    * in {@link org.apache.hadoop.hbase.master.RegionState.State}
690    *
691    * @param tableName
692    * @return Online regions from <code>tableName</code>
693    */
694   public synchronized Map<RegionState.State, List<HRegionInfo>>
695   getRegionByStateOfTable(TableName tableName) {
696     Map<RegionState.State, List<HRegionInfo>> tableRegions =
697         new HashMap<State, List<HRegionInfo>>();
698     for (State state : State.values()) {
699       tableRegions.put(state, new ArrayList<HRegionInfo>());
700     }
701     Map<String, RegionState> indexMap = regionStatesTableIndex.get(tableName);
702     if (indexMap == null)
703       return tableRegions;
704     for (RegionState regionState : indexMap.values()) {
705       tableRegions.get(regionState.getState()).add(regionState.getRegion());
706     }
707     return tableRegions;
708   }
709 
710   /**
711    * Wait on region to clear regions-in-transition.
712    * <p>
713    * If the region isn't in transition, returns immediately.  Otherwise, method
714    * blocks until the region is out of transition.
715    */
716   public synchronized void waitOnRegionToClearRegionsInTransition(
717       final HRegionInfo hri) throws InterruptedException {
718     if (!isRegionInTransition(hri)) return;
719 
720     while(!server.isStopped() && isRegionInTransition(hri)) {
721       RegionState rs = getRegionState(hri);
722       LOG.info("Waiting on " + rs + " to clear regions-in-transition");
723       waitForUpdate(100);
724     }
725 
726     if (server.isStopped()) {
727       LOG.info("Giving up wait on region in " +
728         "transition because stoppable.isStopped is set");
729     }
730   }
731 
732   /**
733    * A table is deleted. Remove its regions from all internal maps.
734    * We loop through all regions assuming we don't delete tables too much.
735    */
736   public void tableDeleted(final TableName tableName) {
737     Set<HRegionInfo> regionsToDelete = new HashSet<HRegionInfo>();
738     synchronized (this) {
739       for (RegionState state: regionStates.values()) {
740         HRegionInfo region = state.getRegion();
741         if (region.getTable().equals(tableName)) {
742           regionsToDelete.add(region);
743         }
744       }
745     }
746     for (HRegionInfo region: regionsToDelete) {
747       deleteRegion(region);
748     }
749   }
750 
751   /**
752    * Get a copy of all regions assigned to a server
753    */
754   public synchronized Set<HRegionInfo> getServerRegions(ServerName serverName) {
755     Set<HRegionInfo> regions = serverHoldings.get(serverName);
756     if (regions == null) return null;
757     return new HashSet<HRegionInfo>(regions);
758   }
759 
760   /**
761    * Remove a region from all state maps.
762    */
763   @VisibleForTesting
764   public synchronized void deleteRegion(final HRegionInfo hri) {
765     String encodedName = hri.getEncodedName();
766     regionsInTransition.remove(encodedName);
767     regionStates.remove(encodedName);
768     TableName table = hri.getTable();
769     Map<String, RegionState> indexMap = regionStatesTableIndex.get(table);
770     indexMap.remove(encodedName);
771     if (indexMap.size() == 0)
772       regionStatesTableIndex.remove(table);
773     lastAssignments.remove(encodedName);
774     ServerName sn = regionAssignments.remove(hri);
775     if (sn != null) {
776       Set<HRegionInfo> regions = serverHoldings.get(sn);
777       regions.remove(hri);
778     }
779   }
780 
781   /**
782    * Checking if a region was assigned to a server which is not online now.
783    * If so, we should hold re-assign this region till SSH has split its wals.
784    * Once logs are split, the last assignment of this region will be reset,
785    * which means a null last assignment server is ok for re-assigning.
786    *
787    * A region server could be dead but we don't know it yet. We may
788    * think it's online falsely. Therefore if a server is online, we still
789    * need to confirm it reachable and having the expected start code.
790    */
791   synchronized boolean wasRegionOnDeadServer(final String encodedName) {
792     ServerName server = lastAssignments.get(encodedName);
793     return isServerDeadAndNotProcessed(server);
794   }
795 
796   synchronized boolean isServerDeadAndNotProcessed(ServerName server) {
797     if (server == null) return false;
798     if (serverManager.isServerOnline(server)) {
799       String hostAndPort = server.getHostAndPort();
800       long startCode = server.getStartcode();
801       Long deadCode = deadServers.get(hostAndPort);
802       if (deadCode == null || startCode > deadCode.longValue()) {
803         if (serverManager.isServerReachable(server)) {
804           return false;
805         }
806         // The size of deadServers won't grow unbounded.
807         deadServers.put(hostAndPort, Long.valueOf(startCode));
808       }
809       // Watch out! If the server is not dead, the region could
810       // remain unassigned. That's why ServerManager#isServerReachable
811       // should use some retry.
812       //
813       // We cache this info since it is very unlikely for that
814       // instance to come back up later on. We don't want to expire
815       // the server since we prefer to let it die naturally.
816       LOG.warn("Couldn't reach online server " + server);
817     }
818     // Now, we know it's dead. Check if it's processed
819     return !processedServers.containsKey(server);
820   }
821 
822  /**
823    * Get the last region server a region was on for purpose of re-assignment,
824    * i.e. should the re-assignment be held back till log split is done?
825    */
826   synchronized ServerName getLastRegionServerOfRegion(final String encodedName) {
827     return lastAssignments.get(encodedName);
828   }
829 
830   synchronized void setLastRegionServerOfRegions(
831       final ServerName serverName, final List<HRegionInfo> regionInfos) {
832     for (HRegionInfo hri: regionInfos) {
833       setLastRegionServerOfRegion(serverName, hri.getEncodedName());
834     }
835   }
836 
837   synchronized void setLastRegionServerOfRegion(
838       final ServerName serverName, final String encodedName) {
839     lastAssignments.put(encodedName, serverName);
840   }
841 
842   synchronized boolean isRegionOnServer(
843       final HRegionInfo hri, final ServerName serverName) {
844     Set<HRegionInfo> regions = serverHoldings.get(serverName);
845     return regions == null ? false : regions.contains(hri);
846   }
847 
848   void splitRegion(HRegionInfo p,
849       HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException {
850 
851     regionStateStore.splitRegion(p, a, b, sn, getRegionReplication(p));
852     synchronized (this) {
853       // After PONR, split is considered to be done.
854       // Update server holdings to be aligned with the meta.
855       Set<HRegionInfo> regions = serverHoldings.get(sn);
856       if (regions == null) {
857         throw new IllegalStateException(sn + " should host some regions");
858       }
859       regions.remove(p);
860       regions.add(a);
861       regions.add(b);
862     }
863   }
864 
865   void mergeRegions(HRegionInfo p,
866       HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException {
867     regionStateStore.mergeRegions(p, a, b, sn, getRegionReplication(a));
868     synchronized (this) {
869       // After PONR, merge is considered to be done.
870       // Update server holdings to be aligned with the meta.
871       Set<HRegionInfo> regions = serverHoldings.get(sn);
872       if (regions == null) {
873         throw new IllegalStateException(sn + " should host some regions");
874       }
875       regions.remove(a);
876       regions.remove(b);
877       regions.add(p);
878     }
879   }
880 
881   private int getRegionReplication(HRegionInfo r) throws IOException {
882     if (tableStateManager != null) {
883       HTableDescriptor htd = server.getTableDescriptors().get(r.getTable());
884       if (htd != null) {
885         return htd.getRegionReplication();
886       }
887     }
888     return 1;
889   }
890 
891   /**
892    * At cluster clean re/start, mark all user regions closed except those of tables
893    * that are excluded, such as disabled/disabling/enabling tables. All user regions
894    * and their previous locations are returned.
895    */
896   synchronized Map<HRegionInfo, ServerName> closeAllUserRegions(Set<TableName> excludedTables) {
897     boolean noExcludeTables = excludedTables == null || excludedTables.isEmpty();
898     Set<HRegionInfo> toBeClosed = new HashSet<HRegionInfo>(regionStates.size());
899     for(RegionState state: regionStates.values()) {
900       HRegionInfo hri = state.getRegion();
901       if (state.isSplit() || hri.isSplit()) {
902         continue;
903       }
904       TableName tableName = hri.getTable();
905       if (!TableName.META_TABLE_NAME.equals(tableName)
906           && (noExcludeTables || !excludedTables.contains(tableName))) {
907         toBeClosed.add(hri);
908       }
909     }
910     Map<HRegionInfo, ServerName> allUserRegions =
911       new HashMap<HRegionInfo, ServerName>(toBeClosed.size());
912     for (HRegionInfo hri: toBeClosed) {
913       RegionState regionState = updateRegionState(hri, State.CLOSED);
914       allUserRegions.put(hri, regionState.getServerName());
915     }
916     return allUserRegions;
917   }
918 
919   /**
920    * Compute the average load across all region servers.
921    * Currently, this uses a very naive computation - just uses the number of
922    * regions being served, ignoring stats about number of requests.
923    * @return the average load
924    */
925   protected synchronized double getAverageLoad() {
926     int numServers = 0, totalLoad = 0;
927     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
928       Set<HRegionInfo> regions = e.getValue();
929       ServerName serverName = e.getKey();
930       int regionCount = regions.size();
931       if (serverManager.isServerOnline(serverName)) {
932         totalLoad += regionCount;
933         numServers++;
934       }
935     }
936     if (numServers > 1) {
937       // The master region server holds only a couple regions.
938       // Don't consider this server in calculating the average load
939       // if there are other region servers to avoid possible confusion.
940       Set<HRegionInfo> hris = serverHoldings.get(server.getServerName());
941       if (hris != null) {
942         totalLoad -= hris.size();
943         numServers--;
944       }
945     }
946     return numServers == 0 ? 0.0 :
947       (double)totalLoad / (double)numServers;
948   }
949 
950   /**
951    * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
952    * Can't let out original since it can change and at least the load balancer
953    * wants to iterate this exported list.  We need to synchronize on regions
954    * since all access to this.servers is under a lock on this.regions.
955    *
956    * @return A clone of current assignments by table.
957    */
958   protected Map<TableName, Map<ServerName, List<HRegionInfo>>>
959       getAssignmentsByTable() {
960     Map<TableName, Map<ServerName, List<HRegionInfo>>> result =
961       new HashMap<TableName, Map<ServerName,List<HRegionInfo>>>();
962     synchronized (this) {
963       if (!server.getConfiguration().getBoolean(
964             HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, false)) {
965         Map<ServerName, List<HRegionInfo>> svrToRegions =
966           new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
967         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
968           svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
969         }
970         result.put(TableName.valueOf(HConstants.ENSEMBLE_TABLE_NAME), svrToRegions);
971       } else {
972         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
973           for (HRegionInfo hri: e.getValue()) {
974             if (hri.isMetaRegion()) continue;
975             TableName tablename = hri.getTable();
976             Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
977             if (svrToRegions == null) {
978               svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
979               result.put(tablename, svrToRegions);
980             }
981             List<HRegionInfo> regions = svrToRegions.get(e.getKey());
982             if (regions == null) {
983               regions = new ArrayList<HRegionInfo>();
984               svrToRegions.put(e.getKey(), regions);
985             }
986             regions.add(hri);
987           }
988         }
989       }
990     }
991 
992     Map<ServerName, ServerLoad>
993       onlineSvrs = serverManager.getOnlineServers();
994     // Take care of servers w/o assignments, and remove servers in draining mode
995     List<ServerName> drainingServers = this.serverManager.getDrainingServersList();
996     for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
997       for (ServerName svr: onlineSvrs.keySet()) {
998         if (!map.containsKey(svr)) {
999           map.put(svr, new ArrayList<HRegionInfo>());
1000         }
1001       }
1002       map.keySet().removeAll(drainingServers);
1003     }
1004     return result;
1005   }
1006 
1007   protected RegionState getRegionState(final HRegionInfo hri) {
1008     return getRegionState(hri.getEncodedName());
1009   }
1010 
1011   /**
1012    * Returns a clone of region assignments per server
1013    * @return a Map of ServerName to a List of HRegionInfo's
1014    */
1015   protected synchronized Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer() {
1016     Map<ServerName, List<HRegionInfo>> regionsByServer =
1017         new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
1018     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
1019       regionsByServer.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
1020     }
1021     return regionsByServer;
1022   }
1023 
1024   protected synchronized RegionState getRegionState(final String encodedName) {
1025     return regionStates.get(encodedName);
1026   }
1027 
1028   /**
1029    * Get the HRegionInfo from cache, if not there, from the hbase:meta table.
1030    * Be careful. Does RPC. Do not hold a lock or synchronize when you call this method.
1031    * @param  regionName
1032    * @return HRegionInfo for the region
1033    */
1034   @SuppressWarnings("deprecation")
1035   protected HRegionInfo getRegionInfo(final byte [] regionName) {
1036     String encodedName = HRegionInfo.encodeRegionName(regionName);
1037     RegionState regionState = getRegionState(encodedName);
1038     if (regionState != null) {
1039       return regionState.getRegion();
1040     }
1041 
1042     try {
1043       Pair<HRegionInfo, ServerName> p =
1044         MetaTableAccessor.getRegion(server.getConnection(), regionName);
1045       HRegionInfo hri = p == null ? null : p.getFirst();
1046       if (hri != null) {
1047         createRegionState(hri);
1048       }
1049       return hri;
1050     } catch (IOException e) {
1051       server.abort("Aborting because error occoured while reading "
1052         + Bytes.toStringBinary(regionName) + " from hbase:meta", e);
1053       return null;
1054     }
1055   }
1056 
1057   static boolean isOneOfStates(RegionState regionState, State... states) {
1058     State s = regionState != null ? regionState.getState() : null;
1059     for (State state: states) {
1060       if (s == state) return true;
1061     }
1062     return false;
1063   }
1064 
1065   /**
1066    * Update a region state. It will be put in transition if not already there.
1067    */
1068   private RegionState updateRegionState(final HRegionInfo hri,
1069       final RegionState.State state, final ServerName serverName, long openSeqNum) {
1070     if (state == RegionState.State.FAILED_CLOSE || state == RegionState.State.FAILED_OPEN) {
1071       LOG.warn("Failed to open/close " + hri.getShortNameToLog()
1072         + " on " + serverName + ", set to " + state);
1073     }
1074 
1075     String encodedName = hri.getEncodedName();
1076     RegionState regionState = new RegionState(
1077       hri, state, System.currentTimeMillis(), serverName);
1078     RegionState oldState = getRegionState(encodedName);
1079     if (!regionState.equals(oldState)) {
1080       LOG.info("Transition " + oldState + " to " + regionState);
1081       // Persist region state before updating in-memory info, if needed
1082       regionStateStore.updateRegionState(openSeqNum, regionState, oldState);
1083     }
1084 
1085     synchronized (this) {
1086       regionsInTransition.put(encodedName, regionState);
1087       putRegionState(regionState);
1088 
1089       // For these states, region should be properly closed.
1090       // There should be no log splitting issue.
1091       if ((state == State.CLOSED || state == State.MERGED
1092           || state == State.SPLIT) && lastAssignments.containsKey(encodedName)) {
1093         ServerName last = lastAssignments.get(encodedName);
1094         if (last.equals(serverName)) {
1095           lastAssignments.remove(encodedName);
1096         } else {
1097           LOG.warn(encodedName + " moved to " + state + " on "
1098             + serverName + ", expected " + last);
1099         }
1100       }
1101 
1102       // Once a region is opened, record its last assignment right away.
1103       if (serverName != null && state == State.OPEN) {
1104         ServerName last = lastAssignments.get(encodedName);
1105         if (!serverName.equals(last)) {
1106           lastAssignments.put(encodedName, serverName);
1107           if (last != null && isServerDeadAndNotProcessed(last)) {
1108             LOG.warn(encodedName + " moved to " + serverName
1109               + ", while it's previous host " + last
1110               + " is dead but not processed yet");
1111           }
1112         }
1113       }
1114 
1115       // notify the change
1116       this.notifyAll();
1117     }
1118     return regionState;
1119   }
1120 }