View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.Collection;
23  import java.util.HashMap;
24  import java.util.HashSet;
25  import java.util.Iterator;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Set;
29  import java.util.TreeMap;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.RegionTransition;
38  import org.apache.hadoop.hbase.Server;
39  import org.apache.hadoop.hbase.ServerLoad;
40  import org.apache.hadoop.hbase.ServerName;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.TableStateManager;
43  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
44  import org.apache.hadoop.hbase.MetaTableAccessor;
45  import org.apache.hadoop.hbase.master.RegionState.State;
46  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
47  import org.apache.hadoop.hbase.util.Bytes;
48  import org.apache.hadoop.hbase.util.Pair;
49  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
50  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
51  import org.apache.zookeeper.KeeperException;
52  
53  import com.google.common.annotations.VisibleForTesting;
54  import com.google.common.base.Preconditions;
55  
56  /**
57   * Region state accountant. It holds the states of all regions in the memory.
58   * In normal scenario, it should match the meta table and the true region states.
59   *
60   * This map is used by AssignmentManager to track region states.
61   */
62  @InterfaceAudience.Private
63  public class RegionStates {
  private static final Log LOG = LogFactory.getLog(RegionStates.class);

  /**
   * Regions currently in transition, keyed by encoded region name.
   */
  final HashMap<String, RegionState> regionsInTransition;

  /**
   * Encoded region name to state map.
   * All the regions known to this accountant should be in this map.
   */
  private final Map<String, RegionState> regionStates;

  /**
   * Server to regions assignment map.
   * Contains the set of regions currently assigned to a given server.
   */
  private final Map<ServerName, Set<HRegionInfo>> serverHoldings;

  /**
   * Maintains the mapping from a default (primary) region to the set of its
   * replica regions, including the default replica itself once online.
   */
  private final Map<HRegionInfo, Set<HRegionInfo>> defaultReplicaToOtherReplicas;

  /**
   * Region to server assignment map.
   * Contains the server a given region is currently assigned to.
   * TreeMap so that getRegionsOfTable can range-scan by table boundary.
   */
  private final TreeMap<HRegionInfo, ServerName> regionAssignments;

  /**
   * Encoded region name to server assignment map for re-assignment
   * purposes. Contains the server a given region was last known to be
   * assigned to, which has not completed log splitting, so the region is
   * not assignable. If a region is currently assigned, the server info in
   * this map should be the same as that in regionAssignments.
   * However, the info in regionAssignments is cleared when the region
   * is offlined, while the info in lastAssignments is cleared when
   * the region is closed or the server is dead and processed.
   */
  private final HashMap<String, ServerName> lastAssignments;

  /**
   * Maps a host:port pair string to the latest start code of a region
   * server that is known to be dead. It is dead to us, even though the
   * server manager may not know it yet.
   */
  private final HashMap<String, Long> deadServers;

  /**
   * Maps a dead server to the time when its log split was done.
   * Since log splitting is not ordered, we have to remember
   * all processed instances. The map is cleaned up based
   * on a configured time (LOG_SPLIT_TIME). By default, we assume a dead
   * server should be done with log splitting in two hours.
   */
  private final HashMap<ServerName, Long> processedServers;
  private long lastProcessedServerCleanTime;

  private final TableStateManager tableStateManager;
  private final RegionStateStore regionStateStore;
  private final ServerManager serverManager;
  private final Server server;

  // The maximum time to keep a log split info in region states map
  static final String LOG_SPLIT_TIME = "hbase.master.maximum.logsplit.keeptime";
  static final long DEFAULT_LOG_SPLIT_TIME = 7200000L; // 2 hours
131 
132   RegionStates(final Server master, final TableStateManager tableStateManager,
133       final ServerManager serverManager, final RegionStateStore regionStateStore) {
134     regionStates = new HashMap<String, RegionState>();
135     regionsInTransition = new HashMap<String, RegionState>();
136     serverHoldings = new HashMap<ServerName, Set<HRegionInfo>>();
137     defaultReplicaToOtherReplicas = new HashMap<HRegionInfo, Set<HRegionInfo>>();
138     regionAssignments = new TreeMap<HRegionInfo, ServerName>();
139     lastAssignments = new HashMap<String, ServerName>();
140     processedServers = new HashMap<ServerName, Long>();
141     deadServers = new HashMap<String, Long>();
142     this.tableStateManager = tableStateManager;
143     this.regionStateStore = regionStateStore;
144     this.serverManager = serverManager;
145     this.server = master;
146   }
147 
148   /**
149    * @return an unmodifiable the region assignment map
150    */
151   @SuppressWarnings("unchecked")
152   public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
153     return (Map<HRegionInfo, ServerName>)regionAssignments.clone();
154   }
155 
156   /**
157    * Return the replicas (including default) for the regions grouped by ServerName
158    * @param regions
159    * @return a pair containing the groupings as a map
160    */
161   synchronized Map<ServerName, List<HRegionInfo>> getRegionAssignments(
162     Collection<HRegionInfo> regions) {
163     Map<ServerName, List<HRegionInfo>> map = new HashMap<ServerName, List<HRegionInfo>>();
164     for (HRegionInfo region : regions) {
165       HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(region);
166       Set<HRegionInfo> allReplicas = defaultReplicaToOtherReplicas.get(defaultReplica);
167       if (allReplicas != null) {
168         for (HRegionInfo hri : allReplicas) {
169           ServerName server = regionAssignments.get(hri);
170           if (server != null) {
171             List<HRegionInfo> regionsOnServer = map.get(server);
172             if (regionsOnServer == null) {
173               regionsOnServer = new ArrayList<HRegionInfo>(1);
174               map.put(server, regionsOnServer);
175             }
176             regionsOnServer.add(hri);
177           }
178         }
179       }
180     }
181     return map;
182   }
183 
  /**
   * @return the server the given region is currently assigned to,
   * or null if the region is not currently assigned
   */
  public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
    return regionAssignments.get(hri);
  }
187 
188   /**
189    * Get regions in transition and their states
190    */
191   @SuppressWarnings("unchecked")
192   public synchronized Map<String, RegionState> getRegionsInTransition() {
193     return (Map<String, RegionState>)regionsInTransition.clone();
194   }
195 
  /**
   * @param hri the region to check
   * @return True if the specified region is in transition.
   */
  public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
    return regionsInTransition.containsKey(hri.getEncodedName());
  }
202 
  /**
   * @param encodedName the encoded name of the region to check
   * @return True if the specified region is in transition.
   */
  public synchronized boolean isRegionInTransition(final String encodedName) {
    return regionsInTransition.containsKey(encodedName);
  }
209 
  /**
   * @return True if any region at all is currently in transition.
   */
  public synchronized boolean isRegionsInTransition() {
    return !regionsInTransition.isEmpty();
  }
216 
  /**
   * @return True if the specified region is assigned to a server
   * and not in transition.
   */
  public synchronized boolean isRegionOnline(final HRegionInfo hri) {
    return !isRegionInTransition(hri) && regionAssignments.containsKey(hri);
  }
223 
  /**
   * @return True if the specified region is offline/closed and not in
   * transition. A region with no state recorded at all is considered
   * offline to us too.
   */
  public synchronized boolean isRegionOffline(final HRegionInfo hri) {
    return getRegionState(hri) == null || (!isRegionInTransition(hri)
      && isRegionInState(hri, State.OFFLINE, State.CLOSED));
  }
232 
  /**
   * @return True if the specified region is in one of the specified states.
   */
  public boolean isRegionInState(
      final HRegionInfo hri, final State... states) {
    // Delegate to the encoded-name overload, which does the actual lookup.
    return isRegionInState(hri.getEncodedName(), states);
  }
240 
  /**
   * @return True if the specified region is in one of the specified states.
   * A region with no recorded state matches none of them.
   */
  public boolean isRegionInState(
      final String encodedName, final State... states) {
    RegionState regionState = getRegionState(encodedName);
    return isOneOfStates(regionState, states);
  }
249 
  /**
   * Wait for the state map to be updated by the assignment manager.
   * NOTE(review): a plain {@code wait(timeout)} can return early on a
   * spurious wakeup or any {@code notifyAll()}; callers are expected to
   * re-check their condition in a loop.
   *
   * @param timeout maximum time to wait, in milliseconds
   * @throws InterruptedException if the waiting thread is interrupted
   */
  public synchronized void waitForUpdate(
      final long timeout) throws InterruptedException {
    this.wait(timeout);
  }
257 
  /**
   * Get the transition state of the given region, or null if the region
   * is not in transition.
   */
  public RegionState getRegionTransitionState(final HRegionInfo hri) {
    return getRegionTransitionState(hri.getEncodedName());
  }
264 
  /**
   * Get the transition state for the region with the given encoded name,
   * or null if that region is not in transition.
   */
  public synchronized RegionState
      getRegionTransitionState(final String encodedName) {
    return regionsInTransition.get(encodedName);
  }
272 
  /**
   * Add a list of regions to RegionStates. If a region is split
   * and offline, its state will be SPLIT. Otherwise, its state will
   * be OFFLINE. A region already present in RegionStates is skipped
   * (see {@link #createRegionState(HRegionInfo)}).
   */
  public void createRegionStates(
      final List<HRegionInfo> hris) {
    for (HRegionInfo hri: hris) {
      createRegionState(hri);
    }
  }
284 
  /**
   * Add a region to RegionStates. If the region is split
   * and offline, its state will be SPLIT. Otherwise, its state will
   * be OFFLINE. If it is already in RegionStates, this call has
   * no effect, and the original state is returned.
   */
  public RegionState createRegionState(final HRegionInfo hri) {
    // Null state/server/lastHost: the 4-arg overload defaults to OFFLINE.
    return createRegionState(hri, null, null, null);
  }
294 
  /**
   * Add a region to RegionStates with the specified state.
   * If the region is already in RegionStates, this call has
   * no effect, and the original state is returned.
   *
   * @param hri the region info to create a state for
   * @param newState the state the region is set to; null (or OPEN without
   *   a server) defaults to OFFLINE, and an offline split region forces SPLIT
   * @param serverName the server the region is transitioning on
   * @param lastHost the last server that hosted the region
   * @return the current state
   */
  public synchronized RegionState createRegionState(final HRegionInfo hri,
      State newState, ServerName serverName, ServerName lastHost) {
    // OPEN without a server is meaningless; normalize to OFFLINE.
    if (newState == null || (newState == State.OPEN && serverName == null)) {
      newState =  State.OFFLINE;
    }
    // An offline split parent is terminal: force SPLIT and drop the server.
    if (hri.isOffline() && hri.isSplit()) {
      newState = State.SPLIT;
      serverName = null;
    }
    String encodedName = hri.getEncodedName();
    RegionState regionState = regionStates.get(encodedName);
    if (regionState != null) {
      // Already tracked: keep the existing state, warn about the duplicate create.
      LOG.warn("Tried to create a state for a region already in RegionStates, "
        + "used existing: " + regionState + ", ignored new: " + newState);
    } else {
      regionState = new RegionState(hri, newState, serverName);
      regionStates.put(encodedName, regionState);
      if (newState == State.OPEN) {
        // serverName is non-null here: OPEN with a null server was normalized
        // to OFFLINE above.
        if (!serverName.equals(lastHost)) {
          LOG.warn("Open region's last host " + lastHost
            + " should be the same as the current one " + serverName
            + ", ignored the last and used the current one");
          lastHost = serverName;
        }
        lastAssignments.put(encodedName, lastHost);
        regionAssignments.put(hri, lastHost);
      } else if (!regionState.isUnassignable()) {
        // Not open and not in a terminal state: track it as in transition.
        regionsInTransition.put(encodedName, regionState);
      }
      if (lastHost != null && newState != State.SPLIT) {
        addToServerHoldings(lastHost, hri);
      }
    }
    return regionState;
  }
341 
342   /**
343    * Update a region state. It will be put in transition if not already there.
344    */
345   public RegionState updateRegionState(
346       final HRegionInfo hri, final State state) {
347     RegionState regionState = getRegionState(hri.getEncodedName());
348     return updateRegionState(hri, state,
349       regionState == null ? null : regionState.getServerName());
350   }
351 
352   /**
353    * Update a region state. It will be put in transition if not already there.
354    *
355    * If we can't find the region info based on the region name in
356    * the transition, log a warning and return null.
357    */
358   public RegionState updateRegionState(
359       final RegionTransition transition, final State state) {
360     byte [] regionName = transition.getRegionName();
361     HRegionInfo regionInfo = getRegionInfo(regionName);
362     if (regionInfo == null) {
363       String prettyRegionName = HRegionInfo.prettyPrint(
364         HRegionInfo.encodeRegionName(regionName));
365       LOG.warn("Failed to find region " + prettyRegionName
366         + " in updating its state to " + state
367         + " based on region transition " + transition);
368       return null;
369     }
370     return updateRegionState(regionInfo, state,
371       transition.getServerName());
372   }
373 
  /**
   * Update a region's state and server. It will be put in transition if not
   * already there. Delegates to the seqnum overload with NO_SEQNUM since no
   * open sequence number is known here.
   */
  public RegionState updateRegionState(
      final HRegionInfo hri, final State state, final ServerName serverName) {
    return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM);
  }
381 
  /**
   * Mark a region online on the given server; no open sequence number is
   * known, so NO_SEQNUM is used.
   */
  public void regionOnline(
      final HRegionInfo hri, final ServerName serverName) {
    regionOnline(hri, serverName, HConstants.NO_SEQNUM);
  }
386 
  /**
   * A region is online, so it won't be in transition any more.
   * We can't confirm it is really online on the specified region server
   * because it hasn't been put in the region server's online region list yet.
   */
  public void regionOnline(final HRegionInfo hri,
      final ServerName serverName, long openSeqNum) {
    if (!serverManager.isServerOnline(serverName)) {
      // This is possible if the region server dies before master gets a
      // chance to handle ZK event in time. At this time, if the dead server
      // is already processed by SSH, we should ignore this event.
      // If not processed yet, ignore and let SSH deal with it.
      LOG.warn("Ignored, " + hri.getEncodedName()
        + " was opened on a dead server: " + serverName);
      return;
    }
    updateRegionState(hri, State.OPEN, serverName, openSeqNum);

    synchronized (this) {
      regionsInTransition.remove(hri.getEncodedName());
      // Record the new assignment; put() returns the previous holder, if any.
      ServerName oldServerName = regionAssignments.put(hri, serverName);
      if (!serverName.equals(oldServerName)) {
        LOG.info("Onlined " + hri.getShortNameToLog() + " on " + serverName);
        addToServerHoldings(serverName, hri);
        addToReplicaMapping(hri);
        if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
          // The region moved: drop it from the previous server's holdings.
          LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
          removeFromServerHoldings(oldServerName, hri);
        }
      }
    }
  }
419 
420   private void addToServerHoldings(ServerName serverName, HRegionInfo hri) {
421     Set<HRegionInfo> regions = serverHoldings.get(serverName);
422     if (regions == null) {
423       regions = new HashSet<HRegionInfo>();
424       serverHoldings.put(serverName, regions);
425     }
426     regions.add(hri);
427   }
428 
429   private void addToReplicaMapping(HRegionInfo hri) {
430     HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
431     Set<HRegionInfo> replicas =
432         defaultReplicaToOtherReplicas.get(defaultReplica);
433     if (replicas == null) {
434       replicas = new HashSet<HRegionInfo>();
435       defaultReplicaToOtherReplicas.put(defaultReplica, replicas);
436     }
437     replicas.add(hri);
438   }
439 
440   private void removeFromServerHoldings(ServerName serverName, HRegionInfo hri) {
441     Set<HRegionInfo> oldRegions = serverHoldings.get(serverName);
442     oldRegions.remove(hri);
443     if (oldRegions.isEmpty()) {
444       serverHoldings.remove(serverName);
445     }
446   }
447 
448   private void removeFromReplicaMapping(HRegionInfo hri) {
449     HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
450     Set<HRegionInfo> replicas = defaultReplicaToOtherReplicas.get(defaultReplica);
451     if (replicas != null) {
452       replicas.remove(hri);
453       if (replicas.isEmpty()) {
454         defaultReplicaToOtherReplicas.remove(defaultReplica);
455       }
456     }
457   }
458 
  /**
   * A dead server's hlogs have been split, so that all the regions
   * that used to be open on it can be safely assigned now. Mark them
   * assignable by clearing their last-assignment entries, and remember
   * the server as processed so isServerDeadAndNotProcessed returns false.
   */
  public synchronized void logSplit(final ServerName serverName) {
    // Clear every last-assignment entry pointing at this server; those
    // regions are now safe to re-assign.
    for (Iterator<Map.Entry<String, ServerName>> it
        = lastAssignments.entrySet().iterator(); it.hasNext();) {
      Map.Entry<String, ServerName> e = it.next();
      if (e.getValue().equals(serverName)) {
        it.remove();
      }
    }
    long now = System.currentTimeMillis();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Adding to processed servers " + serverName);
    }
    processedServers.put(serverName, Long.valueOf(now));
    Configuration conf = server.getConfiguration();
    long obsoleteTime = conf.getLong(LOG_SPLIT_TIME, DEFAULT_LOG_SPLIT_TIME);
    // Doesn't have to be very accurate about the clean up time:
    // periodically evict processed-server entries older than obsoleteTime
    // so the map doesn't grow without bound.
    if (now > lastProcessedServerCleanTime + obsoleteTime) {
      lastProcessedServerCleanTime = now;
      long cutoff = now - obsoleteTime;
      for (Iterator<Map.Entry<ServerName, Long>> it
          = processedServers.entrySet().iterator(); it.hasNext();) {
        Map.Entry<ServerName, Long> e = it.next();
        if (e.getValue().longValue() < cutoff) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Removed from processed servers " + e.getKey());
          }
          it.remove();
        }
      }
    }
  }
494 
  /**
   * Log split is done for a given region, so it is assignable now.
   */
  public void logSplit(final HRegionInfo region) {
    clearLastAssignment(region);
  }
501 
  /**
   * Forget the last server this region was assigned to, making the region
   * assignable again (see lastAssignments).
   */
  public synchronized void clearLastAssignment(final HRegionInfo region) {
    lastAssignments.remove(region.getEncodedName());
  }
505 
  /**
   * A region is offline; it won't be in transition any more.
   * The resulting state is OFFLINE (null expected state).
   */
  public void regionOffline(final HRegionInfo hri) {
    regionOffline(hri, null);
  }
512 
  /**
   * A region is offline; it won't be in transition any more. Its state
   * should be the specified expected state, which can only be
   * Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
   */
  public void regionOffline(
      final HRegionInfo hri, final State expectedState) {
    Preconditions.checkArgument(expectedState == null
      || RegionState.isUnassignable(expectedState),
        "Offlined region should not be " + expectedState);
    if (isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
      // A daughter/merged region that never fully came online: just
      // remove it from all region maps.
      deleteRegion(hri);
      return;
    }
    State newState =
      expectedState == null ? State.OFFLINE : expectedState;
    updateRegionState(hri, newState);

    synchronized (this) {
      regionsInTransition.remove(hri.getEncodedName());
      ServerName oldServerName = regionAssignments.remove(hri);
      if (oldServerName != null && serverHoldings.containsKey(oldServerName)
          && (newState == State.MERGED || newState == State.SPLIT
            || tableStateManager.isTableState(hri.getTable(),
              ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING))) {
        // Offline the region only if it's merged/split, or the table is disabled/disabling.
        // Otherwise, offline it from this server only when it is online on a different server.
        LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
        removeFromServerHoldings(oldServerName, hri);
        removeFromReplicaMapping(hri);
      }
    }
  }
547 
  /**
   * A server is offline; all regions on it are dead. Offlines every region
   * that was open (or splitting/merging) on the server, and returns the
   * regions in transition on that server that SSH should re-assign.
   *
   * @param watcher used to delete any leftover ZK transition nodes
   * @param sn the dead server
   * @return regions in transition that should be re-assigned by SSH
   */
  public synchronized List<HRegionInfo> serverOffline(
      final ZooKeeperWatcher watcher, final ServerName sn) {
    // Offline all regions on this server not already in transition.
    List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
    Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
    if (assignedRegions == null) {
      assignedRegions = new HashSet<HRegionInfo>();
    }

    // Offline regions outside the loop to avoid ConcurrentModificationException
    // (regionOffline mutates serverHoldings, which we are iterating).
    Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
    for (HRegionInfo region : assignedRegions) {
      // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
      if (isRegionOnline(region)) {
        regionsToOffline.add(region);
      } else if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
        LOG.debug("Offline splitting/merging region " + getRegionState(region));
        try {
          // Delete the ZNode if exists
          ZKAssign.deleteNodeFailSilent(watcher, region);
          regionsToOffline.add(region);
        } catch (KeeperException ke) {
          server.abort("Unexpected ZK exception deleting node " + region, ke);
        }
      }
    }

    for (HRegionInfo hri : regionsToOffline) {
      regionOffline(hri);
    }

    for (RegionState state : regionsInTransition.values()) {
      HRegionInfo hri = state.getRegion();
      if (assignedRegions.contains(hri)) {
        // Region is open on this region server, but in transition.
        // This region must be moving away from this server, or splitting/merging.
        // SSH will handle it, either skip assigning, or re-assign.
        LOG.info("Transitioning " + state + " will be handled by SSH for " + sn);
      } else if (sn.equals(state.getServerName())) {
        // Region is in transition on this region server, and this
        // region is not open on this server. So the region must be
        // moving to this server from another one (i.e. opening or
        // pending open on this server, was open on another one.
        // Offline state is also kind of pending open if the region is in
        // transition. The region could be in failed_close state too if we have
        // tried several times to open it while this region server is not reachable)
        if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
          LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
          rits.add(hri);
        } else {
          LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
        }
      }
    }

    // Wake up anyone blocked in waitForUpdate/waitOnRegionToClearRegionsInTransition.
    this.notifyAll();
    return rits;
  }
609 
610   /**
611    * Gets the online regions of the specified table.
612    * This method looks at the in-memory state.  It does not go to <code>hbase:meta</code>.
613    * Only returns <em>online</em> regions.  If a region on this table has been
614    * closed during a disable, etc., it will be included in the returned list.
615    * So, the returned list may not necessarily be ALL regions in this table, its
616    * all the ONLINE regions in the table.
617    * @param tableName
618    * @return Online regions from <code>tableName</code>
619    */
620   public synchronized List<HRegionInfo> getRegionsOfTable(TableName tableName) {
621     List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
622     // boundary needs to have table's name but regionID 0 so that it is sorted
623     // before all table's regions.
624     HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
625     for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
626       if(!hri.getTable().equals(tableName)) break;
627       tableRegions.add(hri);
628     }
629     return tableRegions;
630   }
631 
632 
  /**
   * Wait on region to clear regions-in-transition.
   * <p>
   * If the region isn't in transition, returns immediately. Otherwise, the
   * method blocks until the region is out of transition or the server is
   * stopped. Note there is no overall timeout: only the per-iteration
   * 100 ms wait bounds each check.
   */
  public synchronized void waitOnRegionToClearRegionsInTransition(
      final HRegionInfo hri) throws InterruptedException {
    if (!isRegionInTransition(hri)) return;

    // Re-check on each wakeup; waitForUpdate can return early.
    while(!server.isStopped() && isRegionInTransition(hri)) {
      RegionState rs = getRegionState(hri);
      LOG.info("Waiting on " + rs + " to clear regions-in-transition");
      waitForUpdate(100);
    }

    if (server.isStopped()) {
      LOG.info("Giving up wait on region in " +
        "transition because stoppable.isStopped is set");
    }
  }
654 
655   /**
656    * A table is deleted. Remove its regions from all internal maps.
657    * We loop through all regions assuming we don't delete tables too much.
658    */
659   public void tableDeleted(final TableName tableName) {
660     Set<HRegionInfo> regionsToDelete = new HashSet<HRegionInfo>();
661     synchronized (this) {
662       for (RegionState state: regionStates.values()) {
663         HRegionInfo region = state.getRegion();
664         if (region.getTable().equals(tableName)) {
665           regionsToDelete.add(region);
666         }
667       }
668     }
669     for (HRegionInfo region: regionsToDelete) {
670       deleteRegion(region);
671     }
672   }
673 
674   /**
675    * Get a copy of all regions assigned to a server
676    */
677   public synchronized Set<HRegionInfo> getServerRegions(ServerName serverName) {
678     Set<HRegionInfo> regions = serverHoldings.get(serverName);
679     if (regions == null) return null;
680     return new HashSet<HRegionInfo>(regions);
681   }
682 
683   /**
684    * Remove a region from all state maps.
685    */
686   @VisibleForTesting
687   public synchronized void deleteRegion(final HRegionInfo hri) {
688     String encodedName = hri.getEncodedName();
689     regionsInTransition.remove(encodedName);
690     regionStates.remove(encodedName);
691     lastAssignments.remove(encodedName);
692     ServerName sn = regionAssignments.remove(hri);
693     if (sn != null) {
694       Set<HRegionInfo> regions = serverHoldings.get(sn);
695       regions.remove(hri);
696     }
697   }
698 
699   /**
700    * Checking if a region was assigned to a server which is not online now.
701    * If so, we should hold re-assign this region till SSH has split its hlogs.
702    * Once logs are split, the last assignment of this region will be reset,
703    * which means a null last assignment server is ok for re-assigning.
704    *
705    * A region server could be dead but we don't know it yet. We may
706    * think it's online falsely. Therefore if a server is online, we still
707    * need to confirm it reachable and having the expected start code.
708    */
709   synchronized boolean wasRegionOnDeadServer(final String encodedName) {
710     ServerName server = lastAssignments.get(encodedName);
711     return isServerDeadAndNotProcessed(server);
712   }
713 
  /**
   * @return true if the given server is dead (or unreachable and therefore
   * treated as dead) and its log splitting has not yet been processed;
   * false for a null server or one confirmed reachable.
   * NOTE(review): the parameter shadows the RegionStates.server field;
   * within this method "server" always means the parameter.
   */
  synchronized boolean isServerDeadAndNotProcessed(ServerName server) {
    if (server == null) return false;
    if (serverManager.isServerOnline(server)) {
      String hostAndPort = server.getHostAndPort();
      long startCode = server.getStartcode();
      Long deadCode = deadServers.get(hostAndPort);
      // Only probe reachability if we haven't already marked this exact
      // (or a newer) start code as dead.
      if (deadCode == null || startCode > deadCode.longValue()) {
        if (serverManager.isServerReachable(server)) {
          return false;
        }
        // The size of deadServers won't grow unbounded.
        deadServers.put(hostAndPort, Long.valueOf(startCode));
      }
      // Watch out! If the server is not dead, the region could
      // remain unassigned. That's why ServerManager#isServerReachable
      // should use some retry.
      //
      // We cache this info since it is very unlikely for that
      // instance to come back up later on. We don't want to expire
      // the server since we prefer to let it die naturally.
      LOG.warn("Couldn't reach online server " + server);
    }
    // Now, we know it's dead. Check if it's processed
    return !processedServers.containsKey(server);
  }
739 
  /**
   * Get the last region server a region was on, for the purpose of
   * re-assignment: i.e. should the re-assignment be held back till log
   * split is done? Null means the region is safe to re-assign.
   */
  synchronized ServerName getLastRegionServerOfRegion(final String encodedName) {
    return lastAssignments.get(encodedName);
  }
747 
  /**
   * Record the given server as the last-known host of each of the given
   * regions (bulk form of setLastRegionServerOfRegion).
   */
  synchronized void setLastRegionServerOfRegions(
      final ServerName serverName, final List<HRegionInfo> regionInfos) {
    for (HRegionInfo hri: regionInfos) {
      setLastRegionServerOfRegion(serverName, hri.getEncodedName());
    }
  }
754 
  /**
   * Record the given server as the last-known host of the region with the
   * given encoded name (see lastAssignments).
   */
  synchronized void setLastRegionServerOfRegion(
      final ServerName serverName, final String encodedName) {
    lastAssignments.put(encodedName, serverName);
  }
759 
  /**
   * Record a completed split of parent region {@code p} into daughters
   * {@code a} and {@code b} on server {@code sn}: persist it to meta, then
   * swap the parent for the daughters in that server's holdings.
   *
   * @throws IOException if persisting the split to meta fails
   */
  void splitRegion(HRegionInfo p,
      HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException {
    regionStateStore.splitRegion(p, a, b, sn);
    synchronized (this) {
      // After PONR, split is considered to be done.
      // Update server holdings to be aligned with the meta.
      Set<HRegionInfo> regions = serverHoldings.get(sn);
      if (regions == null) {
        throw new IllegalStateException(sn + " should host some regions");
      }
      regions.remove(p);
      regions.add(a);
      regions.add(b);
    }
  }
775 
776   void mergeRegions(HRegionInfo p,
777       HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException {
778     regionStateStore.mergeRegions(p, a, b, sn);
779     synchronized (this) {
780       // After PONR, merge is considered to be done.
781       // Update server holdings to be aligned with the meta.
782       Set<HRegionInfo> regions = serverHoldings.get(sn);
783       if (regions == null) {
784         throw new IllegalStateException(sn + " should host some regions");
785       }
786       regions.remove(a);
787       regions.remove(b);
788       regions.add(p);
789     }
790   }
791 
792   /**
793    * At cluster clean re/start, mark all user regions closed except those of tables
794    * that are excluded, such as disabled/disabling/enabling tables. All user regions
795    * and their previous locations are returned.
796    */
797   synchronized Map<HRegionInfo, ServerName> closeAllUserRegions(Set<TableName> excludedTables) {
798     boolean noExcludeTables = excludedTables == null || excludedTables.isEmpty();
799     Set<HRegionInfo> toBeClosed = new HashSet<HRegionInfo>(regionStates.size());
800     for(RegionState state: regionStates.values()) {
801       HRegionInfo hri = state.getRegion();
802       if (state.isSplit() || hri.isSplit()) {
803         continue;
804       }
805       TableName tableName = hri.getTable();
806       if (!TableName.META_TABLE_NAME.equals(tableName)
807           && (noExcludeTables || !excludedTables.contains(tableName))) {
808         toBeClosed.add(hri);
809       }
810     }
811     Map<HRegionInfo, ServerName> allUserRegions =
812       new HashMap<HRegionInfo, ServerName>(toBeClosed.size());
813     for (HRegionInfo hri: toBeClosed) {
814       RegionState regionState = updateRegionState(hri, State.CLOSED);
815       allUserRegions.put(hri, regionState.getServerName());
816     }
817     return allUserRegions;
818   }
819 
820   /**
821    * Compute the average load across all region servers.
822    * Currently, this uses a very naive computation - just uses the number of
823    * regions being served, ignoring stats about number of requests.
824    * @return the average load
825    */
826   protected synchronized double getAverageLoad() {
827     int numServers = 0, totalLoad = 0;
828     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
829       Set<HRegionInfo> regions = e.getValue();
830       ServerName serverName = e.getKey();
831       int regionCount = regions.size();
832       if (regionCount > 0 || serverManager.isServerOnline(serverName)) {
833         totalLoad += regionCount;
834         numServers++;
835       }
836     }
837     if (numServers > 1) {
838       // The master region server holds only a couple regions.
839       // Don't consider this server in calculating the average load
840       // if there are other region servers to avoid possible confusion.
841       Set<HRegionInfo> hris = serverHoldings.get(server.getServerName());
842       if (hris != null) {
843         totalLoad -= hris.size();
844         numServers--;
845       }
846     }
847     return numServers == 0 ? 0.0 :
848       (double)totalLoad / (double)numServers;
849   }
850 
851   /**
852    * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
853    * Can't let out original since it can change and at least the load balancer
854    * wants to iterate this exported list.  We need to synchronize on regions
855    * since all access to this.servers is under a lock on this.regions.
856    *
857    * @return A clone of current assignments by table.
858    */
859   protected Map<TableName, Map<ServerName, List<HRegionInfo>>>
860       getAssignmentsByTable() {
861     Map<TableName, Map<ServerName, List<HRegionInfo>>> result =
862       new HashMap<TableName, Map<ServerName,List<HRegionInfo>>>();
863     synchronized (this) {
864       if (!server.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false)) {
865         Map<ServerName, List<HRegionInfo>> svrToRegions =
866           new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
867         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
868           svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
869         }
870         result.put(TableName.valueOf("ensemble"), svrToRegions);
871       } else {
872         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
873           for (HRegionInfo hri: e.getValue()) {
874             if (hri.isMetaRegion()) continue;
875             TableName tablename = hri.getTable();
876             Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
877             if (svrToRegions == null) {
878               svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
879               result.put(tablename, svrToRegions);
880             }
881             List<HRegionInfo> regions = svrToRegions.get(e.getKey());
882             if (regions == null) {
883               regions = new ArrayList<HRegionInfo>();
884               svrToRegions.put(e.getKey(), regions);
885             }
886             regions.add(hri);
887           }
888         }
889       }
890     }
891 
892     Map<ServerName, ServerLoad>
893       onlineSvrs = serverManager.getOnlineServers();
894     // Take care of servers w/o assignments.
895     for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
896       for (ServerName svr: onlineSvrs.keySet()) {
897         if (!map.containsKey(svr)) {
898           map.put(svr, new ArrayList<HRegionInfo>());
899         }
900       }
901     }
902     return result;
903   }
904 
905   protected RegionState getRegionState(final HRegionInfo hri) {
906     return getRegionState(hri.getEncodedName());
907   }
908 
909   /**
910    * Returns a clone of region assignments per server
911    * @return a Map of ServerName to a List of HRegionInfo's
912    */
913   protected synchronized Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer() {
914     Map<ServerName, List<HRegionInfo>> regionsByServer =
915         new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
916     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
917       regionsByServer.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
918     }
919     return regionsByServer;
920   }
921 
  /**
   * Looks up the {@link RegionState} for an encoded region name.
   *
   * @param encodedName the encoded region name
   * @return the region's state, or null if the region is unknown
   */
  protected synchronized RegionState getRegionState(final String encodedName) {
    return regionStates.get(encodedName);
  }
925 
926   /**
927    * Get the HRegionInfo from cache, if not there, from the hbase:meta table
928    * @param  regionName
929    * @return HRegionInfo for the region
930    */
931   @SuppressWarnings("deprecation")
932   protected HRegionInfo getRegionInfo(final byte [] regionName) {
933     String encodedName = HRegionInfo.encodeRegionName(regionName);
934     RegionState regionState = getRegionState(encodedName);
935     if (regionState != null) {
936       return regionState.getRegion();
937     }
938 
939     try {
940       Pair<HRegionInfo, ServerName> p =
941         MetaTableAccessor.getRegion(server.getShortCircuitConnection(), regionName);
942       HRegionInfo hri = p == null ? null : p.getFirst();
943       if (hri != null) {
944         createRegionState(hri);
945       }
946       return hri;
947     } catch (IOException e) {
948       server.abort("Aborting because error occoured while reading "
949         + Bytes.toStringBinary(regionName) + " from hbase:meta", e);
950       return null;
951     }
952   }
953 
954   static boolean isOneOfStates(RegionState regionState, State... states) {
955     State s = regionState != null ? regionState.getState() : null;
956     for (State state: states) {
957       if (s == state) return true;
958     }
959     return false;
960   }
961 
962   /**
963    * Update a region state. It will be put in transition if not already there.
964    */
965   private RegionState updateRegionState(final HRegionInfo hri,
966       final State state, final ServerName serverName, long openSeqNum) {
967     if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) {
968       LOG.warn("Failed to open/close " + hri.getShortNameToLog()
969         + " on " + serverName + ", set to " + state);
970     }
971 
972     String encodedName = hri.getEncodedName();
973     RegionState regionState = new RegionState(
974       hri, state, System.currentTimeMillis(), serverName);
975     RegionState oldState = getRegionState(encodedName);
976     if (!regionState.equals(oldState)) {
977       LOG.info("Transition " + oldState + " to " + regionState);
978       // Persist region state before updating in-memory info, if needed
979       regionStateStore.updateRegionState(openSeqNum, regionState, oldState);
980     }
981 
982     synchronized (this) {
983       regionsInTransition.put(encodedName, regionState);
984       regionStates.put(encodedName, regionState);
985 
986       // For these states, region should be properly closed.
987       // There should be no log splitting issue.
988       if ((state == State.CLOSED || state == State.MERGED
989           || state == State.SPLIT) && lastAssignments.containsKey(encodedName)) {
990         ServerName last = lastAssignments.get(encodedName);
991         if (last.equals(serverName)) {
992           lastAssignments.remove(encodedName);
993         } else {
994           LOG.warn(encodedName + " moved to " + state + " on "
995             + serverName + ", expected " + last);
996         }
997       }
998 
999       // Once a region is opened, record its last assignment right away.
1000       if (serverName != null && state == State.OPEN) {
1001         ServerName last = lastAssignments.get(encodedName);
1002         if (!serverName.equals(last)) {
1003           lastAssignments.put(encodedName, serverName);
1004           if (last != null && isServerDeadAndNotProcessed(last)) {
1005             LOG.warn(encodedName + " moved to " + serverName
1006               + ", while it's previous host " + last
1007               + " is dead but not processed yet");
1008           }
1009         }
1010       }
1011 
1012       // notify the change
1013       this.notifyAll();
1014     }
1015     return regionState;
1016   }
1017 }