001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.master.assignment;
021
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.Collection;
025import java.util.Collections;
026import java.util.Comparator;
027import java.util.HashMap;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Map;
031import java.util.Set;
032import java.util.SortedSet;
033import java.util.TreeSet;
034import java.util.concurrent.ConcurrentHashMap;
035import java.util.concurrent.ConcurrentSkipListMap;
036import java.util.concurrent.atomic.AtomicInteger;
037import java.util.function.Predicate;
038import java.util.stream.Collectors;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.HRegionLocation;
041import org.apache.hadoop.hbase.ServerName;
042import org.apache.hadoop.hbase.TableName;
043import org.apache.hadoop.hbase.client.RegionInfo;
044import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
045import org.apache.hadoop.hbase.master.RegionState;
046import org.apache.hadoop.hbase.master.RegionState.State;
047import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
050import org.apache.yetus.audience.InterfaceAudience;
051import org.slf4j.Logger;
052import org.slf4j.LoggerFactory;
053
054import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
055
056/**
057 * RegionStates contains a set of Maps that describes the in-memory state of the AM, with
058 * the regions available in the system, the region in transition, the offline regions and
059 * the servers holding regions.
060 */
061@InterfaceAudience.Private
062public class RegionStates {
063  private static final Logger LOG = LoggerFactory.getLogger(RegionStates.class);
064
065  protected static final State[] STATES_EXPECTED_ON_OPEN = new State[] {
066    State.OPEN, // State may already be OPEN if we died after receiving the OPEN from regionserver
067                // but before complete finish of AssignProcedure. HBASE-20100.
068    State.OFFLINE, State.CLOSED,      // disable/offline
069    State.SPLITTING, State.SPLIT,     // ServerCrashProcedure
070    State.OPENING, State.FAILED_OPEN, // already in-progress (retrying)
071  };
072
073  protected static final State[] STATES_EXPECTED_ON_CLOSE = new State[] {
074    State.SPLITTING, State.SPLIT, State.MERGING, // ServerCrashProcedure
075    State.OPEN,                   // enabled/open
076    State.CLOSING                 // already in-progress (retrying)
077  };
078
079  private static class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
080    public AssignmentProcedureEvent(final RegionInfo regionInfo) {
081      super(regionInfo);
082    }
083  }
084
085  private static class ServerReportEvent extends ProcedureEvent<ServerName> {
086    public ServerReportEvent(final ServerName serverName) {
087      super(serverName);
088    }
089  }
090
091  /**
092   * Current Region State.
093   * In-memory only. Not persisted.
094   */
095  // Mutable/Immutable? Changes have to be synchronized or not?
096  // Data members are volatile which seems to say multi-threaded access is fine.
097  // In the below we do check and set but the check state could change before
098  // we do the set because no synchronization....which seems dodgy. Clear up
099  // understanding here... how many threads accessing? Do locks make it so one
100  // thread at a time working on a single Region's RegionStateNode? Lets presume
101  // so for now. Odd is that elsewhere in this RegionStates, we synchronize on
102  // the RegionStateNode instance. TODO.
103  public static class RegionStateNode implements Comparable<RegionStateNode> {
104    private final RegionInfo regionInfo;
105    private final ProcedureEvent<?> event;
106
107    private volatile RegionTransitionProcedure procedure = null;
108    private volatile ServerName regionLocation = null;
109    // notice that, the lastHost will only be updated when a region is successfully CLOSED through
110    // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync
111    // with the data in meta.
112    private volatile ServerName lastHost = null;
113    /**
114     * A Region-in-Transition (RIT) moves through states.
115     * See {@link State} for complete list. A Region that
116     * is opened moves from OFFLINE => OPENING => OPENED.
117     */
118    private volatile State state = State.OFFLINE;
119
120    /**
121     * Updated whenever a call to {@link #setRegionLocation(ServerName)}
122     * or {@link #setState(State, State...)}.
123     */
124    private volatile long lastUpdate = 0;
125
126    private volatile long openSeqNum = HConstants.NO_SEQNUM;
127
128    public RegionStateNode(final RegionInfo regionInfo) {
129      this.regionInfo = regionInfo;
130      this.event = new AssignmentProcedureEvent(regionInfo);
131    }
132
133    /**
134     * @param update new region state this node should be assigned.
135     * @param expected current state should be in this given list of expected states
136     * @return true, if current state is in expected list; otherwise false.
137     */
138    public boolean setState(final State update, final State... expected) {
139      if (!isInState(expected)) {
140        return false;
141      }
142      this.state = update;
143      this.lastUpdate = EnvironmentEdgeManager.currentTime();
144      return true;
145    }
146
147    /**
148     * Put region into OFFLINE mode (set state and clear location).
149     * @return Last recorded server deploy
150     */
151    public ServerName offline() {
152      setState(State.OFFLINE);
153      return setRegionLocation(null);
154    }
155
156    /**
157     * Set new {@link State} but only if currently in <code>expected</code> State
158     * (if not, throw {@link UnexpectedStateException}.
159     */
160    public void transitionState(final State update, final State... expected)
161    throws UnexpectedStateException {
162      if (!setState(update, expected)) {
163        throw new UnexpectedStateException("Expected " + Arrays.toString(expected) +
164          " so could move to " + update + " but current state=" + getState());
165      }
166    }
167
168    public boolean isInState(final State... expected) {
169      if (expected != null && expected.length > 0) {
170        boolean expectedState = false;
171        for (int i = 0; i < expected.length; ++i) {
172          expectedState |= (getState() == expected[i]);
173        }
174        return expectedState;
175      }
176      return true;
177    }
178
179    public boolean isStuck() {
180      return isInState(State.FAILED_OPEN) && getProcedure() != null;
181    }
182
183    public boolean isInTransition() {
184      return getProcedure() != null;
185    }
186
187    public long getLastUpdate() {
188      return procedure != null ? procedure.getLastUpdate() : lastUpdate;
189    }
190
191    public void setLastHost(final ServerName serverName) {
192      this.lastHost = serverName;
193    }
194
195    public void setOpenSeqNum(final long seqId) {
196      this.openSeqNum = seqId;
197    }
198
199    public ServerName setRegionLocation(final ServerName serverName) {
200      ServerName lastRegionLocation = this.regionLocation;
201      if (LOG.isTraceEnabled() && serverName == null) {
202        LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE"));
203      }
204      this.regionLocation = serverName;
205      this.lastUpdate = EnvironmentEdgeManager.currentTime();
206      return lastRegionLocation;
207    }
208
209    public boolean setProcedure(final RegionTransitionProcedure proc) {
210      if (this.procedure != null && this.procedure != proc) {
211        return false;
212      }
213      this.procedure = proc;
214      return true;
215    }
216
217    public boolean unsetProcedure(final RegionTransitionProcedure proc) {
218      if (this.procedure != null && this.procedure != proc) {
219        return false;
220      }
221      this.procedure = null;
222      return true;
223    }
224
225    public RegionTransitionProcedure getProcedure() {
226      return procedure;
227    }
228
229    public ProcedureEvent<?> getProcedureEvent() {
230      return event;
231    }
232
233    public RegionInfo getRegionInfo() {
234      return regionInfo;
235    }
236
237    public TableName getTable() {
238      return getRegionInfo().getTable();
239    }
240
241    public boolean isSystemTable() {
242      return getTable().isSystemTable();
243    }
244
245    public ServerName getLastHost() {
246      return lastHost;
247    }
248
249    public ServerName getRegionLocation() {
250      return regionLocation;
251    }
252
253    public State getState() {
254      return state;
255    }
256
257    public long getOpenSeqNum() {
258      return openSeqNum;
259    }
260
261    public int getFormatVersion() {
262      // we don't have any format for now
263      // it should probably be in regionInfo.getFormatVersion()
264      return 0;
265    }
266
267    public RegionState toRegionState() {
268      return new RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation());
269    }
270
271    @Override
272    public int compareTo(final RegionStateNode other) {
273      // NOTE: RegionInfo sort by table first, so we are relying on that.
274      // we have a TestRegionState#testOrderedByTable() that check for that.
275      return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo());
276    }
277
278    @Override
279    public int hashCode() {
280      return getRegionInfo().hashCode();
281    }
282
283    @Override
284    public boolean equals(final Object other) {
285      if (this == other) return true;
286      if (!(other instanceof RegionStateNode)) return false;
287      return compareTo((RegionStateNode)other) == 0;
288    }
289
290    @Override
291    public String toString() {
292      return toDescriptiveString();
293    }
294
295    public String toShortString() {
296      // rit= is the current Region-In-Transition State -- see State enum.
297      return String.format("rit=%s, location=%s", getState(), getRegionLocation());
298    }
299
300    public String toDescriptiveString() {
301      return String.format("%s, table=%s, region=%s",
302        toShortString(), getTable(), getRegionInfo().getEncodedName());
303    }
304  }
305
306  // This comparator sorts the RegionStates by time stamp then Region name.
307  // Comparing by timestamp alone can lead us to discard different RegionStates that happen
308  // to share a timestamp.
309  private static class RegionStateStampComparator implements Comparator<RegionState> {
310    @Override
311    public int compare(final RegionState l, final RegionState r) {
312      int stampCmp = Long.compare(l.getStamp(), r.getStamp());
313      return stampCmp != 0 ? stampCmp : RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion());
314    }
315  }
316
  /**
   * Server State. Held by {@link ServerStateNode}; the non-initial values are assigned through
   * the log-splitting helpers of {@link RegionStates}.
   */
  public enum ServerState {
    /**
     * Initial state. Available. This is the value a new {@link ServerStateNode} starts with.
     */
    ONLINE,

    /**
     * Only server which carries meta can have this state. We will split wal for meta and then
     * assign meta first before splitting other wals.
     * Set by {@link RegionStates#metaLogSplitting(ServerName)}.
     */
    SPLITTING_META,

    /**
     * Indicate that the meta splitting is done. We need this state so that the UnassignProcedure
     * for meta can safely quit. See the comments in UnassignProcedure.remoteCallFailed for more
     * details.
     * Set by {@link RegionStates#metaLogSplit(ServerName)}.
     */
    SPLITTING_META_DONE,

    /**
     * Server expired/crashed. Currently undergoing WAL splitting.
     * Set by {@link RegionStates#logSplitting(ServerName)}.
     */
    SPLITTING,

    /**
     * WAL splitting done. This state will be used to tell the UnassignProcedure that it can safely
     * quit. See the comments in UnassignProcedure.remoteCallFailed for more details.
     * Set by {@link RegionStates#logSplit(ServerName)}.
     */
    OFFLINE
  }
350
351  /**
352   * State of Server; list of hosted regions, etc.
353   */
354  public static class ServerStateNode implements Comparable<ServerStateNode> {
355    private final ServerReportEvent reportEvent;
356
357    private final Set<RegionStateNode> regions;
358    private final ServerName serverName;
359
360    private volatile ServerState state = ServerState.ONLINE;
361
362    public ServerStateNode(final ServerName serverName) {
363      this.serverName = serverName;
364      this.regions = ConcurrentHashMap.newKeySet();
365      this.reportEvent = new ServerReportEvent(serverName);
366    }
367
368    public ServerName getServerName() {
369      return serverName;
370    }
371
372    public ServerState getState() {
373      return state;
374    }
375
376    public ProcedureEvent<?> getReportEvent() {
377      return reportEvent;
378    }
379
380    public boolean isInState(final ServerState... expected) {
381      boolean expectedState = false;
382      if (expected != null) {
383        for (int i = 0; i < expected.length; ++i) {
384          expectedState |= (state == expected[i]);
385        }
386      }
387      return expectedState;
388    }
389
390    private void setState(final ServerState state) {
391      this.state = state;
392    }
393
394    public Set<RegionStateNode> getRegions() {
395      return regions;
396    }
397
398    public int getRegionCount() {
399      return regions.size();
400    }
401
402    public ArrayList<RegionInfo> getRegionInfoList() {
403      ArrayList<RegionInfo> hris = new ArrayList<RegionInfo>(regions.size());
404      for (RegionStateNode region: regions) {
405        hris.add(region.getRegionInfo());
406      }
407      return hris;
408    }
409
410    public void addRegion(final RegionStateNode regionNode) {
411      this.regions.add(regionNode);
412    }
413
414    public void removeRegion(final RegionStateNode regionNode) {
415      this.regions.remove(regionNode);
416    }
417
418    @Override
419    public int compareTo(final ServerStateNode other) {
420      return getServerName().compareTo(other.getServerName());
421    }
422
423    @Override
424    public int hashCode() {
425      return getServerName().hashCode();
426    }
427
428    @Override
429    public boolean equals(final Object other) {
430      if (this == other) return true;
431      if (!(other instanceof ServerStateNode)) return false;
432      return compareTo((ServerStateNode)other) == 0;
433    }
434
435    @Override
436    public String toString() {
437      return String.format("name=%s, state=%s, regionCount=%d", getServerName(), getState(),
438          getRegionCount());
439    }
440  }
441
442  public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR =
443      new RegionStateStampComparator();
444
445  // TODO: Replace the ConcurrentSkipListMaps
446  /**
447   * RegionName -- i.e. RegionInfo.getRegionName() -- as bytes to {@link RegionStateNode}
448   */
449  private final ConcurrentSkipListMap<byte[], RegionStateNode> regionsMap =
450      new ConcurrentSkipListMap<byte[], RegionStateNode>(Bytes.BYTES_COMPARATOR);
451
452  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionInTransition =
453    new ConcurrentSkipListMap<RegionInfo, RegionStateNode>(RegionInfo.COMPARATOR);
454
455  /**
456   * Regions marked as offline on a read of hbase:meta. Unused or at least, once
457   * offlined, regions have no means of coming on line again. TODO.
458   */
459  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionOffline =
460    new ConcurrentSkipListMap<RegionInfo, RegionStateNode>();
461
462  private final ConcurrentSkipListMap<byte[], RegionFailedOpen> regionFailedOpen =
463    new ConcurrentSkipListMap<byte[], RegionFailedOpen>(Bytes.BYTES_COMPARATOR);
464
465  private final ConcurrentHashMap<ServerName, ServerStateNode> serverMap =
466      new ConcurrentHashMap<ServerName, ServerStateNode>();
467
468  public RegionStates() { }
469
470  public void clear() {
471    regionsMap.clear();
472    regionInTransition.clear();
473    regionOffline.clear();
474    serverMap.clear();
475  }
476
477  @VisibleForTesting
478  public boolean isRegionInRegionStates(final RegionInfo hri) {
479    return (regionsMap.containsKey(hri.getRegionName()) || regionInTransition.containsKey(hri)
480        || regionOffline.containsKey(hri));
481  }
482
483  // ==========================================================================
484  //  RegionStateNode helpers
485  // ==========================================================================
486  protected RegionStateNode createRegionStateNode(final RegionInfo regionInfo) {
487    RegionStateNode newNode = new RegionStateNode(regionInfo);
488    RegionStateNode oldNode = regionsMap.putIfAbsent(regionInfo.getRegionName(), newNode);
489    return oldNode != null ? oldNode : newNode;
490  }
491
492  protected RegionStateNode getOrCreateRegionStateNode(final RegionInfo regionInfo) {
493    RegionStateNode node = regionsMap.get(regionInfo.getRegionName());
494    return node != null ? node : createRegionStateNode(regionInfo);
495  }
496
497  RegionStateNode getRegionStateNodeFromName(final byte[] regionName) {
498    return regionsMap.get(regionName);
499  }
500
501  public RegionStateNode getRegionStateNode(final RegionInfo regionInfo) {
502    return getRegionStateNodeFromName(regionInfo.getRegionName());
503  }
504
505  public void deleteRegion(final RegionInfo regionInfo) {
506    regionsMap.remove(regionInfo.getRegionName());
507    // See HBASE-20860
508    // After master restarts, merged regions' RIT state may not be cleaned,
509    // making sure they are cleaned here
510    if (regionInTransition.containsKey(regionInfo)) {
511      regionInTransition.remove(regionInfo);
512    }
513    // Remove from the offline regions map too if there.
514    if (this.regionOffline.containsKey(regionInfo)) {
515      if (LOG.isTraceEnabled()) LOG.trace("Removing from regionOffline Map: " + regionInfo);
516      this.regionOffline.remove(regionInfo);
517    }
518  }
519
520  public void deleteRegions(final List<RegionInfo> regionInfos) {
521    regionInfos.forEach(this::deleteRegion);
522  }
523
524  ArrayList<RegionStateNode> getTableRegionStateNodes(final TableName tableName) {
525    final ArrayList<RegionStateNode> regions = new ArrayList<RegionStateNode>();
526    for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) {
527      if (!node.getTable().equals(tableName)) break;
528      regions.add(node);
529    }
530    return regions;
531  }
532
533  ArrayList<RegionState> getTableRegionStates(final TableName tableName) {
534    final ArrayList<RegionState> regions = new ArrayList<RegionState>();
535    for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) {
536      if (!node.getTable().equals(tableName)) break;
537      regions.add(node.toRegionState());
538    }
539    return regions;
540  }
541
542  ArrayList<RegionInfo> getTableRegionsInfo(final TableName tableName) {
543    final ArrayList<RegionInfo> regions = new ArrayList<RegionInfo>();
544    for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) {
545      if (!node.getTable().equals(tableName)) break;
546      regions.add(node.getRegionInfo());
547    }
548    return regions;
549  }
550
551  Collection<RegionStateNode> getRegionStateNodes() {
552    return regionsMap.values();
553  }
554
555  public ArrayList<RegionState> getRegionStates() {
556    final ArrayList<RegionState> regions = new ArrayList<RegionState>(regionsMap.size());
557    for (RegionStateNode node: regionsMap.values()) {
558      regions.add(node.toRegionState());
559    }
560    return regions;
561  }
562
563  // ==========================================================================
564  //  RegionState helpers
565  // ==========================================================================
566  public RegionState getRegionState(final RegionInfo regionInfo) {
567    RegionStateNode regionStateNode = getRegionStateNode(regionInfo);
568    return regionStateNode == null ? null : regionStateNode.toRegionState();
569  }
570
571  public RegionState getRegionState(final String encodedRegionName) {
572    // TODO: Need a map <encodedName, ...> but it is just dispatch merge...
573    for (RegionStateNode node: regionsMap.values()) {
574      if (node.getRegionInfo().getEncodedName().equals(encodedRegionName)) {
575        return node.toRegionState();
576      }
577    }
578    return null;
579  }
580
581  // ============================================================================================
582  //  TODO: helpers
583  // ============================================================================================
584  public boolean hasTableRegionStates(final TableName tableName) {
585    // TODO
586    return !getTableRegionStates(tableName).isEmpty();
587  }
588
589  /**
590   * @return Return online regions of table; does not include OFFLINE or SPLITTING regions.
591   */
592  public List<RegionInfo> getRegionsOfTable(final TableName table) {
593    return getRegionsOfTable(table, false);
594  }
595
596  private HRegionLocation createRegionForReopen(RegionStateNode node) {
597    synchronized (node) {
598      if (!include(node, false)) {
599        return null;
600      }
601      if (node.isInState(State.OPEN)) {
602        return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(),
603          node.getOpenSeqNum());
604      } else if (node.isInState(State.OPENING)) {
605        return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), -1);
606      } else {
607        return null;
608      }
609    }
610  }
611
612  /**
613   * Get the regions to be reopened when modifying a table.
614   * <p/>
615   * Notice that the {@code openSeqNum} in the returned HRegionLocation is also used to indicate the
616   * state of this region, positive means the region is in {@link State#OPEN}, -1 means
617   * {@link State#OPENING}. And for regions in other states we do not need reopen them.
618   */
619  public List<HRegionLocation> getRegionsOfTableForReopen(TableName tableName) {
620    return getTableRegionStateNodes(tableName).stream().map(this::createRegionForReopen)
621      .filter(r -> r != null).collect(Collectors.toList());
622  }
623
624  /**
625   * Check whether the region has been reopened. The meaning of the {@link HRegionLocation} is the
626   * same with {@link #getRegionsOfTableForReopen(TableName)}.
627   * <p/>
628   * For a region which is in {@link State#OPEN} before, if the region state is changed or the open
629   * seq num is changed, we can confirm that it has been reopened.
630   * <p/>
631   * For a region which is in {@link State#OPENING} before, usually it will be in {@link State#OPEN}
632   * now and we will schedule a MRP to reopen it. But there are several exceptions:
633   * <ul>
634   * <li>The region is in state other than {@link State#OPEN} or {@link State#OPENING}.</li>
635   * <li>The location of the region has been changed</li>
636   * </ul>
637   * Of course the region could still be in {@link State#OPENING} state and still on the same
638   * server, then here we will still return a {@link HRegionLocation} for it, just like
639   * {@link #getRegionsOfTableForReopen(TableName)}.
640   * @param oldLoc the previous state/location of this region
641   * @return null if the region has been reopened, otherwise a new {@link HRegionLocation} which
642   *         means we still need to reopen the region.
643   * @see #getRegionsOfTableForReopen(TableName)
644   */
645  public HRegionLocation checkReopened(HRegionLocation oldLoc) {
646    RegionStateNode node = getRegionStateNode(oldLoc.getRegion());
647    // HBASE-20921
648    // if the oldLoc's state node does not exist, that means the region is
649    // merged or split, no need to check it
650    if (node == null) {
651      return null;
652    }
653    synchronized (node) {
654      if (oldLoc.getSeqNum() >= 0) {
655        // in OPEN state before
656        if (node.isInState(State.OPEN)) {
657          if (node.getOpenSeqNum() > oldLoc.getSeqNum()) {
658            // normal case, the region has been reopened
659            return null;
660          } else {
661            // the open seq num does not change, need to reopen again
662            return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(),
663              node.getOpenSeqNum());
664          }
665        } else {
666          // the state has been changed so we can make sure that the region has been reopened(not
667          // finished maybe, but not a problem).
668          return null;
669        }
670      } else {
671        // in OPENING state before
672        if (!node.isInState(State.OPEN, State.OPENING)) {
673          // not in OPEN or OPENING state, then we can make sure that the region has been
674          // reopened(not finished maybe, but not a problem)
675          return null;
676        } else {
677          if (!node.getRegionLocation().equals(oldLoc.getServerName())) {
678            // the region has been moved, so we can make sure that the region has been reopened.
679            return null;
680          }
681          // normal case, we are still in OPENING state, or the reopen has been opened and the state
682          // is changed to OPEN.
683          long openSeqNum = node.isInState(State.OPEN) ? node.getOpenSeqNum() : -1;
684          return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), openSeqNum);
685        }
686      }
687    }
688  }
689
690  /**
691   * @return Return online regions of table; does not include OFFLINE or SPLITTING regions.
692   */
693  public List<RegionInfo> getRegionsOfTable(TableName table, boolean offline) {
694    return getRegionsOfTable(table, state -> include(state, offline));
695  }
696
697  /**
698   * @return Return the regions of the table; does not include OFFLINE unless you set
699   *         <code>offline</code> to true. Does not include regions that are in the
700   *         {@link State#SPLIT} state.
701   */
702  private List<RegionInfo> getRegionsOfTable(TableName table, Predicate<RegionStateNode> filter) {
703    return getTableRegionStateNodes(table).stream().filter(filter).map(n -> n.getRegionInfo())
704      .collect(Collectors.toList());
705  }
706
707  /**
708   * Utility. Whether to include region in list of regions. Default is to
709   * weed out split and offline regions.
710   * @return True if we should include the <code>node</code> (do not include
711   * if split or offline unless <code>offline</code> is set to true.
712   */
713  boolean include(final RegionStateNode node, final boolean offline) {
714    if (LOG.isTraceEnabled()) {
715      LOG.trace("WORKING ON " + node + " " + node.getRegionInfo());
716    }
717    if (node.isInState(State.SPLIT)) return false;
718    if (node.isInState(State.OFFLINE) && !offline) return false;
719    final RegionInfo hri = node.getRegionInfo();
720    return (!hri.isOffline() && !hri.isSplit()) ||
721        ((hri.isOffline() || hri.isSplit()) && offline);
722  }
723
724  /**
725   * Returns the set of regions hosted by the specified server
726   * @param serverName the server we are interested in
727   * @return set of RegionInfo hosted by the specified server
728   */
729  public List<RegionInfo> getServerRegionInfoSet(final ServerName serverName) {
730    final ServerStateNode serverInfo = getServerNode(serverName);
731    if (serverInfo == null) return Collections.emptyList();
732
733    synchronized (serverInfo) {
734      return serverInfo.getRegionInfoList();
735    }
736  }
737
738  // ============================================================================================
739  // Split helpers
740  // These methods will only be called in ServerCrashProcedure, and at the end of SCP we will remove
741  // the ServerStateNode by calling removeServer.
742  // ============================================================================================
743
744  private void setServerState(ServerName serverName, ServerState state) {
745    ServerStateNode serverNode = getOrCreateServer(serverName);
746    synchronized (serverNode) {
747      serverNode.setState(state);
748    }
749  }
750
751  /**
752   * Call this when we start meta log splitting a crashed Server.
753   * @see #metaLogSplit(ServerName)
754   */
755  public void metaLogSplitting(ServerName serverName) {
756    setServerState(serverName, ServerState.SPLITTING_META);
757  }
758
759  /**
760   * Called after we've split the meta logs on a crashed Server.
761   * @see #metaLogSplitting(ServerName)
762   */
763  public void metaLogSplit(ServerName serverName) {
764    setServerState(serverName, ServerState.SPLITTING_META_DONE);
765  }
766
767  /**
768   * Call this when we start log splitting for a crashed Server.
769   * @see #logSplit(ServerName)
770   */
771  public void logSplitting(final ServerName serverName) {
772    setServerState(serverName, ServerState.SPLITTING);
773  }
774
775  /**
776   * Called after we've split all logs on a crashed Server.
777   * @see #logSplitting(ServerName)
778   */
779  public void logSplit(final ServerName serverName) {
780    setServerState(serverName, ServerState.OFFLINE);
781  }
782
783  public void updateRegionState(final RegionInfo regionInfo, final State state) {
784    final RegionStateNode regionNode = getOrCreateRegionStateNode(regionInfo);
785    synchronized (regionNode) {
786      regionNode.setState(state);
787    }
788  }
789
790  // ============================================================================================
791  //  TODO:
792  // ============================================================================================
793  public List<RegionInfo> getAssignedRegions() {
794    final List<RegionInfo> result = new ArrayList<RegionInfo>();
795    for (RegionStateNode node: regionsMap.values()) {
796      if (!node.isInTransition()) {
797        result.add(node.getRegionInfo());
798      }
799    }
800    return result;
801  }
802
803  public boolean isRegionInState(final RegionInfo regionInfo, final State... state) {
804    final RegionStateNode region = getRegionStateNode(regionInfo);
805    if (region != null) {
806      synchronized (region) {
807        return region.isInState(state);
808      }
809    }
810    return false;
811  }
812
813  public boolean isRegionOnline(final RegionInfo regionInfo) {
814    return isRegionInState(regionInfo, State.OPEN);
815  }
816
817  /**
818   * @return True if region is offline (In OFFLINE or CLOSED state).
819   */
820  public boolean isRegionOffline(final RegionInfo regionInfo) {
821    return isRegionInState(regionInfo, State.OFFLINE, State.CLOSED);
822  }
823
824  public Map<ServerName, List<RegionInfo>> getSnapShotOfAssignment(
825      final Collection<RegionInfo> regions) {
826    final Map<ServerName, List<RegionInfo>> result = new HashMap<ServerName, List<RegionInfo>>();
827    for (RegionInfo hri: regions) {
828      final RegionStateNode node = getRegionStateNode(hri);
829      if (node == null) continue;
830
831      // TODO: State.OPEN
832      final ServerName serverName = node.getRegionLocation();
833      if (serverName == null) continue;
834
835      List<RegionInfo> serverRegions = result.get(serverName);
836      if (serverRegions == null) {
837        serverRegions = new ArrayList<RegionInfo>();
838        result.put(serverName, serverRegions);
839      }
840
841      serverRegions.add(node.getRegionInfo());
842    }
843    return result;
844  }
845
846  public Map<RegionInfo, ServerName> getRegionAssignments() {
847    final HashMap<RegionInfo, ServerName> assignments = new HashMap<RegionInfo, ServerName>();
848    for (RegionStateNode node: regionsMap.values()) {
849      assignments.put(node.getRegionInfo(), node.getRegionLocation());
850    }
851    return assignments;
852  }
853
854  public Map<RegionState.State, List<RegionInfo>> getRegionByStateOfTable(TableName tableName) {
855    final State[] states = State.values();
856    final Map<RegionState.State, List<RegionInfo>> tableRegions =
857        new HashMap<State, List<RegionInfo>>(states.length);
858    for (int i = 0; i < states.length; ++i) {
859      tableRegions.put(states[i], new ArrayList<RegionInfo>());
860    }
861
862    for (RegionStateNode node: regionsMap.values()) {
863      if (node.getTable().equals(tableName)) {
864        tableRegions.get(node.getState()).add(node.getRegionInfo());
865      }
866    }
867    return tableRegions;
868  }
869
870  public ServerName getRegionServerOfRegion(final RegionInfo regionInfo) {
871    final RegionStateNode region = getRegionStateNode(regionInfo);
872    if (region != null) {
873      synchronized (region) {
874        ServerName server = region.getRegionLocation();
875        return server != null ? server : region.getLastHost();
876      }
877    }
878    return null;
879  }
880
881  /**
882   * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
883   * Can't let out original since it can change and at least the load balancer
884   * wants to iterate this exported list.  We need to synchronize on regions
885   * since all access to this.servers is under a lock on this.regions.
886   * @param forceByCluster a flag to force to aggregate the server-load to the cluster level
887   * @return A clone of current assignments by table.
888   */
889  public Map<TableName, Map<ServerName, List<RegionInfo>>> getAssignmentsByTable(
890      final boolean forceByCluster) {
891    if (!forceByCluster) return getAssignmentsByTable();
892
893    final HashMap<ServerName, List<RegionInfo>> ensemble =
894      new HashMap<ServerName, List<RegionInfo>>(serverMap.size());
895    for (ServerStateNode serverNode: serverMap.values()) {
896      ensemble.put(serverNode.getServerName(), serverNode.getRegionInfoList());
897    }
898
899    // TODO: can we use Collections.singletonMap(HConstants.ENSEMBLE_TABLE_NAME, ensemble)?
900    final Map<TableName, Map<ServerName, List<RegionInfo>>> result =
901      new HashMap<TableName, Map<ServerName, List<RegionInfo>>>(1);
902    result.put(HConstants.ENSEMBLE_TABLE_NAME, ensemble);
903    return result;
904  }
905
906  public Map<TableName, Map<ServerName, List<RegionInfo>>> getAssignmentsByTable() {
907    final Map<TableName, Map<ServerName, List<RegionInfo>>> result = new HashMap<>();
908    for (RegionStateNode node: regionsMap.values()) {
909      Map<ServerName, List<RegionInfo>> tableResult = result.get(node.getTable());
910      if (tableResult == null) {
911        tableResult = new HashMap<ServerName, List<RegionInfo>>();
912        result.put(node.getTable(), tableResult);
913      }
914
915      final ServerName serverName = node.getRegionLocation();
916      if (serverName == null) {
917        LOG.info("Skipping, no server for " + node);
918        continue;
919      }
920      List<RegionInfo> serverResult = tableResult.get(serverName);
921      if (serverResult == null) {
922        serverResult = new ArrayList<RegionInfo>();
923        tableResult.put(serverName, serverResult);
924      }
925
926      serverResult.add(node.getRegionInfo());
927    }
928    // Add online servers with no assignment for the table.
929    for (Map<ServerName, List<RegionInfo>> table: result.values()) {
930        for (ServerName svr : serverMap.keySet()) {
931          if (!table.containsKey(svr)) {
932            table.put(svr, new ArrayList<RegionInfo>());
933          }
934        }
935    }
936    return result;
937  }
938
939  // ==========================================================================
940  //  Region in transition helpers
941  // ==========================================================================
942  protected boolean addRegionInTransition(final RegionStateNode regionNode,
943      final RegionTransitionProcedure procedure) {
944    if (procedure != null && !regionNode.setProcedure(procedure)) return false;
945
946    regionInTransition.put(regionNode.getRegionInfo(), regionNode);
947    return true;
948  }
949
950  protected void removeRegionInTransition(final RegionStateNode regionNode,
951      final RegionTransitionProcedure procedure) {
952    regionInTransition.remove(regionNode.getRegionInfo());
953    regionNode.unsetProcedure(procedure);
954  }
955
956  public boolean hasRegionsInTransition() {
957    return !regionInTransition.isEmpty();
958  }
959
960  public boolean isRegionInTransition(final RegionInfo regionInfo) {
961    final RegionStateNode node = regionInTransition.get(regionInfo);
962    return node != null ? node.isInTransition() : false;
963  }
964
965  /**
966   * @return If a procedure-in-transition for <code>hri</code>, return it else null.
967   */
968  public RegionTransitionProcedure getRegionTransitionProcedure(final RegionInfo hri) {
969    RegionStateNode node = regionInTransition.get(hri);
970    if (node == null) return null;
971    return node.getProcedure();
972  }
973
974  public RegionState getRegionTransitionState(final RegionInfo hri) {
975    RegionStateNode node = regionInTransition.get(hri);
976    if (node == null) return null;
977
978    synchronized (node) {
979      return node.isInTransition() ? node.toRegionState() : null;
980    }
981  }
982
983  public List<RegionStateNode> getRegionsInTransition() {
984    return new ArrayList<RegionStateNode>(regionInTransition.values());
985  }
986
987  /**
988   * Get the number of regions in transition.
989   */
990  public int getRegionsInTransitionCount() {
991    return regionInTransition.size();
992  }
993
994  public List<RegionState> getRegionsStateInTransition() {
995    final List<RegionState> rit = new ArrayList<RegionState>(regionInTransition.size());
996    for (RegionStateNode node: regionInTransition.values()) {
997      rit.add(node.toRegionState());
998    }
999    return rit;
1000  }
1001
1002  public SortedSet<RegionState> getRegionsInTransitionOrderedByTimestamp() {
1003    final SortedSet<RegionState> rit = new TreeSet<RegionState>(REGION_STATE_STAMP_COMPARATOR);
1004    for (RegionStateNode node: regionInTransition.values()) {
1005      rit.add(node.toRegionState());
1006    }
1007    return rit;
1008  }
1009
1010  // ==========================================================================
1011  //  Region offline helpers
1012  // ==========================================================================
1013  // TODO: Populated when we read meta but regions never make it out of here.
1014  public void addToOfflineRegions(final RegionStateNode regionNode) {
1015    LOG.info("Added to offline, CURRENTLY NEVER CLEARED!!! " + regionNode);
1016    regionOffline.put(regionNode.getRegionInfo(), regionNode);
1017  }
1018
1019  // TODO: Unused.
1020  public void removeFromOfflineRegions(final RegionInfo regionInfo) {
1021    regionOffline.remove(regionInfo);
1022  }
1023
1024  // ==========================================================================
1025  //  Region FAIL_OPEN helpers
1026  // ==========================================================================
1027  public static final class RegionFailedOpen {
1028    private final RegionStateNode regionNode;
1029
1030    private volatile Exception exception = null;
1031    private AtomicInteger retries = new AtomicInteger();
1032
1033    public RegionFailedOpen(final RegionStateNode regionNode) {
1034      this.regionNode = regionNode;
1035    }
1036
1037    public RegionStateNode getRegionStateNode() {
1038      return regionNode;
1039    }
1040
1041    public RegionInfo getRegionInfo() {
1042      return regionNode.getRegionInfo();
1043    }
1044
1045    public int incrementAndGetRetries() {
1046      return this.retries.incrementAndGet();
1047    }
1048
1049    public int getRetries() {
1050      return retries.get();
1051    }
1052
1053    public void setException(final Exception exception) {
1054      this.exception = exception;
1055    }
1056
1057    public Exception getException() {
1058      return this.exception;
1059    }
1060  }
1061
1062  public RegionFailedOpen addToFailedOpen(final RegionStateNode regionNode) {
1063    final byte[] key = regionNode.getRegionInfo().getRegionName();
1064    RegionFailedOpen node = regionFailedOpen.get(key);
1065    if (node == null) {
1066      RegionFailedOpen newNode = new RegionFailedOpen(regionNode);
1067      RegionFailedOpen oldNode = regionFailedOpen.putIfAbsent(key, newNode);
1068      node = oldNode != null ? oldNode : newNode;
1069    }
1070    return node;
1071  }
1072
1073  public RegionFailedOpen getFailedOpen(final RegionInfo regionInfo) {
1074    return regionFailedOpen.get(regionInfo.getRegionName());
1075  }
1076
1077  public void removeFromFailedOpen(final RegionInfo regionInfo) {
1078    regionFailedOpen.remove(regionInfo.getRegionName());
1079  }
1080
1081  public List<RegionState> getRegionFailedOpen() {
1082    if (regionFailedOpen.isEmpty()) return Collections.emptyList();
1083
1084    ArrayList<RegionState> regions = new ArrayList<RegionState>(regionFailedOpen.size());
1085    for (RegionFailedOpen r: regionFailedOpen.values()) {
1086      regions.add(r.getRegionStateNode().toRegionState());
1087    }
1088    return regions;
1089  }
1090
1091  // ==========================================================================
1092  //  Servers
1093  // ==========================================================================
1094
1095  /**
1096   * Be judicious calling this method. Do it on server register ONLY otherwise
1097   * you could mess up online server accounting. TOOD: Review usage and convert
1098   * to {@link #getServerNode(ServerName)} where we can.
1099   */
1100  ServerStateNode getOrCreateServer(final ServerName serverName) {
1101    ServerStateNode node = serverMap.get(serverName);
1102    if (node == null) {
1103      LOG.trace("CREATING! {}", serverName, new RuntimeException("WHERE AM I?"));
1104      node = new ServerStateNode(serverName);
1105      ServerStateNode oldNode = serverMap.putIfAbsent(serverName, node);
1106      node = oldNode != null ? oldNode : node;
1107    }
1108    return node;
1109  }
1110
1111  public void removeServer(final ServerName serverName) {
1112    serverMap.remove(serverName);
1113  }
1114
1115  public ServerStateNode getServerNode(final ServerName serverName) {
1116    return serverMap.get(serverName);
1117  }
1118
1119  public double getAverageLoad() {
1120    int numServers = 0;
1121    int totalLoad = 0;
1122    for (ServerStateNode node: serverMap.values()) {
1123      totalLoad += node.getRegionCount();
1124      numServers++;
1125    }
1126    return numServers == 0 ? 0.0: (double)totalLoad / (double)numServers;
1127  }
1128
1129  /**
1130   * Add reference to region to serverstatenode.
1131   * DOES NOT AUTO-CREATE ServerStateNode instance.
1132   * @return Return serverstatenode or null if none.
1133   */
1134  ServerStateNode addRegionToServer(final RegionStateNode regionNode) {
1135    ServerStateNode ssn = getServerNode(regionNode.getRegionLocation());
1136    if (ssn == null) {
1137      return ssn;
1138    }
1139    ssn.addRegion(regionNode);
1140    return ssn;
1141  }
1142
1143  public ServerStateNode removeRegionFromServer(final ServerName serverName,
1144      final RegionStateNode regionNode) {
1145    ServerStateNode serverNode = getServerNode(serverName);
1146    if (serverNode != null) {
1147      serverNode.removeRegion(regionNode);
1148    }
1149    return serverNode;
1150  }
1151
1152  // ==========================================================================
1153  //  ToString helpers
1154  // ==========================================================================
1155  public static String regionNamesToString(final Collection<byte[]> regions) {
1156    final StringBuilder sb = new StringBuilder();
1157    final Iterator<byte[]> it = regions.iterator();
1158    sb.append("[");
1159    if (it.hasNext()) {
1160      sb.append(Bytes.toStringBinary(it.next()));
1161      while (it.hasNext()) {
1162        sb.append(", ");
1163        sb.append(Bytes.toStringBinary(it.next()));
1164      }
1165    }
1166    sb.append("]");
1167    return sb.toString();
1168  }
1169}