001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.master.assignment;
021
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.Collection;
025import java.util.Collections;
026import java.util.Comparator;
027import java.util.HashMap;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Map;
031import java.util.Set;
032import java.util.SortedSet;
033import java.util.TreeSet;
034import java.util.concurrent.ConcurrentHashMap;
035import java.util.concurrent.ConcurrentSkipListMap;
036import java.util.concurrent.atomic.AtomicInteger;
037import java.util.function.Predicate;
038import java.util.stream.Collectors;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.HRegionLocation;
041import org.apache.hadoop.hbase.ServerName;
042import org.apache.hadoop.hbase.TableName;
043import org.apache.hadoop.hbase.client.RegionInfo;
044import org.apache.hadoop.hbase.client.RegionReplicaUtil;
045import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
046import org.apache.hadoop.hbase.master.RegionState;
047import org.apache.hadoop.hbase.master.RegionState.State;
048import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
051import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
052import org.apache.yetus.audience.InterfaceAudience;
053import org.slf4j.Logger;
054import org.slf4j.LoggerFactory;
055
056import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
057
058/**
059 * RegionStates contains a set of Maps that describes the in-memory state of the AM, with
060 * the regions available in the system, the region in transition, the offline regions and
061 * the servers holding regions.
062 */
063@InterfaceAudience.Private
064public class RegionStates {
  // Logger shared by this class and its nested types.
  private static final Logger LOG = LoggerFactory.getLogger(RegionStates.class);

  /**
   * Region states listed as acceptable ahead of an open. The trailing comment on each entry
   * names the situation that leaves a region in that state. The consumers of this array are
   * outside this part of the file.
   */
  protected static final State[] STATES_EXPECTED_ON_OPEN = new State[] {
    State.OPEN, // State may already be OPEN if we died after receiving the OPEN from regionserver
                // but before complete finish of AssignProcedure. HBASE-20100.
    State.OFFLINE, State.CLOSED,      // disable/offline
    State.SPLITTING, State.SPLIT,     // ServerCrashProcedure
    State.OPENING, State.FAILED_OPEN, // already in-progress (retrying)
  };
074
  /**
   * Region states listed as acceptable ahead of a close. The trailing comment on each entry
   * names the situation that leaves a region in that state. The consumers of this array are
   * outside this part of the file.
   */
  protected static final State[] STATES_EXPECTED_ON_CLOSE = new State[] {
    State.SPLITTING, State.SPLIT, State.MERGING, // ServerCrashProcedure
    State.OPEN,                   // enabled/open
    State.CLOSING                 // already in-progress (retrying)
  };
080
  /**
   * {@link ProcedureEvent} parameterized on {@link RegionInfo}; one instance is created per
   * {@link RegionStateNode} in that class's constructor.
   */
  private static class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
    /**
     * @param regionInfo the region handed to the {@link ProcedureEvent} constructor
     */
    public AssignmentProcedureEvent(final RegionInfo regionInfo) {
      super(regionInfo);
    }
  }
086
  /**
   * {@link ProcedureEvent} parameterized on {@link ServerName}; one instance is created per
   * {@link ServerStateNode} in that class's constructor.
   */
  private static class ServerReportEvent extends ProcedureEvent<ServerName> {
    /**
     * @param serverName the server handed to the {@link ProcedureEvent} constructor
     */
    public ServerReportEvent(final ServerName serverName) {
      super(serverName);
    }
  }
092
093  /**
094   * Current Region State.
095   * In-memory only. Not persisted.
096   */
097  // Mutable/Immutable? Changes have to be synchronized or not?
098  // Data members are volatile which seems to say multi-threaded access is fine.
099  // In the below we do check and set but the check state could change before
100  // we do the set because no synchronization....which seems dodgy. Clear up
101  // understanding here... how many threads accessing? Do locks make it so one
102  // thread at a time working on a single Region's RegionStateNode? Lets presume
103  // so for now. Odd is that elsewhere in this RegionStates, we synchronize on
104  // the RegionStateNode instance. TODO.
105  public static class RegionStateNode implements Comparable<RegionStateNode> {
106    private final RegionInfo regionInfo;
107    private final ProcedureEvent<?> event;
108
109    private volatile RegionTransitionProcedure procedure = null;
110    private volatile ServerName regionLocation = null;
111    // notice that, the lastHost will only be updated when a region is successfully CLOSED through
112    // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync
113    // with the data in meta.
114    private volatile ServerName lastHost = null;
115    /**
116     * A Region-in-Transition (RIT) moves through states.
117     * See {@link State} for complete list. A Region that
118     * is opened moves from OFFLINE => OPENING => OPENED.
119     */
120    private volatile State state = State.OFFLINE;
121
122    /**
123     * Updated whenever a call to {@link #setRegionLocation(ServerName)}
124     * or {@link #setState(State, State...)}.
125     */
126    private volatile long lastUpdate = 0;
127
128    private volatile long openSeqNum = HConstants.NO_SEQNUM;
129
130    public RegionStateNode(final RegionInfo regionInfo) {
131      this.regionInfo = regionInfo;
132      this.event = new AssignmentProcedureEvent(regionInfo);
133    }
134
135    /**
136     * @param update new region state this node should be assigned.
137     * @param expected current state should be in this given list of expected states
138     * @return true, if current state is in expected list; otherwise false.
139     */
140    public boolean setState(final State update, final State... expected) {
141      if (!isInState(expected)) {
142        return false;
143      }
144      this.state = update;
145      this.lastUpdate = EnvironmentEdgeManager.currentTime();
146      return true;
147    }
148
149    /**
150     * Put region into OFFLINE mode (set state and clear location).
151     * @return Last recorded server deploy
152     */
153    public ServerName offline() {
154      setState(State.OFFLINE);
155      return setRegionLocation(null);
156    }
157
158    /**
159     * Set new {@link State} but only if currently in <code>expected</code> State
160     * (if not, throw {@link UnexpectedStateException}.
161     */
162    public void transitionState(final State update, final State... expected)
163    throws UnexpectedStateException {
164      if (!setState(update, expected)) {
165        throw new UnexpectedStateException("Expected " + Arrays.toString(expected) +
166          " so could move to " + update + " but current state=" + getState());
167      }
168    }
169
170    public boolean isInState(final State... expected) {
171      if (expected != null && expected.length > 0) {
172        boolean expectedState = false;
173        for (int i = 0; i < expected.length; ++i) {
174          expectedState |= (getState() == expected[i]);
175        }
176        return expectedState;
177      }
178      return true;
179    }
180
181    public boolean isStuck() {
182      return isInState(State.FAILED_OPEN) && getProcedure() != null;
183    }
184
185    public boolean isInTransition() {
186      return getProcedure() != null;
187    }
188
189    public long getLastUpdate() {
190      return procedure != null ? procedure.getLastUpdate() : lastUpdate;
191    }
192
193    public void setLastHost(final ServerName serverName) {
194      this.lastHost = serverName;
195    }
196
197    public void setOpenSeqNum(final long seqId) {
198      this.openSeqNum = seqId;
199    }
200
201    public ServerName setRegionLocation(final ServerName serverName) {
202      ServerName lastRegionLocation = this.regionLocation;
203      if (LOG.isTraceEnabled() && serverName == null) {
204        LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE"));
205      }
206      this.regionLocation = serverName;
207      this.lastUpdate = EnvironmentEdgeManager.currentTime();
208      return lastRegionLocation;
209    }
210
211    public boolean setProcedure(final RegionTransitionProcedure proc) {
212      if (this.procedure != null && this.procedure != proc) {
213        return false;
214      }
215      this.procedure = proc;
216      return true;
217    }
218
219    public boolean unsetProcedure(final RegionTransitionProcedure proc) {
220      if (this.procedure != null && this.procedure != proc) {
221        return false;
222      }
223      this.procedure = null;
224      return true;
225    }
226
227    public RegionTransitionProcedure getProcedure() {
228      return procedure;
229    }
230
231    public ProcedureEvent<?> getProcedureEvent() {
232      return event;
233    }
234
235    public RegionInfo getRegionInfo() {
236      return regionInfo;
237    }
238
239    public TableName getTable() {
240      return getRegionInfo().getTable();
241    }
242
243    public boolean isSystemTable() {
244      return getTable().isSystemTable();
245    }
246
247    public ServerName getLastHost() {
248      return lastHost;
249    }
250
251    public ServerName getRegionLocation() {
252      return regionLocation;
253    }
254
255    public State getState() {
256      return state;
257    }
258
259    public long getOpenSeqNum() {
260      return openSeqNum;
261    }
262
263    public int getFormatVersion() {
264      // we don't have any format for now
265      // it should probably be in regionInfo.getFormatVersion()
266      return 0;
267    }
268
269    public RegionState toRegionState() {
270      return new RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation());
271    }
272
273    @Override
274    public int compareTo(final RegionStateNode other) {
275      // NOTE: RegionInfo sort by table first, so we are relying on that.
276      // we have a TestRegionState#testOrderedByTable() that check for that.
277      return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo());
278    }
279
280    @Override
281    public int hashCode() {
282      return getRegionInfo().hashCode();
283    }
284
285    @Override
286    public boolean equals(final Object other) {
287      if (this == other) return true;
288      if (!(other instanceof RegionStateNode)) return false;
289      return compareTo((RegionStateNode)other) == 0;
290    }
291
292    @Override
293    public String toString() {
294      return toDescriptiveString();
295    }
296
297    public String toShortString() {
298      // rit= is the current Region-In-Transition State -- see State enum.
299      return String.format("rit=%s, location=%s", getState(), getRegionLocation());
300    }
301
302    public String toDescriptiveString() {
303      return String.format("%s, table=%s, region=%s",
304        toShortString(), getTable(), getRegionInfo().getEncodedName());
305    }
306  }
307
308  // This comparator sorts the RegionStates by time stamp then Region name.
309  // Comparing by timestamp alone can lead us to discard different RegionStates that happen
310  // to share a timestamp.
311  private static class RegionStateStampComparator implements Comparator<RegionState> {
312    @Override
313    public int compare(final RegionState l, final RegionState r) {
314      int stampCmp = Long.compare(l.getStamp(), r.getStamp());
315      return stampCmp != 0 ? stampCmp : RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion());
316    }
317  }
318
  /**
   * Server State. A {@link ServerStateNode} starts out as {@link #ONLINE}.
   */
  public enum ServerState {
    /**
     * Initial state. Available.
     */
    ONLINE,

    /**
     * Only a server which carries meta can have this state. We will split the WAL for meta and
     * then assign meta first before splitting other WALs.
     */
    SPLITTING_META,

    /**
     * Indicates that the meta splitting is done. We need this state so that the UnassignProcedure
     * for meta can safely quit. See the comments in UnassignProcedure.remoteCallFailed for more
     * details.
     */
    SPLITTING_META_DONE,

    /**
     * Server expired/crashed. Currently undergoing WAL splitting.
     */
    SPLITTING,

    /**
     * WAL splitting done. This state will be used to tell the UnassignProcedure that it can safely
     * quit. See the comments in UnassignProcedure.remoteCallFailed for more details.
     */
    OFFLINE
  }
352
353  /**
354   * State of Server; list of hosted regions, etc.
355   */
356  public static class ServerStateNode implements Comparable<ServerStateNode> {
357    private final ServerReportEvent reportEvent;
358
359    private final Set<RegionStateNode> regions;
360    private final ServerName serverName;
361
362    private volatile ServerState state = ServerState.ONLINE;
363
364    public ServerStateNode(final ServerName serverName) {
365      this.serverName = serverName;
366      this.regions = ConcurrentHashMap.newKeySet();
367      this.reportEvent = new ServerReportEvent(serverName);
368    }
369
370    public ServerName getServerName() {
371      return serverName;
372    }
373
374    public ServerState getState() {
375      return state;
376    }
377
378    public ProcedureEvent<?> getReportEvent() {
379      return reportEvent;
380    }
381
382    public boolean isInState(final ServerState... expected) {
383      boolean expectedState = false;
384      if (expected != null) {
385        for (int i = 0; i < expected.length; ++i) {
386          expectedState |= (state == expected[i]);
387        }
388      }
389      return expectedState;
390    }
391
392    private void setState(final ServerState state) {
393      this.state = state;
394    }
395
396    public Set<RegionStateNode> getRegions() {
397      return regions;
398    }
399
400    public int getRegionCount() {
401      return regions.size();
402    }
403
404    public ArrayList<RegionInfo> getRegionInfoList() {
405      ArrayList<RegionInfo> hris = new ArrayList<RegionInfo>(regions.size());
406      for (RegionStateNode region: regions) {
407        hris.add(region.getRegionInfo());
408      }
409      return hris;
410    }
411
412    public void addRegion(final RegionStateNode regionNode) {
413      this.regions.add(regionNode);
414    }
415
416    public void removeRegion(final RegionStateNode regionNode) {
417      this.regions.remove(regionNode);
418    }
419
420    @Override
421    public int compareTo(final ServerStateNode other) {
422      return getServerName().compareTo(other.getServerName());
423    }
424
425    @Override
426    public int hashCode() {
427      return getServerName().hashCode();
428    }
429
430    @Override
431    public boolean equals(final Object other) {
432      if (this == other) return true;
433      if (!(other instanceof ServerStateNode)) return false;
434      return compareTo((ServerStateNode)other) == 0;
435    }
436
437    @Override
438    public String toString() {
439      return String.format("name=%s, state=%s, regionCount=%d", getServerName(), getState(),
440          getRegionCount());
441    }
442  }
443
  /**
   * Shared comparator ordering {@link RegionState}s by stamp, then by region.
   */
  public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR =
      new RegionStateStampComparator();

  // TODO: Replace the ConcurrentSkipListMaps
  /**
   * RegionName -- i.e. RegionInfo.getRegionName() -- as bytes to {@link RegionStateNode}.
   * Sorted with {@link Bytes#BYTES_COMPARATOR}; the per-table lookups in this class start
   * their scan at the table name and rely on that ordering.
   */
  private final ConcurrentSkipListMap<byte[], RegionStateNode> regionsMap =
      new ConcurrentSkipListMap<byte[], RegionStateNode>(Bytes.BYTES_COMPARATOR);

  /**
   * {@link RegionInfo} to {@link RegionStateNode}, sorted with {@link RegionInfo#COMPARATOR}.
   * Going by the name this tracks regions in transition; the code that populates it is outside
   * this part of the file.
   */
  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionInTransition =
    new ConcurrentSkipListMap<RegionInfo, RegionStateNode>(RegionInfo.COMPARATOR);

  /**
   * Regions marked as offline on a read of hbase:meta. Unused or at least, once
   * offlined, regions have no means of coming on line again. TODO.
   */
  // NOTE(review): unlike regionInTransition no comparator is supplied, so this map uses the
  // natural ordering of its keys; that requires the RegionInfo implementations to be
  // Comparable -- confirm.
  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionOffline =
    new ConcurrentSkipListMap<RegionInfo, RegionStateNode>();

  /**
   * byte[] key to RegionFailedOpen, sorted with {@link Bytes#BYTES_COMPARATOR}. The key is
   * presumably the region name, as in regionsMap -- verify against the code that fills it.
   */
  private final ConcurrentSkipListMap<byte[], RegionFailedOpen> regionFailedOpen =
    new ConcurrentSkipListMap<byte[], RegionFailedOpen>(Bytes.BYTES_COMPARATOR);

  /**
   * {@link ServerName} to the {@link ServerStateNode} tracking that server.
   */
  private final ConcurrentHashMap<ServerName, ServerStateNode> serverMap =
      new ConcurrentHashMap<ServerName, ServerStateNode>();
469
  /**
   * Creates an empty instance; all tracking maps are initialized by their field declarations.
   */
  public RegionStates() { }
471
472  public void clear() {
473    regionsMap.clear();
474    regionInTransition.clear();
475    regionOffline.clear();
476    serverMap.clear();
477  }
478
479  @VisibleForTesting
480  public boolean isRegionInRegionStates(final RegionInfo hri) {
481    return (regionsMap.containsKey(hri.getRegionName()) || regionInTransition.containsKey(hri)
482        || regionOffline.containsKey(hri));
483  }
484
485  // ==========================================================================
486  //  RegionStateNode helpers
487  // ==========================================================================
488  protected RegionStateNode createRegionStateNode(final RegionInfo regionInfo) {
489    RegionStateNode newNode = new RegionStateNode(regionInfo);
490    RegionStateNode oldNode = regionsMap.putIfAbsent(regionInfo.getRegionName(), newNode);
491    return oldNode != null ? oldNode : newNode;
492  }
493
494  protected RegionStateNode getOrCreateRegionStateNode(final RegionInfo regionInfo) {
495    RegionStateNode node = regionsMap.get(regionInfo.getRegionName());
496    return node != null ? node : createRegionStateNode(regionInfo);
497  }
498
  /**
   * @param regionName region name bytes, as used for the keys of the region map
   * @return the node registered under that name, or null if there is none
   */
  RegionStateNode getRegionStateNodeFromName(final byte[] regionName) {
    return regionsMap.get(regionName);
  }
502
  /**
   * @param regionInfo the region to look up; its region name is the lookup key
   * @return the node registered for the region, or null if there is none
   */
  public RegionStateNode getRegionStateNode(final RegionInfo regionInfo) {
    return getRegionStateNodeFromName(regionInfo.getRegionName());
  }
506
507  public void deleteRegion(final RegionInfo regionInfo) {
508    regionsMap.remove(regionInfo.getRegionName());
509    // See HBASE-20860
510    // After master restarts, merged regions' RIT state may not be cleaned,
511    // making sure they are cleaned here
512    if (regionInTransition.containsKey(regionInfo)) {
513      regionInTransition.remove(regionInfo);
514    }
515    // Remove from the offline regions map too if there.
516    if (this.regionOffline.containsKey(regionInfo)) {
517      if (LOG.isTraceEnabled()) LOG.trace("Removing from regionOffline Map: " + regionInfo);
518      this.regionOffline.remove(regionInfo);
519    }
520  }
521
522  public void deleteRegions(final List<RegionInfo> regionInfos) {
523    regionInfos.forEach(this::deleteRegion);
524  }
525
526  ArrayList<RegionStateNode> getTableRegionStateNodes(final TableName tableName) {
527    final ArrayList<RegionStateNode> regions = new ArrayList<RegionStateNode>();
528    for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) {
529      if (!node.getTable().equals(tableName)) break;
530      regions.add(node);
531    }
532    return regions;
533  }
534
535  ArrayList<RegionState> getTableRegionStates(final TableName tableName) {
536    final ArrayList<RegionState> regions = new ArrayList<RegionState>();
537    for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) {
538      if (!node.getTable().equals(tableName)) break;
539      regions.add(node.toRegionState());
540    }
541    return regions;
542  }
543
544  ArrayList<RegionInfo> getTableRegionsInfo(final TableName tableName) {
545    final ArrayList<RegionInfo> regions = new ArrayList<RegionInfo>();
546    for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) {
547      if (!node.getTable().equals(tableName)) break;
548      regions.add(node.getRegionInfo());
549    }
550    return regions;
551  }
552
  /**
   * @return the values view of the region map. It is a view backed by the map, not a copy, so
   *         later changes to the map show through it.
   */
  Collection<RegionStateNode> getRegionStateNodes() {
    return regionsMap.values();
  }
556
557  public ArrayList<RegionState> getRegionStates() {
558    final ArrayList<RegionState> regions = new ArrayList<RegionState>(regionsMap.size());
559    for (RegionStateNode node: regionsMap.values()) {
560      regions.add(node.toRegionState());
561    }
562    return regions;
563  }
564
565  // ==========================================================================
566  //  RegionState helpers
567  // ==========================================================================
568  public RegionState getRegionState(final RegionInfo regionInfo) {
569    RegionStateNode regionStateNode = getRegionStateNode(regionInfo);
570    return regionStateNode == null ? null : regionStateNode.toRegionState();
571  }
572
573  public RegionState getRegionState(final String encodedRegionName) {
574    // TODO: Need a map <encodedName, ...> but it is just dispatch merge...
575    for (RegionStateNode node: regionsMap.values()) {
576      if (node.getRegionInfo().getEncodedName().equals(encodedRegionName)) {
577        return node.toRegionState();
578      }
579    }
580    return null;
581  }
582
583  // ============================================================================================
584  //  TODO: helpers
585  // ============================================================================================
586  public boolean hasTableRegionStates(final TableName tableName) {
587    // TODO
588    return !getTableRegionStates(tableName).isEmpty();
589  }
590
  /**
   * Shorthand for {@link #getRegionsOfTable(TableName, boolean)} with <code>offline</code> set
   * to false.
   * @param table the table of interest
   * @return the table's regions, leaving out nodes in {@link State#OFFLINE} or
   *         {@link State#SPLIT} and regions whose RegionInfo is flagged offline or split.
   */
  public List<RegionInfo> getRegionsOfTable(final TableName table) {
    return getRegionsOfTable(table, false);
  }
597
598  private HRegionLocation createRegionForReopen(RegionStateNode node) {
599    synchronized (node) {
600      if (!include(node, false)) {
601        return null;
602      }
603      if (node.isInState(State.OPEN)) {
604        return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(),
605          node.getOpenSeqNum());
606      } else if (node.isInState(State.OPENING)) {
607        return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), -1);
608      } else {
609        return null;
610      }
611    }
612  }
613
614  /**
615   * Get the regions to be reopened when modifying a table.
616   * <p/>
617   * Notice that the {@code openSeqNum} in the returned HRegionLocation is also used to indicate the
618   * state of this region, positive means the region is in {@link State#OPEN}, -1 means
619   * {@link State#OPENING}. And for regions in other states we do not need reopen them.
620   */
621  public List<HRegionLocation> getRegionsOfTableForReopen(TableName tableName) {
622    return getTableRegionStateNodes(tableName).stream().map(this::createRegionForReopen)
623      .filter(r -> r != null).collect(Collectors.toList());
624  }
625
626  /**
627   * Check whether the region has been reopened. The meaning of the {@link HRegionLocation} is the
628   * same with {@link #getRegionsOfTableForReopen(TableName)}.
629   * <p/>
630   * For a region which is in {@link State#OPEN} before, if the region state is changed or the open
631   * seq num is changed, we can confirm that it has been reopened.
632   * <p/>
633   * For a region which is in {@link State#OPENING} before, usually it will be in {@link State#OPEN}
634   * now and we will schedule a MRP to reopen it. But there are several exceptions:
635   * <ul>
636   * <li>The region is in state other than {@link State#OPEN} or {@link State#OPENING}.</li>
637   * <li>The location of the region has been changed</li>
638   * </ul>
639   * Of course the region could still be in {@link State#OPENING} state and still on the same
640   * server, then here we will still return a {@link HRegionLocation} for it, just like
641   * {@link #getRegionsOfTableForReopen(TableName)}.
642   * @param oldLoc the previous state/location of this region
643   * @return null if the region has been reopened, otherwise a new {@link HRegionLocation} which
644   *         means we still need to reopen the region.
645   * @see #getRegionsOfTableForReopen(TableName)
646   */
647  public HRegionLocation checkReopened(HRegionLocation oldLoc) {
648    RegionStateNode node = getRegionStateNode(oldLoc.getRegion());
649    // HBASE-20921
650    // if the oldLoc's state node does not exist, that means the region is
651    // merged or split, no need to check it
652    if (node == null) {
653      return null;
654    }
655    synchronized (node) {
656      if (oldLoc.getSeqNum() >= 0) {
657        // in OPEN state before
658        if (node.isInState(State.OPEN)) {
659          if (node.getOpenSeqNum() > oldLoc.getSeqNum()) {
660            // normal case, the region has been reopened
661            return null;
662          } else {
663            // the open seq num does not change, need to reopen again
664            return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(),
665              node.getOpenSeqNum());
666          }
667        } else {
668          // the state has been changed so we can make sure that the region has been reopened(not
669          // finished maybe, but not a problem).
670          return null;
671        }
672      } else {
673        // in OPENING state before
674        if (!node.isInState(State.OPEN, State.OPENING)) {
675          // not in OPEN or OPENING state, then we can make sure that the region has been
676          // reopened(not finished maybe, but not a problem)
677          return null;
678        } else {
679          if (!node.getRegionLocation().equals(oldLoc.getServerName())) {
680            // the region has been moved, so we can make sure that the region has been reopened.
681            return null;
682          }
683          // normal case, we are still in OPENING state, or the reopen has been opened and the state
684          // is changed to OPEN.
685          long openSeqNum = node.isInState(State.OPEN) ? node.getOpenSeqNum() : -1;
686          return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), openSeqNum);
687        }
688      }
689    }
690  }
691
  /**
   * Lists the regions of a table, filtered by {@link #include(RegionStateNode, boolean)}.
   * @param table the table of interest
   * @param offline when true, nodes in {@link State#OFFLINE} and regions whose RegionInfo is
   *          flagged offline or split are kept as well; nodes in {@link State#SPLIT} are
   *          always left out
   * @return the matching regions of the table
   */
  public List<RegionInfo> getRegionsOfTable(TableName table, boolean offline) {
    return getRegionsOfTable(table, state -> include(state, offline));
  }
698
699  /**
700   * @return Return the regions of the table; does not include OFFLINE unless you set
701   *         <code>offline</code> to true. Does not include regions that are in the
702   *         {@link State#SPLIT} state.
703   */
704  private List<RegionInfo> getRegionsOfTable(TableName table, Predicate<RegionStateNode> filter) {
705    return getTableRegionStateNodes(table).stream().filter(filter).map(n -> n.getRegionInfo())
706      .collect(Collectors.toList());
707  }
708
709  /**
710   * Utility. Whether to include region in list of regions. Default is to
711   * weed out split and offline regions.
712   * @return True if we should include the <code>node</code> (do not include
713   * if split or offline unless <code>offline</code> is set to true.
714   */
715  boolean include(final RegionStateNode node, final boolean offline) {
716    if (LOG.isTraceEnabled()) {
717      LOG.trace("WORKING ON " + node + " " + node.getRegionInfo());
718    }
719    if (node.isInState(State.SPLIT)) return false;
720    if (node.isInState(State.OFFLINE) && !offline) return false;
721    final RegionInfo hri = node.getRegionInfo();
722    return (!hri.isOffline() && !hri.isSplit()) ||
723        ((hri.isOffline() || hri.isSplit()) && offline);
724  }
725
726  /**
727   * Returns the set of regions hosted by the specified server
728   * @param serverName the server we are interested in
729   * @return set of RegionInfo hosted by the specified server
730   */
731  public List<RegionInfo> getServerRegionInfoSet(final ServerName serverName) {
732    final ServerStateNode serverInfo = getServerNode(serverName);
733    if (serverInfo == null) return Collections.emptyList();
734
735    synchronized (serverInfo) {
736      return serverInfo.getRegionInfoList();
737    }
738  }
739
740  // ============================================================================================
741  // Split helpers
742  // These methods will only be called in ServerCrashProcedure, and at the end of SCP we will remove
743  // the ServerStateNode by calling removeServer.
744  // ============================================================================================
745
  /**
   * Sets the state of the server's node, obtaining the node through getOrCreateServer and
   * holding the node's monitor for the update.
   * @param serverName the server whose state changes
   * @param state the new state
   */
  private void setServerState(ServerName serverName, ServerState state) {
    ServerStateNode serverNode = getOrCreateServer(serverName);
    synchronized (serverNode) {
      serverNode.setState(state);
    }
  }
752
  /**
   * Call this when we start meta log splitting a crashed Server. Moves the server's node to
   * {@link ServerState#SPLITTING_META}.
   * @param serverName the crashed server
   * @see #metaLogSplit(ServerName)
   */
  public void metaLogSplitting(ServerName serverName) {
    setServerState(serverName, ServerState.SPLITTING_META);
  }
760
  /**
   * Called after we've split the meta logs on a crashed Server. Moves the server's node to
   * {@link ServerState#SPLITTING_META_DONE}.
   * @param serverName the crashed server
   * @see #metaLogSplitting(ServerName)
   */
  public void metaLogSplit(ServerName serverName) {
    setServerState(serverName, ServerState.SPLITTING_META_DONE);
  }
768
  /**
   * Call this when we start log splitting for a crashed Server. Moves the server's node to
   * {@link ServerState#SPLITTING}.
   * @param serverName the crashed server
   * @see #logSplit(ServerName)
   */
  public void logSplitting(final ServerName serverName) {
    setServerState(serverName, ServerState.SPLITTING);
  }
776
  /**
   * Called after we've split all logs on a crashed Server. Moves the server's node to
   * {@link ServerState#OFFLINE}.
   * @param serverName the crashed server
   * @see #logSplitting(ServerName)
   */
  public void logSplit(final ServerName serverName) {
    setServerState(serverName, ServerState.OFFLINE);
  }
784
  /**
   * Sets the state of the region's node, creating the node first if the region has none. The
   * update happens under the node's monitor.
   * @param regionInfo the region to update
   * @param state the new state
   */
  public void updateRegionState(final RegionInfo regionInfo, final State state) {
    final RegionStateNode regionNode = getOrCreateRegionStateNode(regionInfo);
    synchronized (regionNode) {
      // No expected states are passed, so the new state is applied unconditionally.
      regionNode.setState(state);
    }
  }
791
792  // ============================================================================================
793  //  TODO:
794  // ============================================================================================
795  public List<RegionInfo> getAssignedRegions() {
796    final List<RegionInfo> result = new ArrayList<RegionInfo>();
797    for (RegionStateNode node: regionsMap.values()) {
798      if (!node.isInTransition()) {
799        result.add(node.getRegionInfo());
800      }
801    }
802    return result;
803  }
804
805  public boolean isRegionInState(final RegionInfo regionInfo, final State... state) {
806    final RegionStateNode region = getRegionStateNode(regionInfo);
807    if (region != null) {
808      synchronized (region) {
809        return region.isInState(state);
810      }
811    }
812    return false;
813  }
814
  /**
   * @return True if the region has a node and that node is in {@link State#OPEN}.
   */
  public boolean isRegionOnline(final RegionInfo regionInfo) {
    return isRegionInState(regionInfo, State.OPEN);
  }
818
  /**
   * @return True if region is offline (In OFFLINE or CLOSED state). A region without a node
   *         yields false.
   */
  public boolean isRegionOffline(final RegionInfo regionInfo) {
    return isRegionInState(regionInfo, State.OFFLINE, State.CLOSED);
  }
825
826  public Map<ServerName, List<RegionInfo>> getSnapShotOfAssignment(
827      final Collection<RegionInfo> regions) {
828    final Map<ServerName, List<RegionInfo>> result = new HashMap<ServerName, List<RegionInfo>>();
829    if (regions != null) {
830      for (RegionInfo hri : regions) {
831        final RegionStateNode node = getRegionStateNode(hri);
832        if (node == null) {
833          continue;
834        }
835        createSnapshot(node, result);
836      }
837    } else {
838      for (RegionStateNode node : regionsMap.values()) {
839        if (node == null) {
840          continue;
841        }
842        createSnapshot(node, result);
843      }
844    }
845    return result;
846  }
847
848  private void createSnapshot(RegionStateNode node, Map<ServerName, List<RegionInfo>> result) {
849    final ServerName serverName = node.getRegionLocation();
850    if (serverName == null) {
851      return;
852    }
853
854    List<RegionInfo> serverRegions = result.get(serverName);
855    if (serverRegions == null) {
856      serverRegions = new ArrayList<RegionInfo>();
857      result.put(serverName, serverRegions);
858    }
859    serverRegions.add(node.getRegionInfo());
860  }
861
862  public Map<RegionInfo, ServerName> getRegionAssignments() {
863    final HashMap<RegionInfo, ServerName> assignments = new HashMap<RegionInfo, ServerName>();
864    for (RegionStateNode node: regionsMap.values()) {
865      assignments.put(node.getRegionInfo(), node.getRegionLocation());
866    }
867    return assignments;
868  }
869
870  public Map<RegionState.State, List<RegionInfo>> getRegionByStateOfTable(TableName tableName) {
871    final State[] states = State.values();
872    final Map<RegionState.State, List<RegionInfo>> tableRegions =
873        new HashMap<State, List<RegionInfo>>(states.length);
874    for (int i = 0; i < states.length; ++i) {
875      tableRegions.put(states[i], new ArrayList<RegionInfo>());
876    }
877
878    for (RegionStateNode node: regionsMap.values()) {
879      if (node.getTable().equals(tableName)) {
880        tableRegions.get(node.getState()).add(node.getRegionInfo());
881      }
882    }
883    return tableRegions;
884  }
885
886  public ServerName getRegionServerOfRegion(final RegionInfo regionInfo) {
887    final RegionStateNode region = getRegionStateNode(regionInfo);
888    if (region != null) {
889      synchronized (region) {
890        ServerName server = region.getRegionLocation();
891        return server != null ? server : region.getLastHost();
892      }
893    }
894    return null;
895  }
896
897  /**
898   * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
899   * Can't let out original since it can change and at least the load balancer
900   * wants to iterate this exported list.  We need to synchronize on regions
901   * since all access to this.servers is under a lock on this.regions.
902   *
903   * @param isByTable If <code>true</code>, return the assignments by table. If <code>false</code>,
904   *                  return the assignments which aggregate the server-load to the cluster level.
905   * @return A clone of current assignments.
906   */
907  public Map<TableName, Map<ServerName, List<RegionInfo>>> getAssignmentsForBalancer(
908      boolean isByTable) {
909    final Map<TableName, Map<ServerName, List<RegionInfo>>> result = new HashMap<>();
910    if (isByTable) {
911      for (RegionStateNode node : regionsMap.values()) {
912        Map<ServerName, List<RegionInfo>> tableResult =
913            result.computeIfAbsent(node.getTable(), t -> new HashMap<>());
914        final ServerName serverName = node.getRegionLocation();
915        if (serverName == null) {
916          LOG.info("Skipping, no server for " + node);
917          continue;
918        }
919        List<RegionInfo> serverResult =
920            tableResult.computeIfAbsent(serverName, s -> new ArrayList<>());
921        serverResult.add(node.getRegionInfo());
922      }
923      // Add online servers with no assignment for the table.
924      for (Map<ServerName, List<RegionInfo>> table : result.values()) {
925        for (ServerName serverName : serverMap.keySet()) {
926          table.putIfAbsent(serverName, new ArrayList<>());
927        }
928      }
929    } else {
930      final HashMap<ServerName, List<RegionInfo>> ensemble = new HashMap<>(serverMap.size());
931      for (ServerStateNode serverNode : serverMap.values()) {
932        ensemble.put(serverNode.getServerName(), serverNode.getRegionInfoList());
933      }
934      // Use a fake table name to represent the whole cluster's assignments
935      result.put(HConstants.ENSEMBLE_TABLE_NAME, ensemble);
936    }
937    return result;
938  }
939
940  // ==========================================================================
941  //  Region in transition helpers
942  // ==========================================================================
943  protected boolean addRegionInTransition(final RegionStateNode regionNode,
944      final RegionTransitionProcedure procedure) {
945    if (procedure != null && !regionNode.setProcedure(procedure)) return false;
946
947    regionInTransition.put(regionNode.getRegionInfo(), regionNode);
948    return true;
949  }
950
951  protected void removeRegionInTransition(final RegionStateNode regionNode,
952      final RegionTransitionProcedure procedure) {
953    regionInTransition.remove(regionNode.getRegionInfo());
954    regionNode.unsetProcedure(procedure);
955  }
956
957  public boolean hasRegionsInTransition() {
958    return !regionInTransition.isEmpty();
959  }
960
961  public boolean isRegionInTransition(final RegionInfo regionInfo) {
962    final RegionStateNode node = regionInTransition.get(regionInfo);
963    return node != null ? node.isInTransition() : false;
964  }
965
966  /**
967   * @return If a procedure-in-transition for <code>hri</code>, return it else null.
968   */
969  public RegionTransitionProcedure getRegionTransitionProcedure(final RegionInfo hri) {
970    RegionStateNode node = regionInTransition.get(hri);
971    if (node == null) return null;
972    return node.getProcedure();
973  }
974
975  public RegionState getRegionTransitionState(final RegionInfo hri) {
976    RegionStateNode node = regionInTransition.get(hri);
977    if (node == null) return null;
978
979    synchronized (node) {
980      return node.isInTransition() ? node.toRegionState() : null;
981    }
982  }
983
984  public List<RegionStateNode> getRegionsInTransition() {
985    return new ArrayList<RegionStateNode>(regionInTransition.values());
986  }
987
988  /**
989   * Get the number of regions in transition.
990   */
991  public int getRegionsInTransitionCount() {
992    return regionInTransition.size();
993  }
994
995  public List<RegionState> getRegionsStateInTransition() {
996    final List<RegionState> rit = new ArrayList<RegionState>(regionInTransition.size());
997    for (RegionStateNode node: regionInTransition.values()) {
998      rit.add(node.toRegionState());
999    }
1000    return rit;
1001  }
1002
1003  public SortedSet<RegionState> getRegionsInTransitionOrderedByTimestamp() {
1004    final SortedSet<RegionState> rit = new TreeSet<RegionState>(REGION_STATE_STAMP_COMPARATOR);
1005    for (RegionStateNode node: regionInTransition.values()) {
1006      rit.add(node.toRegionState());
1007    }
1008    return rit;
1009  }
1010
1011  // ==========================================================================
1012  //  Region offline helpers
1013  // ==========================================================================
1014  // TODO: Populated when we read meta but regions never make it out of here.
1015  public void addToOfflineRegions(final RegionStateNode regionNode) {
1016    LOG.info("Added to offline, CURRENTLY NEVER CLEARED!!! " + regionNode);
1017    regionOffline.put(regionNode.getRegionInfo(), regionNode);
1018  }
1019
1020  // TODO: Unused.
1021  public void removeFromOfflineRegions(final RegionInfo regionInfo) {
1022    regionOffline.remove(regionInfo);
1023  }
1024
1025  // ==========================================================================
1026  //  Region FAIL_OPEN helpers
1027  // ==========================================================================
1028  public static final class RegionFailedOpen {
1029    private final RegionStateNode regionNode;
1030
1031    private volatile Exception exception = null;
1032    private AtomicInteger retries = new AtomicInteger();
1033
1034    public RegionFailedOpen(final RegionStateNode regionNode) {
1035      this.regionNode = regionNode;
1036    }
1037
1038    public RegionStateNode getRegionStateNode() {
1039      return regionNode;
1040    }
1041
1042    public RegionInfo getRegionInfo() {
1043      return regionNode.getRegionInfo();
1044    }
1045
1046    public int incrementAndGetRetries() {
1047      return this.retries.incrementAndGet();
1048    }
1049
1050    public int getRetries() {
1051      return retries.get();
1052    }
1053
1054    public void setException(final Exception exception) {
1055      this.exception = exception;
1056    }
1057
1058    public Exception getException() {
1059      return this.exception;
1060    }
1061  }
1062
1063  public RegionFailedOpen addToFailedOpen(final RegionStateNode regionNode) {
1064    final byte[] key = regionNode.getRegionInfo().getRegionName();
1065    RegionFailedOpen node = regionFailedOpen.get(key);
1066    if (node == null) {
1067      RegionFailedOpen newNode = new RegionFailedOpen(regionNode);
1068      RegionFailedOpen oldNode = regionFailedOpen.putIfAbsent(key, newNode);
1069      node = oldNode != null ? oldNode : newNode;
1070    }
1071    return node;
1072  }
1073
1074  public RegionFailedOpen getFailedOpen(final RegionInfo regionInfo) {
1075    return regionFailedOpen.get(regionInfo.getRegionName());
1076  }
1077
1078  public void removeFromFailedOpen(final RegionInfo regionInfo) {
1079    regionFailedOpen.remove(regionInfo.getRegionName());
1080  }
1081
1082  public List<RegionState> getRegionFailedOpen() {
1083    if (regionFailedOpen.isEmpty()) return Collections.emptyList();
1084
1085    ArrayList<RegionState> regions = new ArrayList<RegionState>(regionFailedOpen.size());
1086    for (RegionFailedOpen r: regionFailedOpen.values()) {
1087      regions.add(r.getRegionStateNode().toRegionState());
1088    }
1089    return regions;
1090  }
1091
1092  // ==========================================================================
1093  //  Servers
1094  // ==========================================================================
1095
1096  /**
1097   * Be judicious calling this method. Do it on server register ONLY otherwise
1098   * you could mess up online server accounting. TOOD: Review usage and convert
1099   * to {@link #getServerNode(ServerName)} where we can.
1100   */
1101  ServerStateNode getOrCreateServer(final ServerName serverName) {
1102    ServerStateNode node = serverMap.get(serverName);
1103    if (node == null) {
1104      LOG.trace("CREATING! {}", serverName, new RuntimeException("WHERE AM I?"));
1105      node = new ServerStateNode(serverName);
1106      ServerStateNode oldNode = serverMap.putIfAbsent(serverName, node);
1107      node = oldNode != null ? oldNode : node;
1108    }
1109    return node;
1110  }
1111
1112  public void removeServer(final ServerName serverName) {
1113    serverMap.remove(serverName);
1114  }
1115
1116  public ServerStateNode getServerNode(final ServerName serverName) {
1117    return serverMap.get(serverName);
1118  }
1119
1120  public double getAverageLoad() {
1121    int numServers = 0;
1122    int totalLoad = 0;
1123    for (ServerStateNode node: serverMap.values()) {
1124      totalLoad += node.getRegionCount();
1125      numServers++;
1126    }
1127    return numServers == 0 ? 0.0: (double)totalLoad / (double)numServers;
1128  }
1129
1130  /**
1131   * Add reference to region to serverstatenode.
1132   * DOES NOT AUTO-CREATE ServerStateNode instance.
1133   * @return Return serverstatenode or null if none.
1134   */
1135  ServerStateNode addRegionToServer(final RegionStateNode regionNode) {
1136    ServerStateNode ssn = getServerNode(regionNode.getRegionLocation());
1137    if (ssn == null) {
1138      return ssn;
1139    }
1140    ssn.addRegion(regionNode);
1141    return ssn;
1142  }
1143
1144  public boolean isReplicaAvailableForRegion(final RegionInfo info) {
1145    // if the region info itself is a replica return true.
1146    if (!RegionReplicaUtil.isDefaultReplica(info)) {
1147      return true;
1148    }
1149    // iterate the regionsMap for the given region name. If there are replicas it should
1150    // list them in order.
1151    for (RegionStateNode node : regionsMap.tailMap(info.getRegionName()).values()) {
1152      if (!node.getTable().equals(info.getTable())
1153          || !ServerRegionReplicaUtil.isReplicasForSameRegion(info, node.getRegionInfo())) {
1154        break;
1155      } else if (!RegionReplicaUtil.isDefaultReplica(node.getRegionInfo())) {
1156        // we have replicas
1157        return true;
1158      }
1159    }
1160    // we don have replicas
1161    return false;
1162  }
1163
1164  public ServerStateNode removeRegionFromServer(final ServerName serverName,
1165      final RegionStateNode regionNode) {
1166    ServerStateNode serverNode = getServerNode(serverName);
1167    if (serverNode != null) {
1168      serverNode.removeRegion(regionNode);
1169    }
1170    return serverNode;
1171  }
1172
1173  // ==========================================================================
1174  //  ToString helpers
1175  // ==========================================================================
1176  public static String regionNamesToString(final Collection<byte[]> regions) {
1177    final StringBuilder sb = new StringBuilder();
1178    final Iterator<byte[]> it = regions.iterator();
1179    sb.append("[");
1180    if (it.hasNext()) {
1181      sb.append(Bytes.toStringBinary(it.next()));
1182      while (it.hasNext()) {
1183        sb.append(", ");
1184        sb.append(Bytes.toStringBinary(it.next()));
1185      }
1186    }
1187    sb.append("]");
1188    return sb.toString();
1189  }
1190}