001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.master.assignment;
021
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.Collection;
025import java.util.Collections;
026import java.util.Comparator;
027import java.util.HashMap;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Map;
031import java.util.Set;
032import java.util.SortedSet;
033import java.util.TreeSet;
034import java.util.concurrent.ConcurrentHashMap;
035import java.util.concurrent.ConcurrentSkipListMap;
036import java.util.concurrent.atomic.AtomicInteger;
037import java.util.function.Predicate;
038import java.util.stream.Collectors;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.HRegionLocation;
041import org.apache.hadoop.hbase.ServerName;
042import org.apache.hadoop.hbase.TableName;
043import org.apache.hadoop.hbase.client.RegionInfo;
044import org.apache.hadoop.hbase.client.RegionReplicaUtil;
045import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
046import org.apache.hadoop.hbase.master.RegionState;
047import org.apache.hadoop.hbase.master.RegionState.State;
048import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
051import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
052import org.apache.yetus.audience.InterfaceAudience;
053import org.slf4j.Logger;
054import org.slf4j.LoggerFactory;
055
056import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
057
058/**
 * RegionStates contains a set of Maps that describe the in-memory state of the
 * AssignmentManager (AM): the regions available in the system, the regions in transition,
 * the offline regions and the servers holding regions.
062 */
063@InterfaceAudience.Private
064public class RegionStates {
065  private static final Logger LOG = LoggerFactory.getLogger(RegionStates.class);
066
067  protected static final State[] STATES_EXPECTED_ON_OPEN = new State[] {
068    State.OPEN, // State may already be OPEN if we died after receiving the OPEN from regionserver
069                // but before complete finish of AssignProcedure. HBASE-20100.
070    State.OFFLINE, State.CLOSED,      // disable/offline
071    State.SPLITTING, State.SPLIT,     // ServerCrashProcedure
072    State.OPENING, State.FAILED_OPEN, // already in-progress (retrying)
073  };
074
075  protected static final State[] STATES_EXPECTED_ON_CLOSE = new State[] {
076    State.SPLITTING, State.SPLIT, State.MERGING, // ServerCrashProcedure
077    State.OPEN,                   // enabled/open
078    State.CLOSING                 // already in-progress (retrying)
079  };
080
081  private static class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
082    public AssignmentProcedureEvent(final RegionInfo regionInfo) {
083      super(regionInfo);
084    }
085  }
086
087  private static class ServerReportEvent extends ProcedureEvent<ServerName> {
088    public ServerReportEvent(final ServerName serverName) {
089      super(serverName);
090    }
091  }
092
093  /**
094   * Current Region State.
095   * In-memory only. Not persisted.
096   */
097  // Mutable/Immutable? Changes have to be synchronized or not?
098  // Data members are volatile which seems to say multi-threaded access is fine.
099  // In the below we do check and set but the check state could change before
100  // we do the set because no synchronization....which seems dodgy. Clear up
101  // understanding here... how many threads accessing? Do locks make it so one
102  // thread at a time working on a single Region's RegionStateNode? Lets presume
103  // so for now. Odd is that elsewhere in this RegionStates, we synchronize on
104  // the RegionStateNode instance. TODO.
105  public static class RegionStateNode implements Comparable<RegionStateNode> {
106    private final RegionInfo regionInfo;
107    private final ProcedureEvent<?> event;
108
109    private volatile RegionTransitionProcedure procedure = null;
110    private volatile ServerName regionLocation = null;
111    // notice that, the lastHost will only be updated when a region is successfully CLOSED through
112    // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync
113    // with the data in meta.
114    private volatile ServerName lastHost = null;
115    /**
116     * A Region-in-Transition (RIT) moves through states.
117     * See {@link State} for complete list. A Region that
118     * is opened moves from OFFLINE => OPENING => OPENED.
119     */
120    private volatile State state = State.OFFLINE;
121
122    /**
123     * Updated whenever a call to {@link #setRegionLocation(ServerName)}
124     * or {@link #setState(State, State...)}.
125     */
126    private volatile long lastUpdate = 0;
127
128    private volatile long openSeqNum = HConstants.NO_SEQNUM;
129
130    public RegionStateNode(final RegionInfo regionInfo) {
131      this.regionInfo = regionInfo;
132      this.event = new AssignmentProcedureEvent(regionInfo);
133    }
134
135    /**
136     * @param update new region state this node should be assigned.
137     * @param expected current state should be in this given list of expected states
138     * @return true, if current state is in expected list; otherwise false.
139     */
140    public boolean setState(final State update, final State... expected) {
141      if (!isInState(expected)) {
142        return false;
143      }
144      this.state = update;
145      this.lastUpdate = EnvironmentEdgeManager.currentTime();
146      return true;
147    }
148
149    /**
150     * Put region into OFFLINE mode (set state and clear location).
151     * @return Last recorded server deploy
152     */
153    public ServerName offline() {
154      setState(State.OFFLINE);
155      return setRegionLocation(null);
156    }
157
158    /**
159     * Set new {@link State} but only if currently in <code>expected</code> State
160     * (if not, throw {@link UnexpectedStateException}.
161     */
162    public void transitionState(final State update, final State... expected)
163    throws UnexpectedStateException {
164      if (!setState(update, expected)) {
165        throw new UnexpectedStateException("Expected " + Arrays.toString(expected) +
166          " so could move to " + update + " but current state=" + getState());
167      }
168    }
169
170    public boolean isInState(final State... expected) {
171      if (expected != null && expected.length > 0) {
172        boolean expectedState = false;
173        for (int i = 0; i < expected.length; ++i) {
174          expectedState |= (getState() == expected[i]);
175        }
176        return expectedState;
177      }
178      return true;
179    }
180
181    public boolean isStuck() {
182      return isInState(State.FAILED_OPEN) && getProcedure() != null;
183    }
184
185    public boolean isInTransition() {
186      return getProcedure() != null;
187    }
188
189    public long getLastUpdate() {
190      return procedure != null ? procedure.getLastUpdate() : lastUpdate;
191    }
192
193    public void setLastHost(final ServerName serverName) {
194      this.lastHost = serverName;
195    }
196
197    public void setOpenSeqNum(final long seqId) {
198      this.openSeqNum = seqId;
199    }
200
201    public ServerName setRegionLocation(final ServerName serverName) {
202      ServerName lastRegionLocation = this.regionLocation;
203      if (LOG.isTraceEnabled() && serverName == null) {
204        LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE"));
205      }
206      this.regionLocation = serverName;
207      this.lastUpdate = EnvironmentEdgeManager.currentTime();
208      return lastRegionLocation;
209    }
210
211    public boolean setProcedure(final RegionTransitionProcedure proc) {
212      if (this.procedure != null && this.procedure != proc) {
213        return false;
214      }
215      this.procedure = proc;
216      return true;
217    }
218
219    public boolean unsetProcedure(final RegionTransitionProcedure proc) {
220      if (this.procedure != null && this.procedure != proc) {
221        return false;
222      }
223      this.procedure = null;
224      return true;
225    }
226
227    public RegionTransitionProcedure getProcedure() {
228      return procedure;
229    }
230
231    public ProcedureEvent<?> getProcedureEvent() {
232      return event;
233    }
234
235    public RegionInfo getRegionInfo() {
236      return regionInfo;
237    }
238
239    public TableName getTable() {
240      return getRegionInfo().getTable();
241    }
242
243    public boolean isSystemTable() {
244      return getTable().isSystemTable();
245    }
246
247    public ServerName getLastHost() {
248      return lastHost;
249    }
250
251    public ServerName getRegionLocation() {
252      return regionLocation;
253    }
254
255    public State getState() {
256      return state;
257    }
258
259    public long getOpenSeqNum() {
260      return openSeqNum;
261    }
262
263    public int getFormatVersion() {
264      // we don't have any format for now
265      // it should probably be in regionInfo.getFormatVersion()
266      return 0;
267    }
268
269    public RegionState toRegionState() {
270      return new RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation());
271    }
272
273    @Override
274    public int compareTo(final RegionStateNode other) {
275      // NOTE: RegionInfo sort by table first, so we are relying on that.
276      // we have a TestRegionState#testOrderedByTable() that check for that.
277      return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo());
278    }
279
280    @Override
281    public int hashCode() {
282      return getRegionInfo().hashCode();
283    }
284
285    @Override
286    public boolean equals(final Object other) {
287      if (this == other) return true;
288      if (!(other instanceof RegionStateNode)) return false;
289      return compareTo((RegionStateNode)other) == 0;
290    }
291
292    @Override
293    public String toString() {
294      return toDescriptiveString();
295    }
296
297    public String toShortString() {
298      // rit= is the current Region-In-Transition State -- see State enum.
299      return String.format("rit=%s, location=%s", getState(), getRegionLocation());
300    }
301
302    public String toDescriptiveString() {
303      return String.format("%s, table=%s, region=%s",
304        toShortString(), getTable(), getRegionInfo().getEncodedName());
305    }
306  }
307
308  // This comparator sorts the RegionStates by time stamp then Region name.
309  // Comparing by timestamp alone can lead us to discard different RegionStates that happen
310  // to share a timestamp.
311  private static class RegionStateStampComparator implements Comparator<RegionState> {
312    @Override
313    public int compare(final RegionState l, final RegionState r) {
314      int stampCmp = Long.compare(l.getStamp(), r.getStamp());
315      return stampCmp != 0 ? stampCmp : RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion());
316    }
317  }
318
319  /**
320   * Server State.
321   */
322  public enum ServerState {
323    /**
324     * Initial state. Available.
325     */
326    ONLINE,
327
328    /**
329     * Only server which carries meta can have this state. We will split wal for meta and then
330     * assign meta first before splitting other wals.
331     */
332    SPLITTING_META,
333
334    /**
335     * Indicate that the meta splitting is done. We need this state so that the UnassignProcedure
336     * for meta can safely quit. See the comments in UnassignProcedure.remoteCallFailed for more
337     * details.
338     */
339    SPLITTING_META_DONE,
340
341    /**
342     * Server expired/crashed. Currently undergoing WAL splitting.
343     */
344    SPLITTING,
345
346    /**
347     * WAL splitting done. This state will be used to tell the UnassignProcedure that it can safely
348     * quit. See the comments in UnassignProcedure.remoteCallFailed for more details.
349     */
350    OFFLINE
351  }
352
353  /**
354   * State of Server; list of hosted regions, etc.
355   */
356  public static class ServerStateNode implements Comparable<ServerStateNode> {
357    private final ServerReportEvent reportEvent;
358
359    private final Set<RegionStateNode> regions;
360    private final ServerName serverName;
361
362    private volatile ServerState state = ServerState.ONLINE;
363
364    public ServerStateNode(final ServerName serverName) {
365      this.serverName = serverName;
366      this.regions = ConcurrentHashMap.newKeySet();
367      this.reportEvent = new ServerReportEvent(serverName);
368    }
369
370    public ServerName getServerName() {
371      return serverName;
372    }
373
374    public ServerState getState() {
375      return state;
376    }
377
378    public ProcedureEvent<?> getReportEvent() {
379      return reportEvent;
380    }
381
382    public boolean isInState(final ServerState... expected) {
383      boolean expectedState = false;
384      if (expected != null) {
385        for (int i = 0; i < expected.length; ++i) {
386          expectedState |= (state == expected[i]);
387        }
388      }
389      return expectedState;
390    }
391
392    private void setState(final ServerState state) {
393      this.state = state;
394    }
395
396    public Set<RegionStateNode> getRegions() {
397      return regions;
398    }
399
400    public int getRegionCount() {
401      return regions.size();
402    }
403
404    public ArrayList<RegionInfo> getRegionInfoList() {
405      ArrayList<RegionInfo> hris = new ArrayList<RegionInfo>(regions.size());
406      for (RegionStateNode region: regions) {
407        hris.add(region.getRegionInfo());
408      }
409      return hris;
410    }
411
412    public void addRegion(final RegionStateNode regionNode) {
413      this.regions.add(regionNode);
414    }
415
416    public void removeRegion(final RegionStateNode regionNode) {
417      this.regions.remove(regionNode);
418    }
419
420    @Override
421    public int compareTo(final ServerStateNode other) {
422      return getServerName().compareTo(other.getServerName());
423    }
424
425    @Override
426    public int hashCode() {
427      return getServerName().hashCode();
428    }
429
430    @Override
431    public boolean equals(final Object other) {
432      if (this == other) return true;
433      if (!(other instanceof ServerStateNode)) return false;
434      return compareTo((ServerStateNode)other) == 0;
435    }
436
437    @Override
438    public String toString() {
439      return String.format("name=%s, state=%s, regionCount=%d", getServerName(), getState(),
440          getRegionCount());
441    }
442  }
443
444  public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR =
445      new RegionStateStampComparator();
446
447  // TODO: Replace the ConcurrentSkipListMaps
448  /**
449   * RegionName -- i.e. RegionInfo.getRegionName() -- as bytes to {@link RegionStateNode}
450   */
451  private final ConcurrentSkipListMap<byte[], RegionStateNode> regionsMap =
452      new ConcurrentSkipListMap<byte[], RegionStateNode>(Bytes.BYTES_COMPARATOR);
453
454  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionInTransition =
455    new ConcurrentSkipListMap<RegionInfo, RegionStateNode>(RegionInfo.COMPARATOR);
456
457  /**
458   * Regions marked as offline on a read of hbase:meta. Unused or at least, once
459   * offlined, regions have no means of coming on line again. TODO.
460   */
461  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionOffline =
462    new ConcurrentSkipListMap<RegionInfo, RegionStateNode>();
463
464  private final ConcurrentSkipListMap<byte[], RegionFailedOpen> regionFailedOpen =
465    new ConcurrentSkipListMap<byte[], RegionFailedOpen>(Bytes.BYTES_COMPARATOR);
466
467  private final ConcurrentHashMap<ServerName, ServerStateNode> serverMap =
468      new ConcurrentHashMap<ServerName, ServerStateNode>();
469
470  public RegionStates() { }
471
472  public void clear() {
473    regionsMap.clear();
474    regionInTransition.clear();
475    regionOffline.clear();
476    serverMap.clear();
477  }
478
479  @VisibleForTesting
480  public boolean isRegionInRegionStates(final RegionInfo hri) {
481    return (regionsMap.containsKey(hri.getRegionName()) || regionInTransition.containsKey(hri)
482        || regionOffline.containsKey(hri));
483  }
484
485  // ==========================================================================
486  //  RegionStateNode helpers
487  // ==========================================================================
488  protected RegionStateNode createRegionStateNode(final RegionInfo regionInfo) {
489    RegionStateNode newNode = new RegionStateNode(regionInfo);
490    RegionStateNode oldNode = regionsMap.putIfAbsent(regionInfo.getRegionName(), newNode);
491    return oldNode != null ? oldNode : newNode;
492  }
493
494  protected RegionStateNode getOrCreateRegionStateNode(final RegionInfo regionInfo) {
495    RegionStateNode node = regionsMap.get(regionInfo.getRegionName());
496    return node != null ? node : createRegionStateNode(regionInfo);
497  }
498
499  RegionStateNode getRegionStateNodeFromName(final byte[] regionName) {
500    return regionsMap.get(regionName);
501  }
502
503  public RegionStateNode getRegionStateNode(final RegionInfo regionInfo) {
504    return getRegionStateNodeFromName(regionInfo.getRegionName());
505  }
506
507  public void deleteRegion(final RegionInfo regionInfo) {
508    regionsMap.remove(regionInfo.getRegionName());
509    // See HBASE-20860
510    // After master restarts, merged regions' RIT state may not be cleaned,
511    // making sure they are cleaned here
512    if (regionInTransition.containsKey(regionInfo)) {
513      regionInTransition.remove(regionInfo);
514    }
515    // Remove from the offline regions map too if there.
516    if (this.regionOffline.containsKey(regionInfo)) {
517      if (LOG.isTraceEnabled()) LOG.trace("Removing from regionOffline Map: " + regionInfo);
518      this.regionOffline.remove(regionInfo);
519    }
520  }
521
522  ArrayList<RegionStateNode> getTableRegionStateNodes(final TableName tableName) {
523    final ArrayList<RegionStateNode> regions = new ArrayList<RegionStateNode>();
524    for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) {
525      if (!node.getTable().equals(tableName)) break;
526      regions.add(node);
527    }
528    return regions;
529  }
530
531  ArrayList<RegionState> getTableRegionStates(final TableName tableName) {
532    final ArrayList<RegionState> regions = new ArrayList<RegionState>();
533    for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) {
534      if (!node.getTable().equals(tableName)) break;
535      regions.add(node.toRegionState());
536    }
537    return regions;
538  }
539
540  ArrayList<RegionInfo> getTableRegionsInfo(final TableName tableName) {
541    final ArrayList<RegionInfo> regions = new ArrayList<RegionInfo>();
542    for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) {
543      if (!node.getTable().equals(tableName)) break;
544      regions.add(node.getRegionInfo());
545    }
546    return regions;
547  }
548
549  Collection<RegionStateNode> getRegionStateNodes() {
550    return regionsMap.values();
551  }
552
553  public ArrayList<RegionState> getRegionStates() {
554    final ArrayList<RegionState> regions = new ArrayList<RegionState>(regionsMap.size());
555    for (RegionStateNode node: regionsMap.values()) {
556      regions.add(node.toRegionState());
557    }
558    return regions;
559  }
560
561  // ==========================================================================
562  //  RegionState helpers
563  // ==========================================================================
564  public RegionState getRegionState(final RegionInfo regionInfo) {
565    RegionStateNode regionStateNode = getRegionStateNode(regionInfo);
566    return regionStateNode == null ? null : regionStateNode.toRegionState();
567  }
568
569  public RegionState getRegionState(final String encodedRegionName) {
570    // TODO: Need a map <encodedName, ...> but it is just dispatch merge...
571    for (RegionStateNode node: regionsMap.values()) {
572      if (node.getRegionInfo().getEncodedName().equals(encodedRegionName)) {
573        return node.toRegionState();
574      }
575    }
576    return null;
577  }
578
579  // ============================================================================================
580  //  TODO: helpers
581  // ============================================================================================
582  public boolean hasTableRegionStates(final TableName tableName) {
583    // TODO
584    return !getTableRegionStates(tableName).isEmpty();
585  }
586
587  /**
588   * @return Return online regions of table; does not include OFFLINE or SPLITTING regions.
589   */
590  public List<RegionInfo> getRegionsOfTable(final TableName table) {
591    return getRegionsOfTable(table, false);
592  }
593
594  private HRegionLocation createRegionForReopen(RegionStateNode node) {
595    synchronized (node) {
596      if (!include(node, false)) {
597        return null;
598      }
599      if (node.isInState(State.OPEN)) {
600        return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(),
601          node.getOpenSeqNum());
602      } else if (node.isInState(State.OPENING)) {
603        return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), -1);
604      } else {
605        return null;
606      }
607    }
608  }
609
610  /**
611   * Get the regions to be reopened when modifying a table.
612   * <p/>
613   * Notice that the {@code openSeqNum} in the returned HRegionLocation is also used to indicate the
614   * state of this region, positive means the region is in {@link State#OPEN}, -1 means
615   * {@link State#OPENING}. And for regions in other states we do not need reopen them.
616   */
617  public List<HRegionLocation> getRegionsOfTableForReopen(TableName tableName) {
618    return getTableRegionStateNodes(tableName).stream().map(this::createRegionForReopen)
619      .filter(r -> r != null).collect(Collectors.toList());
620  }
621
622  /**
623   * Check whether the region has been reopened. The meaning of the {@link HRegionLocation} is the
624   * same with {@link #getRegionsOfTableForReopen(TableName)}.
625   * <p/>
626   * For a region which is in {@link State#OPEN} before, if the region state is changed or the open
627   * seq num is changed, we can confirm that it has been reopened.
628   * <p/>
629   * For a region which is in {@link State#OPENING} before, usually it will be in {@link State#OPEN}
630   * now and we will schedule a MRP to reopen it. But there are several exceptions:
631   * <ul>
632   * <li>The region is in state other than {@link State#OPEN} or {@link State#OPENING}.</li>
633   * <li>The location of the region has been changed</li>
634   * </ul>
635   * Of course the region could still be in {@link State#OPENING} state and still on the same
636   * server, then here we will still return a {@link HRegionLocation} for it, just like
637   * {@link #getRegionsOfTableForReopen(TableName)}.
638   * @param oldLoc the previous state/location of this region
639   * @return null if the region has been reopened, otherwise a new {@link HRegionLocation} which
640   *         means we still need to reopen the region.
641   * @see #getRegionsOfTableForReopen(TableName)
642   */
643  public HRegionLocation checkReopened(HRegionLocation oldLoc) {
644    RegionStateNode node = getRegionStateNode(oldLoc.getRegion());
645    // HBASE-20921
646    // if the oldLoc's state node does not exist, that means the region is
647    // merged or split, no need to check it
648    if (node == null) {
649      return null;
650    }
651    synchronized (node) {
652      if (oldLoc.getSeqNum() >= 0) {
653        // in OPEN state before
654        if (node.isInState(State.OPEN)) {
655          if (node.getOpenSeqNum() > oldLoc.getSeqNum()) {
656            // normal case, the region has been reopened
657            return null;
658          } else {
659            // the open seq num does not change, need to reopen again
660            return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(),
661              node.getOpenSeqNum());
662          }
663        } else {
664          // the state has been changed so we can make sure that the region has been reopened(not
665          // finished maybe, but not a problem).
666          return null;
667        }
668      } else {
669        // in OPENING state before
670        if (!node.isInState(State.OPEN, State.OPENING)) {
671          // not in OPEN or OPENING state, then we can make sure that the region has been
672          // reopened(not finished maybe, but not a problem)
673          return null;
674        } else {
675          if (!node.getRegionLocation().equals(oldLoc.getServerName())) {
676            // the region has been moved, so we can make sure that the region has been reopened.
677            return null;
678          }
679          // normal case, we are still in OPENING state, or the reopen has been opened and the state
680          // is changed to OPEN.
681          long openSeqNum = node.isInState(State.OPEN) ? node.getOpenSeqNum() : -1;
682          return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), openSeqNum);
683        }
684      }
685    }
686  }
687
688  /**
689   * @return Return online regions of table; does not include OFFLINE or SPLITTING regions.
690   */
691  public List<RegionInfo> getRegionsOfTable(TableName table, boolean offline) {
692    return getRegionsOfTable(table, state -> include(state, offline));
693  }
694
695  /**
696   * @return Return the regions of the table; does not include OFFLINE unless you set
697   *         <code>offline</code> to true. Does not include regions that are in the
698   *         {@link State#SPLIT} state.
699   */
700  private List<RegionInfo> getRegionsOfTable(TableName table, Predicate<RegionStateNode> filter) {
701    return getTableRegionStateNodes(table).stream().filter(filter).map(n -> n.getRegionInfo())
702      .collect(Collectors.toList());
703  }
704
705  /**
706   * Utility. Whether to include region in list of regions. Default is to
707   * weed out split and offline regions.
708   * @return True if we should include the <code>node</code> (do not include
709   * if split or offline unless <code>offline</code> is set to true.
710   */
711  boolean include(final RegionStateNode node, final boolean offline) {
712    if (LOG.isTraceEnabled()) {
713      LOG.trace("WORKING ON " + node + " " + node.getRegionInfo());
714    }
715    if (node.isInState(State.SPLIT)) return false;
716    if (node.isInState(State.OFFLINE) && !offline) return false;
717    final RegionInfo hri = node.getRegionInfo();
718    return (!hri.isOffline() && !hri.isSplit()) ||
719        ((hri.isOffline() || hri.isSplit()) && offline);
720  }
721
722  /**
723   * Returns the set of regions hosted by the specified server
724   * @param serverName the server we are interested in
725   * @return set of RegionInfo hosted by the specified server
726   */
727  public List<RegionInfo> getServerRegionInfoSet(final ServerName serverName) {
728    final ServerStateNode serverInfo = getServerNode(serverName);
729    if (serverInfo == null) return Collections.emptyList();
730
731    synchronized (serverInfo) {
732      return serverInfo.getRegionInfoList();
733    }
734  }
735
736  // ============================================================================================
737  // Split helpers
738  // These methods will only be called in ServerCrashProcedure, and at the end of SCP we will remove
739  // the ServerStateNode by calling removeServer.
740  // ============================================================================================
741
742  private void setServerState(ServerName serverName, ServerState state) {
743    ServerStateNode serverNode = getOrCreateServer(serverName);
744    synchronized (serverNode) {
745      serverNode.setState(state);
746    }
747  }
748
749  /**
750   * Call this when we start meta log splitting a crashed Server.
751   * @see #metaLogSplit(ServerName)
752   */
753  public void metaLogSplitting(ServerName serverName) {
754    setServerState(serverName, ServerState.SPLITTING_META);
755  }
756
757  /**
758   * Called after we've split the meta logs on a crashed Server.
759   * @see #metaLogSplitting(ServerName)
760   */
761  public void metaLogSplit(ServerName serverName) {
762    setServerState(serverName, ServerState.SPLITTING_META_DONE);
763  }
764
765  /**
766   * Call this when we start log splitting for a crashed Server.
767   * @see #logSplit(ServerName)
768   */
769  public void logSplitting(final ServerName serverName) {
770    setServerState(serverName, ServerState.SPLITTING);
771  }
772
773  /**
774   * Called after we've split all logs on a crashed Server.
775   * @see #logSplitting(ServerName)
776   */
777  public void logSplit(final ServerName serverName) {
778    setServerState(serverName, ServerState.OFFLINE);
779  }
780
781  @VisibleForTesting
782  public void updateRegionState(final RegionInfo regionInfo, final State state) {
783    final RegionStateNode regionNode = getOrCreateRegionStateNode(regionInfo);
784    synchronized (regionNode) {
785      regionNode.setState(state);
786    }
787  }
788
789  // ============================================================================================
790  //  TODO:
791  // ============================================================================================
792  public List<RegionInfo> getAssignedRegions() {
793    final List<RegionInfo> result = new ArrayList<RegionInfo>();
794    for (RegionStateNode node: regionsMap.values()) {
795      if (!node.isInTransition()) {
796        result.add(node.getRegionInfo());
797      }
798    }
799    return result;
800  }
801
802  public boolean isRegionInState(final RegionInfo regionInfo, final State... state) {
803    final RegionStateNode region = getRegionStateNode(regionInfo);
804    if (region != null) {
805      synchronized (region) {
806        return region.isInState(state);
807      }
808    }
809    return false;
810  }
811
812  public boolean isRegionOnline(final RegionInfo regionInfo) {
813    return isRegionInState(regionInfo, State.OPEN);
814  }
815
816  /**
817   * @return True if region is offline (In OFFLINE or CLOSED state).
818   */
819  public boolean isRegionOffline(final RegionInfo regionInfo) {
820    return isRegionInState(regionInfo, State.OFFLINE, State.CLOSED);
821  }
822
823  public Map<ServerName, List<RegionInfo>> getSnapShotOfAssignment(
824      final Collection<RegionInfo> regions) {
825    final Map<ServerName, List<RegionInfo>> result = new HashMap<ServerName, List<RegionInfo>>();
826    if (regions != null) {
827      for (RegionInfo hri : regions) {
828        final RegionStateNode node = getRegionStateNode(hri);
829        if (node == null) {
830          continue;
831        }
832        createSnapshot(node, result);
833      }
834    } else {
835      for (RegionStateNode node : regionsMap.values()) {
836        if (node == null) {
837          continue;
838        }
839        createSnapshot(node, result);
840      }
841    }
842    return result;
843  }
844
845  private void createSnapshot(RegionStateNode node, Map<ServerName, List<RegionInfo>> result) {
846    final ServerName serverName = node.getRegionLocation();
847    if (serverName == null) {
848      return;
849    }
850
851    List<RegionInfo> serverRegions = result.get(serverName);
852    if (serverRegions == null) {
853      serverRegions = new ArrayList<RegionInfo>();
854      result.put(serverName, serverRegions);
855    }
856    serverRegions.add(node.getRegionInfo());
857  }
858
859  public Map<RegionInfo, ServerName> getRegionAssignments() {
860    final HashMap<RegionInfo, ServerName> assignments = new HashMap<RegionInfo, ServerName>();
861    for (RegionStateNode node: regionsMap.values()) {
862      assignments.put(node.getRegionInfo(), node.getRegionLocation());
863    }
864    return assignments;
865  }
866
867  public Map<RegionState.State, List<RegionInfo>> getRegionByStateOfTable(TableName tableName) {
868    final State[] states = State.values();
869    final Map<RegionState.State, List<RegionInfo>> tableRegions =
870        new HashMap<State, List<RegionInfo>>(states.length);
871    for (int i = 0; i < states.length; ++i) {
872      tableRegions.put(states[i], new ArrayList<RegionInfo>());
873    }
874
875    for (RegionStateNode node: regionsMap.values()) {
876      if (node.getTable().equals(tableName)) {
877        tableRegions.get(node.getState()).add(node.getRegionInfo());
878      }
879    }
880    return tableRegions;
881  }
882
883  public ServerName getRegionServerOfRegion(final RegionInfo regionInfo) {
884    final RegionStateNode region = getRegionStateNode(regionInfo);
885    if (region != null) {
886      synchronized (region) {
887        ServerName server = region.getRegionLocation();
888        return server != null ? server : region.getLastHost();
889      }
890    }
891    return null;
892  }
893
894  /**
895   * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
896   * Can't let out original since it can change and at least the load balancer
897   * wants to iterate this exported list.  We need to synchronize on regions
898   * since all access to this.servers is under a lock on this.regions.
899   * @param forceByCluster a flag to force to aggregate the server-load to the cluster level
900   * @return A clone of current assignments by table.
901   */
902  public Map<TableName, Map<ServerName, List<RegionInfo>>> getAssignmentsByTable(
903      final boolean forceByCluster) {
904    if (!forceByCluster) return getAssignmentsByTable();
905
906    final HashMap<ServerName, List<RegionInfo>> ensemble =
907      new HashMap<ServerName, List<RegionInfo>>(serverMap.size());
908    for (ServerStateNode serverNode: serverMap.values()) {
909      ensemble.put(serverNode.getServerName(), serverNode.getRegionInfoList());
910    }
911
912    // TODO: can we use Collections.singletonMap(HConstants.ENSEMBLE_TABLE_NAME, ensemble)?
913    final Map<TableName, Map<ServerName, List<RegionInfo>>> result =
914      new HashMap<TableName, Map<ServerName, List<RegionInfo>>>(1);
915    result.put(HConstants.ENSEMBLE_TABLE_NAME, ensemble);
916    return result;
917  }
918
919  public Map<TableName, Map<ServerName, List<RegionInfo>>> getAssignmentsByTable() {
920    final Map<TableName, Map<ServerName, List<RegionInfo>>> result = new HashMap<>();
921    for (RegionStateNode node: regionsMap.values()) {
922      Map<ServerName, List<RegionInfo>> tableResult = result.get(node.getTable());
923      if (tableResult == null) {
924        tableResult = new HashMap<ServerName, List<RegionInfo>>();
925        result.put(node.getTable(), tableResult);
926      }
927
928      final ServerName serverName = node.getRegionLocation();
929      if (serverName == null) {
930        LOG.info("Skipping, no server for " + node);
931        continue;
932      }
933      List<RegionInfo> serverResult = tableResult.get(serverName);
934      if (serverResult == null) {
935        serverResult = new ArrayList<RegionInfo>();
936        tableResult.put(serverName, serverResult);
937      }
938
939      serverResult.add(node.getRegionInfo());
940    }
941    // Add online servers with no assignment for the table.
942    for (Map<ServerName, List<RegionInfo>> table: result.values()) {
943        for (ServerName svr : serverMap.keySet()) {
944          if (!table.containsKey(svr)) {
945            table.put(svr, new ArrayList<RegionInfo>());
946          }
947        }
948    }
949    return result;
950  }
951
952  // ==========================================================================
953  //  Region in transition helpers
954  // ==========================================================================
955  protected boolean addRegionInTransition(final RegionStateNode regionNode,
956      final RegionTransitionProcedure procedure) {
957    if (procedure != null && !regionNode.setProcedure(procedure)) return false;
958
959    regionInTransition.put(regionNode.getRegionInfo(), regionNode);
960    return true;
961  }
962
  /**
   * Removes a region node from the regions-in-transition map and then detaches the procedure
   * from the node via {@code unsetProcedure}.
   * @param regionNode node to remove; looked up by its {@link RegionInfo}
   * @param procedure procedure handed to {@code regionNode.unsetProcedure}
   */
  protected void removeRegionInTransition(final RegionStateNode regionNode,
      final RegionTransitionProcedure procedure) {
    regionInTransition.remove(regionNode.getRegionInfo());
    regionNode.unsetProcedure(procedure);
  }
968
  /**
   * @return True if the regions-in-transition map holds at least one entry.
   */
  public boolean hasRegionsInTransition() {
    return !regionInTransition.isEmpty();
  }
972
973  public boolean isRegionInTransition(final RegionInfo regionInfo) {
974    final RegionStateNode node = regionInTransition.get(regionInfo);
975    return node != null ? node.isInTransition() : false;
976  }
977
978  /**
979   * @return If a procedure-in-transition for <code>hri</code>, return it else null.
980   */
981  public RegionTransitionProcedure getRegionTransitionProcedure(final RegionInfo hri) {
982    RegionStateNode node = regionInTransition.get(hri);
983    if (node == null) return null;
984    return node.getProcedure();
985  }
986
987  public RegionState getRegionTransitionState(final RegionInfo hri) {
988    RegionStateNode node = regionInTransition.get(hri);
989    if (node == null) return null;
990
991    synchronized (node) {
992      return node.isInTransition() ? node.toRegionState() : null;
993    }
994  }
995
  /**
   * @return a new list holding the nodes currently in the regions-in-transition map; the list
   *   is a copy, the nodes themselves are not copied.
   */
  public List<RegionStateNode> getRegionsInTransition() {
    return new ArrayList<RegionStateNode>(regionInTransition.values());
  }
999
  /**
   * Get the number of regions in transition.
   * @return the current size of the regions-in-transition map
   */
  public int getRegionsInTransitionCount() {
    return regionInTransition.size();
  }
1006
1007  public List<RegionState> getRegionsStateInTransition() {
1008    final List<RegionState> rit = new ArrayList<RegionState>(regionInTransition.size());
1009    for (RegionStateNode node: regionInTransition.values()) {
1010      rit.add(node.toRegionState());
1011    }
1012    return rit;
1013  }
1014
1015  public SortedSet<RegionState> getRegionsInTransitionOrderedByTimestamp() {
1016    final SortedSet<RegionState> rit = new TreeSet<RegionState>(REGION_STATE_STAMP_COMPARATOR);
1017    for (RegionStateNode node: regionInTransition.values()) {
1018      rit.add(node.toRegionState());
1019    }
1020    return rit;
1021  }
1022
1023  // ==========================================================================
1024  //  Region offline helpers
1025  // ==========================================================================
  // TODO: Populated when we read meta but regions never make it out of here.
  /**
   * Records the given node in the offline-regions map, keyed by its {@link RegionInfo}, and
   * logs the addition at INFO level.
   * @param regionNode node of the region to track as offline
   */
  public void addToOfflineRegions(final RegionStateNode regionNode) {
    LOG.info("Added to offline, CURRENTLY NEVER CLEARED!!! " + regionNode);
    regionOffline.put(regionNode.getRegionInfo(), regionNode);
  }
1031
  // TODO: Unused.
  /**
   * Removes the entry for the given region from the offline-regions map, if present.
   * @param regionInfo region to stop tracking as offline
   */
  public void removeFromOfflineRegions(final RegionInfo regionInfo) {
    regionOffline.remove(regionInfo);
  }
1036
1037  // ==========================================================================
1038  //  Region FAIL_OPEN helpers
1039  // ==========================================================================
1040  public static final class RegionFailedOpen {
1041    private final RegionStateNode regionNode;
1042
1043    private volatile Exception exception = null;
1044    private AtomicInteger retries = new AtomicInteger();
1045
1046    public RegionFailedOpen(final RegionStateNode regionNode) {
1047      this.regionNode = regionNode;
1048    }
1049
1050    public RegionStateNode getRegionStateNode() {
1051      return regionNode;
1052    }
1053
1054    public RegionInfo getRegionInfo() {
1055      return regionNode.getRegionInfo();
1056    }
1057
1058    public int incrementAndGetRetries() {
1059      return this.retries.incrementAndGet();
1060    }
1061
1062    public int getRetries() {
1063      return retries.get();
1064    }
1065
1066    public void setException(final Exception exception) {
1067      this.exception = exception;
1068    }
1069
1070    public Exception getException() {
1071      return this.exception;
1072    }
1073  }
1074
1075  public RegionFailedOpen addToFailedOpen(final RegionStateNode regionNode) {
1076    final byte[] key = regionNode.getRegionInfo().getRegionName();
1077    RegionFailedOpen node = regionFailedOpen.get(key);
1078    if (node == null) {
1079      RegionFailedOpen newNode = new RegionFailedOpen(regionNode);
1080      RegionFailedOpen oldNode = regionFailedOpen.putIfAbsent(key, newNode);
1081      node = oldNode != null ? oldNode : newNode;
1082    }
1083    return node;
1084  }
1085
  /**
   * @param regionInfo region to look up; its region name is the map key
   * @return whatever the failed-open map holds for that region name
   */
  public RegionFailedOpen getFailedOpen(final RegionInfo regionInfo) {
    return regionFailedOpen.get(regionInfo.getRegionName());
  }
1089
  /**
   * Drops the failed-open record stored under the region's name, if there is one.
   * @param regionInfo region whose record is removed
   */
  public void removeFromFailedOpen(final RegionInfo regionInfo) {
    regionFailedOpen.remove(regionInfo.getRegionName());
  }
1093
1094  public List<RegionState> getRegionFailedOpen() {
1095    if (regionFailedOpen.isEmpty()) return Collections.emptyList();
1096
1097    ArrayList<RegionState> regions = new ArrayList<RegionState>(regionFailedOpen.size());
1098    for (RegionFailedOpen r: regionFailedOpen.values()) {
1099      regions.add(r.getRegionStateNode().toRegionState());
1100    }
1101    return regions;
1102  }
1103
1104  // ==========================================================================
1105  //  Servers
1106  // ==========================================================================
1107
1108  /**
1109   * Be judicious calling this method. Do it on server register ONLY otherwise
1110   * you could mess up online server accounting. TOOD: Review usage and convert
1111   * to {@link #getServerNode(ServerName)} where we can.
1112   */
1113  ServerStateNode getOrCreateServer(final ServerName serverName) {
1114    ServerStateNode node = serverMap.get(serverName);
1115    if (node == null) {
1116      LOG.trace("CREATING! {}", serverName, new RuntimeException("WHERE AM I?"));
1117      node = new ServerStateNode(serverName);
1118      ServerStateNode oldNode = serverMap.putIfAbsent(serverName, node);
1119      node = oldNode != null ? oldNode : node;
1120    }
1121    return node;
1122  }
1123
  /**
   * Removes the node for the given server from the server map, if present.
   * @param serverName server to remove
   */
  public void removeServer(final ServerName serverName) {
    serverMap.remove(serverName);
  }
1127
  /**
   * Plain lookup in the server map; never creates a node.
   * @param serverName server to look up
   * @return whatever the server map holds for {@code serverName}
   */
  public ServerStateNode getServerNode(final ServerName serverName) {
    return serverMap.get(serverName);
  }
1131
1132  public double getAverageLoad() {
1133    int numServers = 0;
1134    int totalLoad = 0;
1135    for (ServerStateNode node: serverMap.values()) {
1136      totalLoad += node.getRegionCount();
1137      numServers++;
1138    }
1139    return numServers == 0 ? 0.0: (double)totalLoad / (double)numServers;
1140  }
1141
1142  /**
1143   * Add reference to region to serverstatenode.
1144   * DOES NOT AUTO-CREATE ServerStateNode instance.
1145   * @return Return serverstatenode or null if none.
1146   */
1147  ServerStateNode addRegionToServer(final RegionStateNode regionNode) {
1148    ServerStateNode ssn = getServerNode(regionNode.getRegionLocation());
1149    if (ssn == null) {
1150      return ssn;
1151    }
1152    ssn.addRegion(regionNode);
1153    return ssn;
1154  }
1155
1156  public boolean isReplicaAvailableForRegion(final RegionInfo info) {
1157    // if the region info itself is a replica return true.
1158    if (!RegionReplicaUtil.isDefaultReplica(info)) {
1159      return true;
1160    }
1161    // iterate the regionsMap for the given region name. If there are replicas it should
1162    // list them in order.
1163    for (RegionStateNode node : regionsMap.tailMap(info.getRegionName()).values()) {
1164      if (!node.getTable().equals(info.getTable())
1165          || !ServerRegionReplicaUtil.isReplicasForSameRegion(info, node.getRegionInfo())) {
1166        break;
1167      } else if (!RegionReplicaUtil.isDefaultReplica(node.getRegionInfo())) {
1168        // we have replicas
1169        return true;
1170      }
1171    }
1172    // we don have replicas
1173    return false;
1174  }
1175
1176  public ServerStateNode removeRegionFromServer(final ServerName serverName,
1177      final RegionStateNode regionNode) {
1178    ServerStateNode serverNode = getServerNode(serverName);
1179    if (serverNode != null) {
1180      serverNode.removeRegion(regionNode);
1181    }
1182    return serverNode;
1183  }
1184
1185  // ==========================================================================
1186  //  ToString helpers
1187  // ==========================================================================
1188  public static String regionNamesToString(final Collection<byte[]> regions) {
1189    final StringBuilder sb = new StringBuilder();
1190    final Iterator<byte[]> it = regions.iterator();
1191    sb.append("[");
1192    if (it.hasNext()) {
1193      sb.append(Bytes.toStringBinary(it.next()));
1194      while (it.hasNext()) {
1195        sb.append(", ");
1196        sb.append(Bytes.toStringBinary(it.next()));
1197      }
1198    }
1199    sb.append("]");
1200    return sb.toString();
1201  }
1202}