001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import com.google.errorprone.annotations.RestrictedApi;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Objects;
024import java.util.concurrent.ConcurrentHashMap;
025import java.util.function.LongConsumer;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.client.RegionInfo;
028import org.apache.hadoop.hbase.client.TableState;
029import org.apache.hadoop.hbase.master.RegionState;
030import org.apache.hadoop.hbase.master.TableStateManager;
031import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
032import org.apache.hadoop.hbase.util.Pair;
033import org.apache.yetus.audience.InterfaceAudience;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036
037/**
038 * Tracks regions that are currently in transition (RIT) - those not yet in their terminal state.
039 */
040@InterfaceAudience.Private
041public class RegionInTransitionTracker {
042  private static final Logger LOG = LoggerFactory.getLogger(RegionInTransitionTracker.class);
043
044  private static final List<RegionState.State> DISABLE_TABLE_REGION_STATE =
045    List.of(RegionState.State.OFFLINE, RegionState.State.CLOSED);
046
047  private static final List<RegionState.State> ENABLE_TABLE_REGION_STATE =
048    List.of(RegionState.State.OPEN);
049
050  // DO NOT USE containsKey()/remove() on regionInTransition with a RegionInfo instance whose
051  // offline flag differs from the one stored as the key: RegionInfo#equals and #hashCode both
052  // include the offline flag, so such a lookup misses even when it refers to the same region.
053  // Offline value changes with splitting.
054  private final ConcurrentHashMap<RegionInfo, Pair<RegionStateNode, Long>> regionInTransition =
055    new ConcurrentHashMap<>();
056
057  private final LongConsumer ritDurationConsumer;
058  private TableStateManager tableStateManager;
059
060  public RegionInTransitionTracker(LongConsumer ritDurationConsumer) {
061    this.ritDurationConsumer = Objects.requireNonNull(ritDurationConsumer);
062  }
063
064  @RestrictedApi(explanation = "Should only be called in tests", link = "",
065      allowedOnPath = ".*/src/test/.*")
066  boolean isRegionInTransition(final RegionInfo regionInfo) {
067    return regionInTransition.containsKey(regionInfo);
068  }
069
070  /**
071   * Handles a region whose hosting RegionServer has crashed. When a RegionServer fails, all regions
072   * it was hosting are automatically added to the RIT list since they need to be reassigned to
073   * other servers.
074   * @param regionStateNode the region whose hosting server crashed
075   * @param crashTime       the RIT start time to use when the region is not already in transition
076   *                        (an existing entry keeps its earlier start). Passed explicitly rather
077   *                        than read from {@link RegionStateNode#getLastUpdate()}, which a stale
078   *                        procedure can mask. A non-positive value falls back to the node's last
079   *                        update.
080   */
081  public void regionCrashed(RegionStateNode regionStateNode, long crashTime) {
082    if (isReplica(regionStateNode)) {
083      return;
084    }
085
086    long startTime = crashTime > 0 ? crashTime : regionStateNode.getLastUpdate();
087    if (addRegionInTransition(regionStateNode, startTime)) {
088      LOG.debug("{} added to RIT list because hosting region server is crashed ",
089        regionStateNode.getRegionInfo().getEncodedName());
090    }
091  }
092
093  /**
094   * Processes a region state change and updates the RIT tracking accordingly. This is the core
095   * method that determines whether a region should be added to or removed from the RIT list based
096   * on its current state and the table's enabled/disabled status. This method should be called
097   * whenever a region state changes get stored to hbase:meta Note: Only default replicas (replica
098   * ID 0) are tracked. Read replicas are ignored.
099   * @param regionStateNode the region state node with the current state information
100   */
101  public void handleRegionStateNodeOperation(RegionStateNode regionStateNode) {
102    // only consider default replica for availability
103    if (isReplica(regionStateNode)) {
104      return;
105    }
106
107    RegionState.State currentState = regionStateNode.getState();
108    boolean tableEnabled = isTableEnabled(regionStateNode.getTable());
109    List<RegionState.State> terminalStates =
110      tableEnabled ? ENABLE_TABLE_REGION_STATE : DISABLE_TABLE_REGION_STATE;
111
112    // if region is merged or split it should not be in RIT list
113    if (AssignmentManagerUtil.isSplitOrMerged(regionStateNode)) {
114      if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
115        LOG.debug("Removed {} from RIT list as it is split or merged",
116          regionStateNode.getRegionInfo().getEncodedName());
117      }
118    } else if (!terminalStates.contains(currentState)) {
119      if (addRegionInTransition(regionStateNode)) {
120        LOG.debug("{} added to RIT list because it is in-between state, region state : {} ",
121          regionStateNode.getRegionInfo().getEncodedName(), currentState);
122      }
123    } else {
124      if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
125        LOG.debug("Removed {} from RIT list as reached to terminal state {}",
126          regionStateNode.getRegionInfo().getEncodedName(), currentState);
127      }
128    }
129  }
130
131  private boolean isTableEnabled(TableName tableName) {
132    if (tableStateManager != null) {
133      return tableStateManager.isTableState(tableName, TableState.State.ENABLED,
134        TableState.State.ENABLING);
135    }
136    // AssignmentManager calls setTableStateManager once hbase:meta is confirmed online, if it is
137    // still null it means confirmation is still pending. One should not access TableStateManger
138    // till the time.
139    assert TableName.isMetaTableName(tableName);
140    return true;
141  }
142
143  /**
144   * Handles the deletion of a region by removing it from RIT tracking. This is called when a region
145   * is permanently removed from the cluster, typically after a successful merge operation where the
146   * parent regions are cleaned up. During table deletion, table should be already disabled and all
147   * the region are already OFFLINE
148   * @param regionInfo the region being deleted
149   */
150  public void handleRegionDelete(RegionInfo regionInfo) {
151    removeRegionInTransition(regionInfo);
152  }
153
154  private boolean addRegionInTransition(final RegionStateNode regionStateNode) {
155    return addRegionInTransition(regionStateNode, regionStateNode.getLastUpdate());
156  }
157
158  private boolean addRegionInTransition(final RegionStateNode regionStateNode, long startTime) {
159    if (startTime <= 0) {
160      startTime = EnvironmentEdgeManager.currentTime();
161    }
162    return regionInTransition.putIfAbsent(regionStateNode.getRegionInfo(),
163      Pair.newPair(regionStateNode, startTime)) == null;
164  }
165
166  private boolean removeRegionInTransition(final RegionInfo regionInfo) {
167    Pair<RegionStateNode, Long> removed = regionInTransition.remove(regionInfo);
168    if (removed != null) {
169      long duration = EnvironmentEdgeManager.currentTime() - removed.getSecond();
170      if (duration >= 0) {
171        ritDurationConsumer.accept(duration);
172      }
173    }
174    return removed != null;
175  }
176
177  public void stop() {
178    regionInTransition.clear();
179  }
180
181  public boolean hasRegionsInTransition() {
182    return !regionInTransition.isEmpty();
183  }
184
185  public int getRegionsInTransitionCount() {
186    return regionInTransition.size();
187  }
188
189  public List<RegionStateNode> getRegionsInTransition() {
190    List<RegionStateNode> regions = new ArrayList<>(regionInTransition.size());
191    for (Pair<RegionStateNode, Long> entry : regionInTransition.values()) {
192      regions.add(entry.getFirst());
193    }
194    return regions;
195  }
196
197  public void setTableStateManager(TableStateManager tableStateManager) {
198    this.tableStateManager = tableStateManager;
199  }
200
201  private static boolean isReplica(RegionStateNode regionStateNode) {
202    return regionStateNode.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID;
203  }
204}