/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.assignment;

import com.google.errorprone.annotations.RestrictedApi;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.LongConsumer;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.TableStateManager;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tracks regions that are currently in transition (RIT) - those not yet in their terminal state.
 * <p>
 * Each tracked region is stored together with the time its transition started; when the region
 * leaves the RIT list the elapsed time is reported to the duration consumer supplied at
 * construction.
 */
@InterfaceAudience.Private
public class RegionInTransitionTracker {
  private static final Logger LOG = LoggerFactory.getLogger(RegionInTransitionTracker.class);

  /** States in which a region of a disabled table is considered settled. */
  private static final List<RegionState.State> DISABLE_TABLE_REGION_STATE =
    List.of(RegionState.State.OFFLINE, RegionState.State.CLOSED);

  /** States in which a region of an enabled table is considered settled. */
  private static final List<RegionState.State> ENABLE_TABLE_REGION_STATE =
    List.of(RegionState.State.OPEN);

  // Keyed by the encoded region name, NOT by RegionInfo. RegionInfo#equals and #hashCode both
  // include the offline flag, and the offline value changes with splitting, so a RegionInfo key
  // would be missed by a lookup that uses an instance of the same region carrying a different
  // offline flag. The encoded name does not depend on that flag.
  // Value: the region's state node paired with the RIT start time in milliseconds.
  private final ConcurrentHashMap<String, Pair<RegionStateNode, Long>> regionInTransition =
    new ConcurrentHashMap<>();

  private final LongConsumer ritDurationConsumer;

  // Assigned after construction through setTableStateManager and read on every region state
  // change; volatile so that a reading thread is guaranteed to observe the published manager.
  private volatile TableStateManager tableStateManager;

  /**
   * @param ritDurationConsumer receives, in milliseconds, how long a region stayed in transition
   *                            each time one is removed from the RIT list; must not be null
   */
  public RegionInTransitionTracker(LongConsumer ritDurationConsumer) {
    this.ritDurationConsumer = Objects.requireNonNull(ritDurationConsumer);
  }

  /**
   * @param regionInfo the region to look up
   * @return true if the region is currently tracked as in transition
   */
  @RestrictedApi(explanation = "Should only be called in tests", link = "",
      allowedOnPath = ".*/src/test/.*")
  boolean isRegionInTransition(final RegionInfo regionInfo) {
    return regionInTransition.containsKey(keyOf(regionInfo));
  }

  /**
   * Handles a region whose hosting RegionServer has crashed. When a RegionServer fails, all regions
   * it was hosting are automatically added to the RIT list since they need to be reassigned to
   * other servers. Only default replicas are tracked; other replicas are ignored.
   * @param regionStateNode the region whose hosting server crashed
   * @param crashTime       the RIT start time to use when the region is not already in transition
   *                        (an existing entry keeps its earlier start). Passed explicitly rather
   *                        than read from {@link RegionStateNode#getLastUpdate()}, which a stale
   *                        procedure can mask. A non-positive value falls back to the node's last
   *                        update.
   */
  public void regionCrashed(RegionStateNode regionStateNode, long crashTime) {
    if (isReplica(regionStateNode)) {
      return;
    }

    long startTime = crashTime > 0 ? crashTime : regionStateNode.getLastUpdate();
    if (addRegionInTransition(regionStateNode, startTime)) {
      LOG.debug("{} added to RIT list because hosting region server is crashed ",
        regionStateNode.getRegionInfo().getEncodedName());
    }
  }

  /**
   * Processes a region state change and updates the RIT tracking accordingly. This is the core
   * method that determines whether a region should be added to or removed from the RIT list based
   * on its current state and the table's enabled/disabled status. This method should be called
   * whenever a region state change gets stored to hbase:meta.
   * <p>
   * Note: Only default replicas (replica ID 0) are tracked. Read replicas are ignored.
   * @param regionStateNode the region state node with the current state information
   */
  public void handleRegionStateNodeOperation(RegionStateNode regionStateNode) {
    // only consider default replica for availability
    if (isReplica(regionStateNode)) {
      return;
    }

    RegionState.State currentState = regionStateNode.getState();
    boolean tableEnabled = isTableEnabled(regionStateNode.getTable());
    List<RegionState.State> terminalStates =
      tableEnabled ? ENABLE_TABLE_REGION_STATE : DISABLE_TABLE_REGION_STATE;

    // if region is merged or split it should not be in RIT list
    if (AssignmentManagerUtil.isSplitOrMerged(regionStateNode)) {
      if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
        LOG.debug("Removed {} from RIT list as it is split or merged",
          regionStateNode.getRegionInfo().getEncodedName());
      }
    } else if (!terminalStates.contains(currentState)) {
      if (addRegionInTransition(regionStateNode)) {
        LOG.debug("{} added to RIT list because it is in-between state, region state : {} ",
          regionStateNode.getRegionInfo().getEncodedName(), currentState);
      }
    } else {
      if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
        LOG.debug("Removed {} from RIT list as reached to terminal state {}",
          regionStateNode.getRegionInfo().getEncodedName(), currentState);
      }
    }
  }

  /**
   * @param tableName the table to check
   * @return true if the table is ENABLED or ENABLING according to the table state manager, or if
   *         no table state manager has been set yet
   */
  private boolean isTableEnabled(TableName tableName) {
    // Read the volatile field once so the null check and the call use the same reference.
    TableStateManager manager = tableStateManager;
    if (manager != null) {
      return manager.isTableState(tableName, TableState.State.ENABLED,
        TableState.State.ENABLING);
    }
    // AssignmentManager calls setTableStateManager once hbase:meta is confirmed online. If it is
    // still null, that confirmation is still pending, and TableStateManager must not be accessed
    // until then.
    assert TableName.isMetaTableName(tableName);
    return true;
  }

  /**
   * Handles the deletion of a region by removing it from RIT tracking. This is called when a region
   * is permanently removed from the cluster, typically after a successful merge operation where the
   * parent regions are cleaned up. During table deletion, the table should already be disabled and
   * all the regions already OFFLINE.
   * @param regionInfo the region being deleted
   */
  public void handleRegionDelete(RegionInfo regionInfo) {
    removeRegionInTransition(regionInfo);
  }

  /** Adds the region using the node's last update time as the RIT start time. */
  private boolean addRegionInTransition(final RegionStateNode regionStateNode) {
    return addRegionInTransition(regionStateNode, regionStateNode.getLastUpdate());
  }

  /**
   * Adds the region to the RIT list unless it is already present.
   * @param regionStateNode the region to track
   * @param startTime       RIT start time in milliseconds; a non-positive value is replaced by
   *                        the current time
   * @return true if the region was newly added, false if it was already tracked (the existing
   *         entry, including its start time, is left untouched)
   */
  private boolean addRegionInTransition(final RegionStateNode regionStateNode, long startTime) {
    if (startTime <= 0) {
      startTime = EnvironmentEdgeManager.currentTime();
    }
    return regionInTransition.putIfAbsent(keyOf(regionStateNode.getRegionInfo()),
      Pair.newPair(regionStateNode, startTime)) == null;
  }

  /**
   * Removes the region from the RIT list and reports how long it was in transition.
   * @param regionInfo the region to stop tracking
   * @return true if the region was tracked and has been removed
   */
  private boolean removeRegionInTransition(final RegionInfo regionInfo) {
    Pair<RegionStateNode, Long> removed = regionInTransition.remove(keyOf(regionInfo));
    if (removed != null) {
      long duration = EnvironmentEdgeManager.currentTime() - removed.getSecond();
      // A start time later than "now" would give a negative duration; do not report those.
      if (duration >= 0) {
        ritDurationConsumer.accept(duration);
      }
    }
    return removed != null;
  }

  /** Drops all tracked regions without reporting their durations. */
  public void stop() {
    regionInTransition.clear();
  }

  /** @return true if at least one region is currently tracked as in transition */
  public boolean hasRegionsInTransition() {
    return !regionInTransition.isEmpty();
  }

  /** @return the number of regions currently tracked as in transition */
  public int getRegionsInTransitionCount() {
    return regionInTransition.size();
  }

  /** @return a new list holding the state nodes of all regions currently in transition */
  public List<RegionStateNode> getRegionsInTransition() {
    List<RegionStateNode> regions = new ArrayList<>(regionInTransition.size());
    for (Pair<RegionStateNode, Long> entry : regionInTransition.values()) {
      regions.add(entry.getFirst());
    }
    return regions;
  }

  /**
   * Supplies the manager used to decide whether a region's table is enabled. Until this is
   * called, every table is treated as enabled.
   * @param tableStateManager the table state manager to consult
   */
  public void setTableStateManager(TableStateManager tableStateManager) {
    this.tableStateManager = tableStateManager;
  }

  /** @return true if the node is not the default replica of its region */
  private static boolean isReplica(RegionStateNode regionStateNode) {
    return regionStateNode.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID;
  }

  /** @return the map key for a region: its encoded name, which ignores the offline flag */
  private static String keyOf(RegionInfo regionInfo) {
    return regionInfo.getEncodedName();
  }
}