001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import java.util.Arrays; 021import java.util.concurrent.ConcurrentMap; 022import java.util.concurrent.locks.Lock; 023import java.util.concurrent.locks.ReentrantLock; 024import org.apache.hadoop.hbase.HConstants; 025import org.apache.hadoop.hbase.ServerName; 026import org.apache.hadoop.hbase.TableName; 027import org.apache.hadoop.hbase.client.DoNotRetryRegionException; 028import org.apache.hadoop.hbase.client.RegionInfo; 029import org.apache.hadoop.hbase.client.RegionOfflineException; 030import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; 031import org.apache.hadoop.hbase.master.RegionState; 032import org.apache.hadoop.hbase.master.RegionState.State; 033import org.apache.hadoop.hbase.procedure2.ProcedureEvent; 034import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 035import org.apache.yetus.audience.InterfaceAudience; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038 039import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 040 041/** 042 * Current Region State. Most fields are synchronized with meta region, i.e, we will update meta 043 * immediately after we modify this RegionStateNode, and usually under the lock. The only exception 044 * is {@link #lastHost}, which should not be used for critical condition. 045 * <p/> 046 * Typically, the only way to modify this class is through {@link TransitRegionStateProcedure}, and 047 * we will record the TRSP along with this RegionStateNode to make sure that there could at most one 048 * TRSP. For other operations, such as SCP, we will first get the lock, and then try to schedule a 049 * TRSP. If there is already one, then the solution will be different: 050 * <ul> 051 * <li>For SCP, we will update the region state in meta to tell the TRSP to retry.</li> 052 * <li>For DisableTableProcedure, as we have the xlock, we can make sure that the TRSP has not been 053 * executed yet, so just unset it and attach a new one. The original one will quit immediately when 054 * executing.</li> 055 * <li>For split/merge, we will fail immediately as there is no actual operations yet so no 056 * harm.</li> 057 * <li>For EnableTableProcedure/TruncateTableProcedure, we can make sure that there will be no TRSP 058 * attached with the RSNs.</li> 059 * <li>For other procedures, you'd better use ReopenTableRegionsProcedure. The RTRP will take care 060 * of lots of corner cases when reopening regions.</li> 061 * </ul> 062 * <p/> 063 * Several fields are declared with {@code volatile}, which means you are free to get it without 064 * lock, but usually you should not use these fields without locking for critical condition, as it 065 * will be easily to introduce inconsistency. For example, you are free to dump the status and show 066 * it on web without locking, but if you want to change the state of the RegionStateNode by checking 067 * the current state, you'd better have the lock... 068 */ 069@InterfaceAudience.Private 070public class RegionStateNode implements Comparable<RegionStateNode> { 071 072 private static final Logger LOG = LoggerFactory.getLogger(RegionStateNode.class); 073 074 private static final class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> { 075 public AssignmentProcedureEvent(final RegionInfo regionInfo) { 076 super(regionInfo); 077 } 078 } 079 080 @VisibleForTesting 081 final Lock lock = new ReentrantLock(); 082 private final RegionInfo regionInfo; 083 private final ProcedureEvent<?> event; 084 private final ConcurrentMap<RegionInfo, RegionStateNode> ritMap; 085 086 // volatile only for getLastUpdate and test usage, the upper layer should sync on the 087 // RegionStateNode before accessing usually. 088 private volatile TransitRegionStateProcedure procedure = null; 089 private volatile ServerName regionLocation = null; 090 // notice that, the lastHost will only be updated when a region is successfully CLOSED through 091 // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync 092 // with the data in meta. 093 private volatile ServerName lastHost = null; 094 /** 095 * A Region-in-Transition (RIT) moves through states. See {@link State} for complete list. A 096 * Region that is opened moves from OFFLINE => OPENING => OPENED. 097 */ 098 private volatile State state = State.OFFLINE; 099 100 /** 101 * Updated whenever a call to {@link #setRegionLocation(ServerName)} or 102 * {@link #setState(RegionState.State, RegionState.State...)}. 103 */ 104 private volatile long lastUpdate = 0; 105 106 private volatile long openSeqNum = HConstants.NO_SEQNUM; 107 108 RegionStateNode(RegionInfo regionInfo, ConcurrentMap<RegionInfo, RegionStateNode> ritMap) { 109 this.regionInfo = regionInfo; 110 this.event = new AssignmentProcedureEvent(regionInfo); 111 this.ritMap = ritMap; 112 } 113 114 /** 115 * @param update new region state this node should be assigned. 116 * @param expected current state should be in this given list of expected states 117 * @return true, if current state is in expected list; otherwise false. 118 */ 119 public boolean setState(final State update, final State... expected) { 120 if (!isInState(expected)) { 121 return false; 122 } 123 this.state = update; 124 this.lastUpdate = EnvironmentEdgeManager.currentTime(); 125 return true; 126 } 127 128 /** 129 * Put region into OFFLINE mode (set state and clear location). 130 * @return Last recorded server deploy 131 */ 132 public ServerName offline() { 133 setState(State.OFFLINE); 134 return setRegionLocation(null); 135 } 136 137 /** 138 * Set new {@link State} but only if currently in <code>expected</code> State (if not, throw 139 * {@link UnexpectedStateException}. 140 */ 141 public void transitionState(final State update, final State... expected) 142 throws UnexpectedStateException { 143 if (!setState(update, expected)) { 144 throw new UnexpectedStateException("Expected " + Arrays.toString(expected) + 145 " so could move to " + update + " but current state=" + getState()); 146 } 147 } 148 149 /** 150 * Notice that, we will return true if {@code expected} is empty. 151 * <p/> 152 * This is a bit strange but we need this logic, for example, we can change the state to OPENING 153 * from any state, as in SCP we will not change the state to CLOSED before opening the region. 154 */ 155 public boolean isInState(State... expected) { 156 if (expected.length == 0) { 157 return true; 158 } 159 return getState().matches(expected); 160 } 161 162 public boolean isStuck() { 163 return isInState(State.FAILED_OPEN) && getProcedure() != null; 164 } 165 166 public boolean isInTransition() { 167 return getProcedure() != null; 168 } 169 170 public long getLastUpdate() { 171 TransitRegionStateProcedure proc = this.procedure; 172 if (proc != null) { 173 long lastUpdate = proc.getLastUpdate(); 174 return lastUpdate != 0 ? lastUpdate : proc.getSubmittedTime(); 175 } 176 return lastUpdate; 177 } 178 179 public void setLastHost(final ServerName serverName) { 180 this.lastHost = serverName; 181 } 182 183 public void setOpenSeqNum(final long seqId) { 184 this.openSeqNum = seqId; 185 } 186 187 public ServerName setRegionLocation(final ServerName serverName) { 188 ServerName lastRegionLocation = this.regionLocation; 189 if (LOG.isTraceEnabled() && serverName == null) { 190 LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE")); 191 } 192 this.regionLocation = serverName; 193 this.lastUpdate = EnvironmentEdgeManager.currentTime(); 194 return lastRegionLocation; 195 } 196 197 public void setProcedure(TransitRegionStateProcedure proc) { 198 assert this.procedure == null; 199 this.procedure = proc; 200 ritMap.put(regionInfo, this); 201 } 202 203 public void unsetProcedure(TransitRegionStateProcedure proc) { 204 assert this.procedure == proc; 205 this.procedure = null; 206 ritMap.remove(regionInfo, this); 207 } 208 209 public TransitRegionStateProcedure getProcedure() { 210 return procedure; 211 } 212 213 public ProcedureEvent<?> getProcedureEvent() { 214 return event; 215 } 216 217 public RegionInfo getRegionInfo() { 218 return regionInfo; 219 } 220 221 public TableName getTable() { 222 return getRegionInfo().getTable(); 223 } 224 225 public boolean isSystemTable() { 226 return getTable().isSystemTable(); 227 } 228 229 public ServerName getLastHost() { 230 return lastHost; 231 } 232 233 public ServerName getRegionLocation() { 234 return regionLocation; 235 } 236 237 public State getState() { 238 return state; 239 } 240 241 public long getOpenSeqNum() { 242 return openSeqNum; 243 } 244 245 public int getFormatVersion() { 246 // we don't have any format for now 247 // it should probably be in regionInfo.getFormatVersion() 248 return 0; 249 } 250 251 public RegionState toRegionState() { 252 return new RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation()); 253 } 254 255 @Override 256 public int compareTo(final RegionStateNode other) { 257 // NOTE: RegionInfo sort by table first, so we are relying on that. 258 // we have a TestRegionState#testOrderedByTable() that check for that. 259 return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo()); 260 } 261 262 @Override 263 public int hashCode() { 264 return getRegionInfo().hashCode(); 265 } 266 267 @Override 268 public boolean equals(final Object other) { 269 if (this == other) { 270 return true; 271 } 272 if (!(other instanceof RegionStateNode)) { 273 return false; 274 } 275 return compareTo((RegionStateNode) other) == 0; 276 } 277 278 @Override 279 public String toString() { 280 return toDescriptiveString(); 281 } 282 283 public String toShortString() { 284 // rit= is the current Region-In-Transition State -- see State enum. 285 return String.format("rit=%s, location=%s", getState(), getRegionLocation()); 286 } 287 288 public String toDescriptiveString() { 289 return String.format("%s, table=%s, region=%s", toShortString(), getTable(), 290 getRegionInfo().getEncodedName()); 291 } 292 293 public void checkOnline() throws DoNotRetryRegionException { 294 RegionInfo ri = getRegionInfo(); 295 State s = state; 296 if (s != State.OPEN) { 297 throw new DoNotRetryRegionException(ri.getEncodedName() + " is not OPEN; state=" + s); 298 } 299 if (ri.isSplitParent()) { 300 throw new DoNotRetryRegionException( 301 ri.getEncodedName() + " is not online (splitParent=true)"); 302 } 303 if (ri.isSplit()) { 304 throw new DoNotRetryRegionException(ri.getEncodedName() + " has split=true"); 305 } 306 if (ri.isOffline()) { 307 // RegionOfflineException is not instance of DNRIOE so wrap it. 308 throw new DoNotRetryRegionException(new RegionOfflineException(ri.getEncodedName())); 309 } 310 } 311 312 public void lock() { 313 lock.lock(); 314 } 315 316 public void unlock() { 317 lock.unlock(); 318 } 319}