001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import java.util.Arrays;
021import java.util.concurrent.ConcurrentMap;
022import java.util.concurrent.locks.Lock;
023import java.util.concurrent.locks.ReentrantLock;
024import org.apache.hadoop.hbase.HConstants;
025import org.apache.hadoop.hbase.ServerName;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
028import org.apache.hadoop.hbase.client.RegionInfo;
029import org.apache.hadoop.hbase.client.RegionOfflineException;
030import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
031import org.apache.hadoop.hbase.master.RegionState;
032import org.apache.hadoop.hbase.master.RegionState.State;
033import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
034import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
035import org.apache.yetus.audience.InterfaceAudience;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
040
041/**
 * Current Region State. Most fields are synchronized with meta region, i.e., we will update meta
 * immediately after we modify this RegionStateNode, and usually under the lock. The only exception
 * is {@link #lastHost}, which should not be used for critical conditions.
045 * <p/>
 * Typically, the only way to modify this class is through {@link TransitRegionStateProcedure}, and
 * we will record the TRSP along with this RegionStateNode to make sure that there is at most one
 * TRSP. For other operations, such as SCP, we will first get the lock, and then try to schedule a
 * TRSP. If there is already one, then the solution will be different:
050 * <ul>
051 * <li>For SCP, we will update the region state in meta to tell the TRSP to retry.</li>
052 * <li>For DisableTableProcedure, as we have the xlock, we can make sure that the TRSP has not been
053 * executed yet, so just unset it and attach a new one. The original one will quit immediately when
054 * executing.</li>
 * <li>For split/merge, we will fail immediately as there are no actual operations yet so no
 * harm.</li>
057 * <li>For EnableTableProcedure/TruncateTableProcedure, we can make sure that there will be no TRSP
058 * attached with the RSNs.</li>
059 * <li>For other procedures, you'd better use ReopenTableRegionsProcedure. The RTRP will take care
060 * of lots of corner cases when reopening regions.</li>
061 * </ul>
062 * <p/>
 * Several fields are declared with {@code volatile}, which means you are free to read them without
 * the lock, but usually you should not use these fields without locking for critical conditions,
 * as it is easy to introduce inconsistency. For example, you are free to dump the status and show
 * it on web without locking, but if you want to change the state of the RegionStateNode by
 * checking the current state, you'd better have the lock...
068 */
069@InterfaceAudience.Private
070public class RegionStateNode implements Comparable<RegionStateNode> {
071
072  private static final Logger LOG = LoggerFactory.getLogger(RegionStateNode.class);
073
074  private static final class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
075    public AssignmentProcedureEvent(final RegionInfo regionInfo) {
076      super(regionInfo);
077    }
078  }
079
080  @VisibleForTesting
081  final Lock lock = new ReentrantLock();
082  private final RegionInfo regionInfo;
083  private final ProcedureEvent<?> event;
084  private final ConcurrentMap<RegionInfo, RegionStateNode> ritMap;
085
086  // volatile only for getLastUpdate and test usage, the upper layer should sync on the
087  // RegionStateNode before accessing usually.
088  private volatile TransitRegionStateProcedure procedure = null;
089  private volatile ServerName regionLocation = null;
090  // notice that, the lastHost will only be updated when a region is successfully CLOSED through
091  // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync
092  // with the data in meta.
093  private volatile ServerName lastHost = null;
094  /**
095   * A Region-in-Transition (RIT) moves through states. See {@link State} for complete list. A
096   * Region that is opened moves from OFFLINE => OPENING => OPENED.
097   */
098  private volatile State state = State.OFFLINE;
099
100  /**
101   * Updated whenever a call to {@link #setRegionLocation(ServerName)} or
102   * {@link #setState(RegionState.State, RegionState.State...)}.
103   */
104  private volatile long lastUpdate = 0;
105
106  private volatile long openSeqNum = HConstants.NO_SEQNUM;
107
108  RegionStateNode(RegionInfo regionInfo, ConcurrentMap<RegionInfo, RegionStateNode> ritMap) {
109    this.regionInfo = regionInfo;
110    this.event = new AssignmentProcedureEvent(regionInfo);
111    this.ritMap = ritMap;
112  }
113
114  /**
115   * @param update new region state this node should be assigned.
116   * @param expected current state should be in this given list of expected states
117   * @return true, if current state is in expected list; otherwise false.
118   */
119  public boolean setState(final State update, final State... expected) {
120    if (!isInState(expected)) {
121      return false;
122    }
123    this.state = update;
124    this.lastUpdate = EnvironmentEdgeManager.currentTime();
125    return true;
126  }
127
128  /**
129   * Put region into OFFLINE mode (set state and clear location).
130   * @return Last recorded server deploy
131   */
132  public ServerName offline() {
133    setState(State.OFFLINE);
134    return setRegionLocation(null);
135  }
136
137  /**
138   * Set new {@link State} but only if currently in <code>expected</code> State (if not, throw
139   * {@link UnexpectedStateException}.
140   */
141  public void transitionState(final State update, final State... expected)
142      throws UnexpectedStateException {
143    if (!setState(update, expected)) {
144      throw new UnexpectedStateException("Expected " + Arrays.toString(expected) +
145        " so could move to " + update + " but current state=" + getState());
146    }
147  }
148
149  /**
150   * Notice that, we will return true if {@code expected} is empty.
151   * <p/>
152   * This is a bit strange but we need this logic, for example, we can change the state to OPENING
153   * from any state, as in SCP we will not change the state to CLOSED before opening the region.
154   */
155  public boolean isInState(State... expected) {
156    if (expected.length == 0) {
157      return true;
158    }
159    return getState().matches(expected);
160  }
161
162  public boolean isStuck() {
163    return isInState(State.FAILED_OPEN) && getProcedure() != null;
164  }
165
166  public boolean isInTransition() {
167    return getProcedure() != null;
168  }
169
170  public long getLastUpdate() {
171    TransitRegionStateProcedure proc = this.procedure;
172    return proc != null ? proc.getLastUpdate() : lastUpdate;
173  }
174
175  public void setLastHost(final ServerName serverName) {
176    this.lastHost = serverName;
177  }
178
179  public void setOpenSeqNum(final long seqId) {
180    this.openSeqNum = seqId;
181  }
182
183  public ServerName setRegionLocation(final ServerName serverName) {
184    ServerName lastRegionLocation = this.regionLocation;
185    if (LOG.isTraceEnabled() && serverName == null) {
186      LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE"));
187    }
188    this.regionLocation = serverName;
189    this.lastUpdate = EnvironmentEdgeManager.currentTime();
190    return lastRegionLocation;
191  }
192
193  public void setProcedure(TransitRegionStateProcedure proc) {
194    assert this.procedure == null;
195    this.procedure = proc;
196    ritMap.put(regionInfo, this);
197  }
198
199  public void unsetProcedure(TransitRegionStateProcedure proc) {
200    assert this.procedure == proc;
201    this.procedure = null;
202    ritMap.remove(regionInfo, this);
203  }
204
205  public TransitRegionStateProcedure getProcedure() {
206    return procedure;
207  }
208
209  public ProcedureEvent<?> getProcedureEvent() {
210    return event;
211  }
212
213  public RegionInfo getRegionInfo() {
214    return regionInfo;
215  }
216
217  public TableName getTable() {
218    return getRegionInfo().getTable();
219  }
220
221  public boolean isSystemTable() {
222    return getTable().isSystemTable();
223  }
224
225  public ServerName getLastHost() {
226    return lastHost;
227  }
228
229  public ServerName getRegionLocation() {
230    return regionLocation;
231  }
232
233  public State getState() {
234    return state;
235  }
236
237  public long getOpenSeqNum() {
238    return openSeqNum;
239  }
240
241  public int getFormatVersion() {
242    // we don't have any format for now
243    // it should probably be in regionInfo.getFormatVersion()
244    return 0;
245  }
246
247  public RegionState toRegionState() {
248    return new RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation());
249  }
250
251  @Override
252  public int compareTo(final RegionStateNode other) {
253    // NOTE: RegionInfo sort by table first, so we are relying on that.
254    // we have a TestRegionState#testOrderedByTable() that check for that.
255    return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo());
256  }
257
258  @Override
259  public int hashCode() {
260    return getRegionInfo().hashCode();
261  }
262
263  @Override
264  public boolean equals(final Object other) {
265    if (this == other) {
266      return true;
267    }
268    if (!(other instanceof RegionStateNode)) {
269      return false;
270    }
271    return compareTo((RegionStateNode) other) == 0;
272  }
273
274  @Override
275  public String toString() {
276    return toDescriptiveString();
277  }
278
279  public String toShortString() {
280    // rit= is the current Region-In-Transition State -- see State enum.
281    return String.format("state=%s, location=%s", getState(), getRegionLocation());
282  }
283
284  public String toDescriptiveString() {
285    return String.format("%s, table=%s, region=%s", toShortString(), getTable(),
286      getRegionInfo().getEncodedName());
287  }
288
289  public void checkOnline() throws DoNotRetryRegionException {
290    RegionInfo ri = getRegionInfo();
291    State s = state;
292    if (s != State.OPEN) {
293      throw new DoNotRetryRegionException(ri.getEncodedName() + " is not OPEN; state=" + s);
294    }
295    if (ri.isSplitParent()) {
296      throw new DoNotRetryRegionException(
297        ri.getEncodedName() + " is not online (splitParent=true)");
298    }
299    if (ri.isSplit()) {
300      throw new DoNotRetryRegionException(ri.getEncodedName() + " has split=true");
301    }
302    if (ri.isOffline()) {
303      // RegionOfflineException is not instance of DNRIOE so wrap it.
304      throw new DoNotRetryRegionException(new RegionOfflineException(ri.getEncodedName()));
305    }
306  }
307
308  public void lock() {
309    lock.lock();
310  }
311
312  public void unlock() {
313    lock.unlock();
314  }
315}