001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import java.util.Arrays;
021import java.util.concurrent.atomic.AtomicInteger;
022import org.apache.hadoop.hbase.HConstants;
023import org.apache.hadoop.hbase.ServerName;
024import org.apache.hadoop.hbase.TableName;
025import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
026import org.apache.hadoop.hbase.client.RegionInfo;
027import org.apache.hadoop.hbase.client.RegionOfflineException;
028import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
029import org.apache.hadoop.hbase.master.RegionState;
030import org.apache.hadoop.hbase.master.RegionState.State;
031import org.apache.hadoop.hbase.procedure2.Procedure;
032import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
033import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
034import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
035import org.apache.yetus.audience.InterfaceAudience;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039/**
040 * Current Region State. Most fields are synchronized with meta region, i.e, we will update meta
041 * immediately after we modify this RegionStateNode, and usually under the lock. The only exception
042 * is {@link #lastHost}, which should not be used for critical condition.
043 * <p/>
044 * Typically, the only way to modify this class is through {@link TransitRegionStateProcedure}, and
045 * we will record the TRSP along with this RegionStateNode to make sure that there could at most one
046 * TRSP. For other operations, such as SCP, we will first get the lock, and then try to schedule a
047 * TRSP. If there is already one, then the solution will be different:
048 * <ul>
049 * <li>For SCP, we will update the region state in meta to tell the TRSP to retry.</li>
050 * <li>For DisableTableProcedure, as we have the xlock, we can make sure that the TRSP has not been
051 * executed yet, so just unset it and attach a new one. The original one will quit immediately when
052 * executing.</li>
053 * <li>For split/merge, we will fail immediately as there is no actual operations yet so no
054 * harm.</li>
055 * <li>For EnableTableProcedure/TruncateTableProcedure, we can make sure that there will be no TRSP
056 * attached with the RSNs.</li>
057 * <li>For other procedures, you'd better use ReopenTableRegionsProcedure. The RTRP will take care
058 * of lots of corner cases when reopening regions.</li>
059 * </ul>
060 * <p/>
061 * Several fields are declared with {@code volatile}, which means you are free to get it without
062 * lock, but usually you should not use these fields without locking for critical condition, as it
063 * will be easily to introduce inconsistency. For example, you are free to dump the status and show
064 * it on web without locking, but if you want to change the state of the RegionStateNode by checking
065 * the current state, you'd better have the lock...
066 */
067@InterfaceAudience.Private
068public class RegionStateNode implements Comparable<RegionStateNode> {
069
070  private static final Logger LOG = LoggerFactory.getLogger(RegionStateNode.class);
071  // It stores count of all active TRSP in the master. Had to pass it from regionStates to
072  // maintain the count
073  private final AtomicInteger activeTransitProcedureCount;
074
075  private static final class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
076    public AssignmentProcedureEvent(final RegionInfo regionInfo) {
077      super(regionInfo);
078    }
079  }
080
081  private final RegionStateNodeLock lock;
082  private final RegionInfo regionInfo;
083  private final ProcedureEvent<?> event;
084
085  // volatile only for getLastUpdate and test usage, the upper layer should sync on the
086  // RegionStateNode before accessing usually.
087  private volatile TransitRegionStateProcedure procedure = null;
088  private volatile ServerName regionLocation = null;
089  // notice that, the lastHost will only be updated when a region is successfully CLOSED through
090  // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync
091  // with the data in meta.
092  private volatile ServerName lastHost = null;
093  /**
094   * A Region-in-Transition (RIT) moves through states. See {@link State} for complete list. A
095   * Region that is opened moves from OFFLINE => OPENING => OPENED.
096   */
097  private volatile State state = State.OFFLINE;
098
099  /**
100   * Updated whenever a call to {@link #setRegionLocation(ServerName)} or
101   * {@link #setState(RegionState.State, RegionState.State...)} or {@link #crashed(long)}.
102   */
103  private volatile long lastUpdate = 0;
104
105  private volatile long openSeqNum = HConstants.NO_SEQNUM;
106
107  RegionStateNode(RegionInfo regionInfo, AtomicInteger activeTransitProcedureCount) {
108    this.regionInfo = regionInfo;
109    this.event = new AssignmentProcedureEvent(regionInfo);
110    this.lock = new RegionStateNodeLock(regionInfo);
111    this.activeTransitProcedureCount = activeTransitProcedureCount;
112  }
113
114  /**
115   * @param update   new region state this node should be assigned.
116   * @param expected current state should be in this given list of expected states
117   * @return true, if current state is in expected list; otherwise false.
118   */
119  public boolean setState(final State update, final State... expected) {
120    if (!isInState(expected)) {
121      return false;
122    }
123    this.state = update;
124    this.lastUpdate = EnvironmentEdgeManager.currentTime();
125    return true;
126  }
127
128  /**
129   * Put region into OFFLINE mode (set state and clear location).
130   * @return Last recorded server deploy
131   */
132  public ServerName offline() {
133    setState(State.OFFLINE);
134    return setRegionLocation(null);
135  }
136
137  /**
138   * Set new {@link State} but only if currently in <code>expected</code> State (if not, throw
139   * {@link UnexpectedStateException}.
140   */
141  public void transitionState(final State update, final State... expected)
142    throws UnexpectedStateException {
143    if (!setState(update, expected)) {
144      throw new UnexpectedStateException("Expected " + Arrays.toString(expected)
145        + " so could move to " + update + " but current state=" + getState());
146    }
147  }
148
149  /**
150   * Notice that, we will return true if {@code expected} is empty.
151   * <p/>
152   * This is a bit strange but we need this logic, for example, we can change the state to OPENING
153   * from any state, as in SCP we will not change the state to CLOSED before opening the region.
154   */
155  public boolean isInState(State... expected) {
156    if (expected.length == 0) {
157      return true;
158    }
159    return getState().matches(expected);
160  }
161
162  public boolean isStuck() {
163    return isInState(State.FAILED_OPEN) && getProcedure() != null;
164  }
165
166  public boolean isTransitionScheduled() {
167    return getProcedure() != null;
168  }
169
170  /**
171   * Return whether the region has been split and not online.
172   * <p/>
173   * In this method we will test both region info and state, and will return true if either of the
174   * test returns true. Please see the comments in
175   * {@link AssignmentManager#markRegionAsSplit(RegionInfo, ServerName, RegionInfo, RegionInfo)} for
176   * more details on why we need to test two conditions.
177   */
178  public boolean isSplit() {
179    return regionInfo.isSplit() || isInState(State.SPLIT);
180  }
181
182  public long getLastUpdate() {
183    TransitRegionStateProcedure proc = this.procedure;
184    if (proc != null) {
185      long lastUpdate = proc.getLastUpdate();
186      return lastUpdate != 0 ? lastUpdate : proc.getSubmittedTime();
187    }
188    return lastUpdate;
189  }
190
191  public void setLastHost(final ServerName serverName) {
192    this.lastHost = serverName;
193  }
194
195  public void crashed(long crashTime) {
196    this.lastUpdate = crashTime;
197  }
198
199  public void setOpenSeqNum(final long seqId) {
200    this.openSeqNum = seqId;
201  }
202
203  public ServerName setRegionLocation(final ServerName serverName) {
204    ServerName lastRegionLocation = this.regionLocation;
205    if (LOG.isTraceEnabled() && serverName == null) {
206      LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE"));
207    }
208    this.regionLocation = serverName;
209    this.lastUpdate = EnvironmentEdgeManager.currentTime();
210    return lastRegionLocation;
211  }
212
213  public TransitRegionStateProcedure setProcedure(TransitRegionStateProcedure proc) {
214    assert this.procedure == null;
215    this.procedure = proc;
216    activeTransitProcedureCount.incrementAndGet();
217    return proc;
218  }
219
220  public void unsetProcedure(TransitRegionStateProcedure proc) {
221    assert this.procedure == proc;
222    activeTransitProcedureCount.decrementAndGet();
223    this.procedure = null;
224  }
225
226  public TransitRegionStateProcedure getProcedure() {
227    return procedure;
228  }
229
230  public ProcedureEvent<?> getProcedureEvent() {
231    return event;
232  }
233
234  public RegionInfo getRegionInfo() {
235    return regionInfo;
236  }
237
238  public TableName getTable() {
239    return getRegionInfo().getTable();
240  }
241
242  public boolean isSystemTable() {
243    return getTable().isSystemTable();
244  }
245
246  public ServerName getLastHost() {
247    return lastHost;
248  }
249
250  public ServerName getRegionLocation() {
251    return regionLocation;
252  }
253
254  public String getRegionServerName() {
255    ServerName sn = getRegionLocation();
256    if (sn != null) {
257      return sn.getServerName();
258    }
259    return null;
260  }
261
262  public State getState() {
263    return state;
264  }
265
266  public long getOpenSeqNum() {
267    return openSeqNum;
268  }
269
270  public int getFormatVersion() {
271    // we don't have any format for now
272    // it should probably be in regionInfo.getFormatVersion()
273    return 0;
274  }
275
276  public RegionState toRegionState() {
277    return new RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation());
278  }
279
280  @Override
281  public int compareTo(final RegionStateNode other) {
282    // NOTE: RegionInfo sort by table first, so we are relying on that.
283    // we have a TestRegionState#testOrderedByTable() that check for that.
284    return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo());
285  }
286
287  @Override
288  public int hashCode() {
289    return getRegionInfo().hashCode();
290  }
291
292  @Override
293  public boolean equals(final Object other) {
294    if (this == other) {
295      return true;
296    }
297    if (!(other instanceof RegionStateNode)) {
298      return false;
299    }
300    return compareTo((RegionStateNode) other) == 0;
301  }
302
303  @Override
304  public String toString() {
305    return toDescriptiveString();
306  }
307
308  public String toShortString() {
309    // rit= is the current Region-In-Transition State -- see State enum.
310    return String.format("state=%s, location=%s", getState(), getRegionLocation());
311  }
312
313  public String toDescriptiveString() {
314    return String.format("%s, table=%s, region=%s", toShortString(), getTable(),
315      getRegionInfo().getEncodedName());
316  }
317
318  public void checkOnline() throws DoNotRetryRegionException {
319    RegionInfo ri = getRegionInfo();
320    State s = state;
321    if (s != State.OPEN) {
322      throw new DoNotRetryRegionException(ri.getEncodedName() + " is not OPEN; state=" + s);
323    }
324    if (ri.isSplitParent()) {
325      throw new DoNotRetryRegionException(
326        ri.getEncodedName() + " is not online (splitParent=true)");
327    }
328    if (ri.isSplit()) {
329      throw new DoNotRetryRegionException(ri.getEncodedName() + " has split=true");
330    }
331    if (ri.isOffline()) {
332      // RegionOfflineException is not instance of DNRIOE so wrap it.
333      throw new DoNotRetryRegionException(new RegionOfflineException(ri.getEncodedName()));
334    }
335  }
336
337  // The below 3 methods are for normal locking operation, where the thread owner is the current
338  // thread. Typically you just need to use these 3 methods, and use try..finally to release the
339  // lock in the finally block
340  /**
341   * @see RegionStateNodeLock#lock()
342   */
343  public void lock() {
344    lock.lock();
345  }
346
347  /**
348   * @see RegionStateNodeLock#tryLock()
349   */
350  public boolean tryLock() {
351    return lock.tryLock();
352  }
353
354  /**
355   * @see RegionStateNodeLock#unlock()
356   */
357  public void unlock() {
358    lock.unlock();
359  }
360
361  // The below 3 methods are for locking region state node when executing procedures, where we may
362  // do some time consuming work under the lock, for example, updating meta. As we may suspend the
363  // procedure while holding the lock and then release it when the procedure is back, in another
364  // thread, so we need to use the procedure itself as owner, instead of the current thread. You can
365  // see the usage in TRSP, SCP, and RegionRemoteProcedureBase for more details.
366  // Notice that, this does not mean you must use these 3 methods when locking region state node in
367  // procedure, you are free to use the above 3 methods if you do not want to hold the lock when
368  // suspending the procedure.
369  /**
370   * @see RegionStateNodeLock#lock(Procedure, Runnable)
371   */
372  public void lock(Procedure<?> proc, Runnable wakeUp) throws ProcedureSuspendedException {
373    lock.lock(proc, wakeUp);
374  }
375
376  /**
377   * @see RegionStateNodeLock#tryLock(Procedure)
378   */
379  public boolean tryLock(Procedure<?> proc) {
380    return lock.tryLock(proc);
381  }
382
383  /**
384   * @see RegionStateNodeLock#unlock(Procedure)
385   */
386  public void unlock(Procedure<?> proc) {
387    lock.unlock(proc);
388  }
389
390  /**
391   * @see RegionStateNodeLock#isLocked()
392   */
393  boolean isLocked() {
394    return lock.isLocked();
395  }
396
397  /**
398   * @see RegionStateNodeLock#isLockedBy(Object)
399   */
400  public boolean isLockedBy(Procedure<?> proc) {
401    return lock.isLockedBy(proc);
402  }
403}