001/*
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.master.assignment;
021
022import java.io.IOException;
023import java.util.Comparator;
024
025import org.apache.hadoop.hbase.ServerName;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.client.RegionInfo;
028import org.apache.hadoop.hbase.client.RetriesExhaustedException;
029import org.apache.hadoop.hbase.client.TableState;
030import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
031import org.apache.hadoop.hbase.master.MasterServices;
032import org.apache.hadoop.hbase.master.RegionState.State;
033import org.apache.hadoop.hbase.master.TableStateManager;
034import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
035import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
036import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionOpenOperation;
037import org.apache.hadoop.hbase.master.procedure.ServerCrashException;
038import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
039import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
040import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
041import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
042import org.apache.yetus.audience.InterfaceAudience;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
046import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.AssignRegionStateData;
047import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState;
048import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
049
050/**
051 * Procedure that describe the assignment of a single region.
052 * There can only be one RegionTransitionProcedure per region running at a time
053 * since each procedure takes a lock on the region.
054 *
055 * <p>The Assign starts by pushing the "assign" operation to the AssignmentManager
056 * and then will go in a "waiting" state.
057 * The AM will batch the "assign" requests and ask the Balancer where to put
058 * the region (the various policies will be respected: retain, round-robin, random).
059 * Once the AM and the balancer have found a place for the region the procedure
060 * will be resumed and an "open region" request will be placed in the Remote Dispatcher
061 * queue, and the procedure once again will go in a "waiting state".
062 * The Remote Dispatcher will batch the various requests for that server and
063 * they will be sent to the RS for execution.
064 * The RS will complete the open operation by calling master.reportRegionStateTransition().
065 * The AM will intercept the transition report, and notify the procedure.
066 * The procedure will finish the assignment by publishing to new state on meta
067 * or it will retry the assignment.
068 *
069 * <p>This procedure does not rollback when beyond the first
070 * REGION_TRANSITION_QUEUE step; it will press on trying to assign in the face of
071 * failure. Should we ignore rollback calls to Assign/Unassign then? Or just
072 * remove rollback here?
073 */
074// TODO: Add being able to assign a region to open read-only.
075@InterfaceAudience.Private
076public class AssignProcedure extends RegionTransitionProcedure {
077  private static final Logger LOG = LoggerFactory.getLogger(AssignProcedure.class);
078
079  /**
080   * Set to true when we need recalibrate -- choose a new target -- because original assign failed.
081   */
082  private boolean forceNewPlan = false;
083
084  /**
085   * Gets set as desired target on move, merge, etc., when we want to go to a particular server.
086   * We may not be able to respect this request but will try. When it is NOT set, then we ask
087   * the balancer to assign. This value is used below in startTransition to set regionLocation if
088   * non-null. Setting regionLocation in regionServerNode is how we override balancer setting
089   * destination.
090   */
091  protected volatile ServerName targetServer;
092
093  /**
094   * Comparator that will sort AssignProcedures so meta assigns come first, then system table
095   * assigns and finally user space assigns.
096   */
097  public static final CompareAssignProcedure COMPARATOR = new CompareAssignProcedure();
098
099  public AssignProcedure() {
100    // Required by the Procedure framework to create the procedure on replay
101    super();
102  }
103
104  public AssignProcedure(final RegionInfo regionInfo) {
105    this(regionInfo, null);
106  }
107
108  public AssignProcedure(final RegionInfo regionInfo, final ServerName destinationServer) {
109    this(regionInfo, destinationServer, false);
110  }
111
112  public AssignProcedure(final RegionInfo regionInfo, final ServerName destinationServer,
113      boolean override) {
114    super(regionInfo, override);
115    this.targetServer = destinationServer;
116  }
117
118  @Override
119  public TableOperationType getTableOperationType() {
120    return TableOperationType.REGION_ASSIGN;
121  }
122
123  @Override
124  protected boolean isRollbackSupported(final RegionTransitionState state) {
125    switch (state) {
126      case REGION_TRANSITION_QUEUE:
127        return true;
128      default:
129        return false;
130    }
131  }
132
133  @Override
134  protected void serializeStateData(ProcedureStateSerializer serializer)
135      throws IOException {
136    final AssignRegionStateData.Builder state = AssignRegionStateData.newBuilder()
137        .setTransitionState(getTransitionState())
138        .setRegionInfo(ProtobufUtil.toRegionInfo(getRegionInfo()));
139    if (forceNewPlan) {
140      state.setForceNewPlan(true);
141    }
142    if (this.targetServer != null) {
143      state.setTargetServer(ProtobufUtil.toServerName(this.targetServer));
144    }
145    if (getAttempt() > 0) {
146      state.setAttempt(getAttempt());
147    }
148    if (isOverride()) {
149      state.setOverride(isOverride());
150    }
151    serializer.serialize(state.build());
152  }
153
154  @Override
155  protected void deserializeStateData(ProcedureStateSerializer serializer)
156      throws IOException {
157    final AssignRegionStateData state = serializer.deserialize(AssignRegionStateData.class);
158    setTransitionState(state.getTransitionState());
159    setRegionInfo(ProtobufUtil.toRegionInfo(state.getRegionInfo()));
160    forceNewPlan = state.getForceNewPlan();
161    setOverride(state.getOverride());
162    if (state.hasTargetServer()) {
163      this.targetServer = ProtobufUtil.toServerName(state.getTargetServer());
164    }
165    if (state.hasAttempt()) {
166      setAttempt(state.getAttempt());
167    }
168  }
169
170  /**
171   * Used by ServerCrashProcedure too skip creating Assigns if not needed.
172   * @return Skip out on the assign; returns 'true'/assign if exception.
173   */
174  public static boolean assign(MasterServices masterServices, RegionInfo ri) {
175    try {
176      return assign(masterServices,
177          masterServices.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(ri));
178    } catch (IOException e) {
179      LOG.warn("Letting assign proceed", e);
180    }
181    return true;
182  }
183
184  protected static boolean assign(MasterServices masterServices, final RegionStateNode regionNode)
185      throws IOException {
186    // If the region is already open we can't do much...
187    if (regionNode.isInState(State.OPEN) &&
188        masterServices.getServerManager().isServerOnline(regionNode.getRegionLocation())) {
189      LOG.info("Assigned, not reassigning {}", regionNode.toShortString());
190      return false;
191    }
192    // Don't assign if table is in disabling or disabled state.
193    TableStateManager tsm = masterServices.getTableStateManager();
194    TableName tn = regionNode.getRegionInfo().getTable();
195    TableState ts = tsm.getTableState(tn);
196    if (ts.isDisabledOrDisabling()) {
197      LOG.info("{} so SKIPPING assign of {}", ts, regionNode.getRegionInfo().getEncodedName());
198      return false;
199    }
200    return true;
201  }
202
203  @Override
204  protected boolean startTransition(final MasterProcedureEnv env, final RegionStateNode regionNode)
205      throws IOException {
206    if (!assign(env.getMasterServices(), regionNode)) {
207      return false;
208    }
209    // If the region is SPLIT, we can't assign it. But state might be CLOSED, rather than
210    // SPLIT which is what a region gets set to when unassigned as part of SPLIT. FIX.
211    if (regionNode.isInState(State.SPLIT) ||
212        (regionNode.getRegionInfo().isOffline() && regionNode.getRegionInfo().isSplit())) {
213      LOG.info("SPLIT, cannot be assigned; " + this + "; " + regionNode +
214        "; hri=" + regionNode.getRegionInfo());
215      return false;
216    }
217
218    // If we haven't started the operation yet, we can abort
219    if (aborted.get() && regionNode.isInState(State.CLOSED, State.OFFLINE)) {
220      if (incrementAndCheckMaxAttempts(env, regionNode)) {
221        regionNode.setState(State.FAILED_OPEN);
222        setFailure(getClass().getSimpleName(),
223          new RetriesExhaustedException("Max attempts exceeded"));
224      } else {
225        setAbortFailure(getClass().getSimpleName(), "Abort requested");
226      }
227      return false;
228    }
229
230    // Send assign (add into assign-pool). We call regionNode.offline below to set state to
231    // OFFLINE and to clear the region location. Setting a new regionLocation here is how we retain
232    // old assignment or specify target server if a move or merge. See
233    // AssignmentManager#processAssignQueue. Otherwise, balancer gives us location.
234    // TODO: Region will be set into OFFLINE state below regardless of what its previous state was
235    // This is dangerous? Wrong? What if region was in an unexpected state?
236    ServerName lastRegionLocation = regionNode.offline();
237    boolean retain = false;
238    if (!forceNewPlan) {
239      if (this.targetServer != null) {
240        retain = targetServer.equals(lastRegionLocation);
241        regionNode.setRegionLocation(targetServer);
242      } else {
243        if (lastRegionLocation != null) {
244          // Try and keep the location we had before we offlined.
245          retain = true;
246          regionNode.setRegionLocation(lastRegionLocation);
247        } else if (regionNode.getLastHost() != null) {
248          retain = true;
249          LOG.info("Setting lastHost as the region location " + regionNode.getLastHost());
250          regionNode.setRegionLocation(regionNode.getLastHost());
251        }
252      }
253    }
254    LOG.info("Starting " + this + "; " + regionNode.toShortString() +
255        "; forceNewPlan=" + this.forceNewPlan +
256        ", retain=" + retain);
257    env.getAssignmentManager().queueAssign(regionNode);
258    return true;
259  }
260
261  @Override
262  protected boolean updateTransition(final MasterProcedureEnv env, final RegionStateNode regionNode)
263  throws IOException, ProcedureSuspendedException {
264    // TODO: crash if destinationServer is specified and not online
265    // which is also the case when the balancer provided us with a different location.
266    if (LOG.isTraceEnabled()) {
267      LOG.trace("Update " + this + "; " + regionNode.toShortString());
268    }
269    if (regionNode.getRegionLocation() == null) {
270      setTransitionState(RegionTransitionState.REGION_TRANSITION_QUEUE);
271      return true;
272    }
273
274    if (!isServerOnline(env, regionNode)) {
275      // TODO: is this correct? should we wait the chore/ssh?
276      LOG.info("Server not online, re-queuing " + this + "; " + regionNode.toShortString());
277      setTransitionState(RegionTransitionState.REGION_TRANSITION_QUEUE);
278      return true;
279    }
280
281    if (env.getAssignmentManager().waitServerReportEvent(regionNode.getRegionLocation(), this)) {
282      LOG.info("Early suspend! " + this + "; " + regionNode.toShortString());
283      throw new ProcedureSuspendedException();
284    }
285
286    if (regionNode.isInState(State.OPEN)) {
287      LOG.info("Already assigned: " + this + "; " + regionNode.toShortString());
288      return false;
289    }
290
291    // Transition regionNode State. Set it to OPENING. Update hbase:meta, and add
292    // region to list of regions on the target regionserver. Need to UNDO if failure!
293    env.getAssignmentManager().markRegionAsOpening(regionNode);
294
295    // TODO: Requires a migration to be open by the RS?
296    // regionNode.getFormatVersion()
297
298    if (!addToRemoteDispatcher(env, regionNode.getRegionLocation())) {
299      // Failed the dispatch BUT addToRemoteDispatcher internally does
300      // cleanup on failure -- even the undoing of markRegionAsOpening above --
301      // so nothing more to do here; in fact we need to get out of here
302      // fast since we've been put back on the scheduler.
303    }
304
305    // We always return true, even if we fail dispatch because addToRemoteDispatcher
306    // failure processing sets state back to REGION_TRANSITION_QUEUE so we try again;
307    // i.e. return true to keep the Procedure running; it has been reset to startover.
308    return true;
309  }
310
311  @Override
312  protected void finishTransition(final MasterProcedureEnv env, final RegionStateNode regionNode)
313      throws IOException {
314    env.getAssignmentManager().markRegionAsOpened(regionNode);
315    // This success may have been after we failed open a few times. Be sure to cleanup any
316    // failed open references. See #incrementAndCheckMaxAttempts and where it is called.
317    env.getAssignmentManager().getRegionStates().removeFromFailedOpen(regionNode.getRegionInfo());
318  }
319
320  @Override
321  protected void reportTransition(final MasterProcedureEnv env, final RegionStateNode regionNode,
322      final TransitionCode code, final long openSeqNum) throws UnexpectedStateException {
323    switch (code) {
324      case OPENED:
325        if (openSeqNum < 0) {
326          throw new UnexpectedStateException("Received report unexpected " + code +
327              " transition openSeqNum=" + openSeqNum + ", " + regionNode);
328        }
329        if (openSeqNum < regionNode.getOpenSeqNum()) {
330          // Don't bother logging if openSeqNum == 0
331          if (openSeqNum != 0) {
332            LOG.warn("Skipping update of open seqnum with " + openSeqNum +
333                " because current seqnum=" + regionNode.getOpenSeqNum());
334          }
335        } else {
336          regionNode.setOpenSeqNum(openSeqNum);
337        }
338        // Leave the state here as OPENING for now. We set it to OPEN in
339        // REGION_TRANSITION_FINISH section where we do a bunch of checks.
340        // regionNode.setState(RegionState.State.OPEN, RegionState.State.OPENING);
341        setTransitionState(RegionTransitionState.REGION_TRANSITION_FINISH);
342        break;
343      case FAILED_OPEN:
344        handleFailure(env, regionNode);
345        break;
346      default:
347        throw new UnexpectedStateException("Received report unexpected " + code +
348            " transition openSeqNum=" + openSeqNum + ", " + regionNode.toShortString() +
349            ", " + this + ", expected OPENED or FAILED_OPEN.");
350    }
351  }
352
353  /**
354   * Called when dispatch or subsequent OPEN request fail. Can be run by the
355   * inline dispatch call or later by the ServerCrashProcedure. Our state is
356   * generally OPENING. Cleanup and reset to OFFLINE and put our Procedure
357   * State back to REGION_TRANSITION_QUEUE so the Assign starts over.
358   */
359  private void handleFailure(final MasterProcedureEnv env, final RegionStateNode regionNode) {
360    if (incrementAndCheckMaxAttempts(env, regionNode)) {
361      aborted.set(true);
362    }
363    this.forceNewPlan = true;
364    this.targetServer = null;
365    regionNode.offline();
366    // We were moved to OPENING state before dispatch. Undo. It is safe to call
367    // this method because it checks for OPENING first.
368    env.getAssignmentManager().undoRegionAsOpening(regionNode);
369    setTransitionState(RegionTransitionState.REGION_TRANSITION_QUEUE);
370  }
371
372  private boolean incrementAndCheckMaxAttempts(final MasterProcedureEnv env,
373      final RegionStateNode regionNode) {
374    final int retries = env.getAssignmentManager().getRegionStates().
375        addToFailedOpen(regionNode).incrementAndGetRetries();
376    int max = env.getAssignmentManager().getAssignMaxAttempts();
377    LOG.info("Retry=" + retries + " of max=" + max + "; " +
378        this + "; " + regionNode.toShortString());
379    return retries >= max;
380  }
381
382  @Override
383  public RemoteOperation remoteCallBuild(final MasterProcedureEnv env, final ServerName serverName) {
384    assert serverName.equals(getRegionState(env).getRegionLocation());
385    return new RegionOpenOperation(this, getRegionInfo(),
386        env.getAssignmentManager().getFavoredNodes(getRegionInfo()), false);
387  }
388
389  @Override
390  protected boolean remoteCallFailed(final MasterProcedureEnv env, final RegionStateNode regionNode,
391      final IOException exception) {
392    RegionTransitionState tState = getTransitionState();
393    if (tState == RegionTransitionState.REGION_TRANSITION_FINISH
394        && exception instanceof ServerCrashException) {
395      // if we found that AssignProcedure is at this stage, then ServerCerash handling may/may not
396      // have any effect
397      // depending upon the race between handling of the failure and execution at
398      // REGION_TRANSITION_FINISH state
399      LOG.warn("Assign Procedure is at state:" + tState
400          + ", so Handling of Server Crash may not have any affect");
401      return false;
402    }
403    handleFailure(env, regionNode);
404    return true;
405  }
406
407  @Override
408  public void toStringClassDetails(StringBuilder sb) {
409    super.toStringClassDetails(sb);
410    if (this.targetServer != null) sb.append(", target=").append(this.targetServer);
411  }
412
413  @Override
414  public ServerName getServer(final MasterProcedureEnv env) {
415    RegionStateNode node =
416        env.getAssignmentManager().getRegionStates().getRegionStateNode(this.getRegionInfo());
417    if (node == null) return null;
418    return node.getRegionLocation();
419  }
420
421  @Override
422  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
423    return env.getAssignmentManager().getAssignmentManagerMetrics().getAssignProcMetrics();
424  }
425
426  /**
427   * Sort AssignProcedures such that meta and system assigns come first before user-space assigns.
428   * Have to do it this way w/ distinct Comparator because Procedure is already Comparable on
429   * 'Env'(?).
430   */
431  public static class CompareAssignProcedure implements Comparator<AssignProcedure> {
432    @Override
433    public int compare(AssignProcedure left, AssignProcedure right) {
434      if (left.getRegionInfo().isMetaRegion()) {
435        if (right.getRegionInfo().isMetaRegion()) {
436          return RegionInfo.COMPARATOR.compare(left.getRegionInfo(), right.getRegionInfo());
437        }
438        return -1;
439      } else if (right.getRegionInfo().isMetaRegion()) {
440        return +1;
441      }
442      if (left.getRegionInfo().getTable().isSystemTable()) {
443        if (right.getRegionInfo().getTable().isSystemTable()) {
444          return RegionInfo.COMPARATOR.compare(left.getRegionInfo(), right.getRegionInfo());
445        }
446        return -1;
447      } else if (right.getRegionInfo().getTable().isSystemTable()) {
448        return +1;
449      }
450      return RegionInfo.COMPARATOR.compare(left.getRegionInfo(), right.getRegionInfo());
451    }
452  }
453}