001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import com.google.errorprone.annotations.RestrictedApi;
021import java.io.IOException;
022import java.util.Optional;
023import org.apache.hadoop.hbase.ServerName;
024import org.apache.hadoop.hbase.TableName;
025import org.apache.hadoop.hbase.client.RegionInfo;
026import org.apache.hadoop.hbase.master.RegionState;
027import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
028import org.apache.hadoop.hbase.master.assignment.RegionStates;
029import org.apache.hadoop.hbase.master.assignment.ServerState;
030import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
031import org.apache.hadoop.hbase.procedure2.Procedure;
032import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
033import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
034import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
035import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
036import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
037import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
038import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure;
039import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
040import org.apache.hadoop.hbase.regionserver.SnapshotRegionCallable;
041import org.apache.hadoop.hbase.util.RetryCounter;
042import org.apache.yetus.audience.InterfaceAudience;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045
046import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
047import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
048import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotRegionProcedureStateData;
049import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
050import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
051
052/**
053 * A remote procedure which is used to send region snapshot request to region server. The basic
054 * logic of SnapshotRegionProcedure is similar like {@link ServerRemoteProcedure}, only with a
055 * little difference, when {@link FailedRemoteDispatchException} was thrown, SnapshotRegionProcedure
056 * will sleep some time and continue retrying until success.
057 */
058@InterfaceAudience.Private
059public class SnapshotRegionProcedure extends Procedure<MasterProcedureEnv>
060  implements TableProcedureInterface, RemoteProcedure<MasterProcedureEnv, ServerName> {
061  private static final Logger LOG = LoggerFactory.getLogger(SnapshotRegionProcedure.class);
062
063  private SnapshotDescription snapshot;
064  private ProcedureEvent<?> event;
065  private RegionInfo region;
066  private boolean dispatched;
067  private boolean succ;
068  private RetryCounter retryCounter;
069
070  public SnapshotRegionProcedure() {
071  }
072
073  public SnapshotRegionProcedure(SnapshotDescription snapshot, RegionInfo region) {
074    this.snapshot = snapshot;
075    this.region = region;
076  }
077
078  @Override
079  protected LockState acquireLock(final MasterProcedureEnv env) {
080    if (env.getProcedureScheduler().waitRegions(this, getTableName(), region)) {
081      return LockState.LOCK_EVENT_WAIT;
082    }
083    return LockState.LOCK_ACQUIRED;
084  }
085
086  @Override
087  protected void releaseLock(final MasterProcedureEnv env) {
088    env.getProcedureScheduler().wakeRegions(this, getTableName(), region);
089  }
090
091  @Override
092  protected boolean holdLock(MasterProcedureEnv env) {
093    return false;
094  }
095
096  @Override
097  public Optional<RemoteOperation> remoteCallBuild(MasterProcedureEnv env, ServerName serverName) {
098    return Optional
099      .of(new RSProcedureDispatcher.ServerOperation(this, getProcId(), SnapshotRegionCallable.class,
100        MasterProcedureProtos.SnapshotRegionParameter.newBuilder()
101          .setRegion(ProtobufUtil.toRegionInfo(region)).setSnapshot(snapshot).build().toByteArray(),
102        env.getMasterServices().getMasterActiveTime()));
103  }
104
105  @Override
106  public void remoteCallFailed(MasterProcedureEnv env, ServerName serverName, IOException e) {
107    complete(env, e);
108  }
109
110  @Override
111  public void remoteOperationCompleted(MasterProcedureEnv env) {
112    complete(env, null);
113  }
114
115  @Override
116  public void remoteOperationFailed(MasterProcedureEnv env, RemoteProcedureException e) {
117    complete(env, e);
118  }
119
120  // keep retrying until success
121  private void complete(MasterProcedureEnv env, Throwable error) {
122    if (isFinished()) {
123      LOG.info("This procedure {} is already finished, skip the rest processes", this.getProcId());
124      return;
125    }
126    if (event == null) {
127      LOG.warn("procedure event for {} is null, maybe the procedure is created when recovery",
128        getProcId());
129      return;
130    }
131    if (error == null) {
132      LOG.info("finish snapshot {} on region {}", snapshot.getName(), region.getEncodedName());
133      succ = true;
134    }
135
136    event.wake(env.getProcedureScheduler());
137    event = null;
138  }
139
140  @Override
141  public TableName getTableName() {
142    return region.getTable();
143  }
144
145  @Override
146  public TableOperationType getTableOperationType() {
147    return TableOperationType.REGION_SNAPSHOT;
148  }
149
150  @Override
151  protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
152    throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
153    if (dispatched) {
154      if (succ) {
155        return null;
156      }
157      dispatched = false;
158    }
159
160    RegionStates regionStates = env.getAssignmentManager().getRegionStates();
161    RegionStateNode regionNode = regionStates.getRegionStateNode(region);
162    regionNode.lock();
163    try {
164      if (regionNode.getProcedure() != null) {
165        setTimeoutForSuspend(env, String.format("region %s has a TRSP attached %s",
166          region.getRegionNameAsString(), regionNode.getProcedure()));
167        throw new ProcedureSuspendedException();
168      }
169      if (!regionNode.isInState(RegionState.State.OPEN)) {
170        setTimeoutForSuspend(env, String.format("region state of %s is %s",
171          region.getRegionNameAsString(), regionNode.getState()));
172        throw new ProcedureSuspendedException();
173      }
174      ServerName targetServer = regionNode.getRegionLocation();
175      if (targetServer == null) {
176        setTimeoutForSuspend(env,
177          String.format("target server of region %s is null", region.getRegionNameAsString()));
178        throw new ProcedureSuspendedException();
179      }
180      ServerState serverState = regionStates.getServerNode(targetServer).getState();
181      if (serverState != ServerState.ONLINE) {
182        setTimeoutForSuspend(env, String.format("target server of region %s %s is in state %s",
183          region.getRegionNameAsString(), targetServer, serverState));
184        throw new ProcedureSuspendedException();
185      }
186      try {
187        env.getRemoteDispatcher().addOperationToNode(targetServer, this);
188        dispatched = true;
189        event = new ProcedureEvent<>(this);
190        event.suspendIfNotReady(this);
191        throw new ProcedureSuspendedException();
192      } catch (FailedRemoteDispatchException e) {
193        setTimeoutForSuspend(env, "Failed send request to " + targetServer);
194        throw new ProcedureSuspendedException();
195      }
196    } finally {
197      regionNode.unlock();
198    }
199  }
200
201  @Override
202  protected void rollback(MasterProcedureEnv env) {
203    throw new UnsupportedOperationException();
204  }
205
206  private void setTimeoutForSuspend(MasterProcedureEnv env, String reason) {
207    if (retryCounter == null) {
208      retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
209    }
210    long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
211    LOG.warn("{} can not run currently because {}, wait {} ms to retry", this, reason, backoff);
212    setTimeout(Math.toIntExact(backoff));
213    setState(ProcedureState.WAITING_TIMEOUT);
214    skipPersistence();
215  }
216
217  @Override
218  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
219    setState(ProcedureState.RUNNABLE);
220    env.getProcedureScheduler().addFront(this);
221    return false;
222  }
223
224  @Override
225  protected boolean abort(MasterProcedureEnv env) {
226    return false;
227  }
228
229  @Override
230  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
231    SnapshotRegionProcedureStateData.Builder builder =
232      SnapshotRegionProcedureStateData.newBuilder();
233    builder.setSnapshot(snapshot);
234    builder.setRegion(ProtobufUtil.toRegionInfo(region));
235    serializer.serialize(builder.build());
236  }
237
238  @Override
239  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
240    SnapshotRegionProcedureStateData data =
241      serializer.deserialize(SnapshotRegionProcedureStateData.class);
242    this.snapshot = data.getSnapshot();
243    this.region = ProtobufUtil.toRegionInfo(data.getRegion());
244  }
245
246  @Override
247  public String getProcName() {
248    return getClass().getSimpleName() + " " + region.getEncodedName();
249  }
250
251  @Override
252  protected void toStringClassDetails(StringBuilder builder) {
253    builder.append(getProcName());
254  }
255
256  @Override
257  protected boolean waitInitialized(MasterProcedureEnv env) {
258    return env.waitInitialized(this);
259  }
260
261  public RegionInfo getRegion() {
262    return region;
263  }
264
265  @RestrictedApi(explanation = "Should only be called in tests", link = "",
266      allowedOnPath = ".*(/src/test/.*|TestSnapshotProcedure).java")
267  boolean inRetrying() {
268    return retryCounter != null;
269  }
270}