001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.Optional;
022import org.apache.hadoop.hbase.ServerName;
023import org.apache.hadoop.hbase.TableName;
024import org.apache.hadoop.hbase.client.RegionInfo;
025import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
026import org.apache.hadoop.hbase.procedure2.Procedure;
027import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
028import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
029import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
030import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
031import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
032import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
033import org.apache.hadoop.hbase.regionserver.SnapshotVerifyCallable;
034import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
035import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
036import org.apache.hadoop.hbase.util.RetryCounter;
037import org.apache.yetus.audience.InterfaceAudience;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
042import org.apache.hadoop.hbase.shaded.protobuf.generated.ErrorHandlingProtos;
043import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
044import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyParameter;
045import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyProcedureStateData;
046import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
047import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
048
049/**
050 * A remote procedure which is used to send verify snapshot request to region server.
051 */
052@InterfaceAudience.Private
053public class SnapshotVerifyProcedure extends ServerRemoteProcedure
054  implements TableProcedureInterface {
055  private static final Logger LOG = LoggerFactory.getLogger(SnapshotVerifyProcedure.class);
056
057  private SnapshotDescription snapshot;
058  private RegionInfo region;
059
060  private RetryCounter retryCounter;
061
062  public SnapshotVerifyProcedure() {
063  }
064
065  public SnapshotVerifyProcedure(SnapshotDescription snapshot, RegionInfo region) {
066    this.snapshot = snapshot;
067    this.region = region;
068  }
069
070  @Override
071  protected void rollback(MasterProcedureEnv env) {
072    // nothing to rollback
073  }
074
075  @Override
076  protected boolean abort(MasterProcedureEnv env) {
077    return false;
078  }
079
080  @Override
081  protected synchronized boolean complete(MasterProcedureEnv env, Throwable error) {
082    boolean isProcedureCompleted = false;
083    try {
084      if (error != null) {
085        if (error instanceof RemoteProcedureException) {
086          // remote operation failed
087          Throwable remoteEx = unwrapRemoteProcedureException((RemoteProcedureException) error);
088          if (remoteEx instanceof CorruptedSnapshotException) {
089            // snapshot is corrupted, will touch a flag file and finish the procedure
090            isProcedureCompleted = true;
091            SnapshotProcedure parent = env.getMasterServices().getMasterProcedureExecutor()
092              .getProcedure(SnapshotProcedure.class, getParentProcId());
093            if (parent != null) {
094              parent.markSnapshotCorrupted();
095            }
096          } // else unexpected exception in remote server, will retry on other servers,
097            // procedureCompleted will stay false
098        } // else the mostly like thing is that remote call failed, will retry on other servers,
099          // procedureCompleted will stay false
100      } else {
101        // remote operation finished without error
102        isProcedureCompleted = true;
103      }
104    } catch (IOException e) {
105      // if we can't create the flag file, then mark the current procedure as FAILED
106      // and rollback the whole snapshot procedure stack.
107      LOG.warn("Failed create corrupted snapshot flag file for snapshot={}, region={}",
108        snapshot.getName(), region, e);
109      setFailure("verify-snapshot", e);
110    } finally {
111      // release the worker
112      env.getMasterServices().getSnapshotManager().releaseSnapshotVerifyWorker(this, targetServer);
113    }
114    return isProcedureCompleted;
115  }
116
117  // we will wrap remote exception into a RemoteProcedureException,
118  // here we try to unwrap it
119  private Throwable unwrapRemoteProcedureException(RemoteProcedureException e) {
120    return e.getCause();
121  }
122
123  @Override
124  protected synchronized Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
125    throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
126    try {
127      // if we've already known the snapshot is corrupted, then stop scheduling
128      // the new procedures and the undispatched procedures
129      if (
130        state == MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH
131      ) {
132        SnapshotProcedure parent = env.getMasterServices().getMasterProcedureExecutor()
133          .getProcedure(SnapshotProcedure.class, getParentProcId());
134        if (parent != null && parent.isSnapshotCorrupted()) {
135          return null;
136        }
137      }
138      // acquire a worker
139      if (
140        state == MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH
141          && targetServer == null
142      ) {
143        targetServer =
144          env.getMasterServices().getSnapshotManager().acquireSnapshotVerifyWorker(this);
145      }
146      // send remote request
147      Procedure<MasterProcedureEnv>[] res = super.execute(env);
148      // retry if necessary
149      if (
150        state == MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH
151      ) {
152        // the mostly like thing is that a FailedRemoteDispatchException is thrown.
153        // we need to retry on another remote server
154        targetServer = null;
155        throw new FailedRemoteDispatchException("Failed sent request");
156      } else {
157        // the request was successfully dispatched
158        return res;
159      }
160    } catch (IOException e) {
161      // there are some cases we need to retry:
162      // 1. we can't get response from hdfs
163      // 2. the remote server crashed
164      if (retryCounter == null) {
165        retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
166      }
167      long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
168      LOG.warn("Failed to get snapshot verify result , wait {} ms to retry", backoff, e);
169      setTimeout(Math.toIntExact(backoff));
170      setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
171      skipPersistence();
172      throw new ProcedureSuspendedException();
173    }
174  }
175
176  @Override
177  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
178    setState(ProcedureProtos.ProcedureState.RUNNABLE);
179    env.getProcedureScheduler().addFront(this);
180    return false;
181  }
182
183  @Override
184  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
185    SnapshotVerifyProcedureStateData.Builder builder =
186      SnapshotVerifyProcedureStateData.newBuilder();
187    builder.setSnapshot(snapshot).setRegion(ProtobufUtil.toRegionInfo(region)).setState(state);
188    if (targetServer != null) {
189      builder.setTargetServer(ProtobufUtil.toServerName(targetServer));
190    }
191    if (this.remoteError != null) {
192      ErrorHandlingProtos.ForeignExceptionMessage fem =
193        ForeignExceptionUtil.toProtoForeignException(remoteError);
194      builder.setError(fem);
195    }
196    serializer.serialize(builder.build());
197  }
198
199  @Override
200  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
201    SnapshotVerifyProcedureStateData data =
202      serializer.deserialize(SnapshotVerifyProcedureStateData.class);
203    this.snapshot = data.getSnapshot();
204    this.region = ProtobufUtil.toRegionInfo(data.getRegion());
205    this.state = data.getState();
206    if (data.hasTargetServer()) {
207      this.targetServer = ProtobufUtil.toServerName(data.getTargetServer());
208    }
209    if (data.hasError()) {
210      this.remoteError = ForeignExceptionUtil.toException(data.getError());
211    }
212  }
213
214  @Override
215  protected void toStringClassDetails(StringBuilder builder) {
216    builder.append(getClass().getSimpleName()).append(", snapshot=").append(snapshot.getName());
217    if (targetServer != null) {
218      builder.append(", targetServer=").append(targetServer);
219    }
220  }
221
222  @Override
223  public Optional<RemoteOperation> remoteCallBuild(MasterProcedureEnv env, ServerName serverName) {
224    SnapshotVerifyParameter.Builder builder = SnapshotVerifyParameter.newBuilder();
225    builder.setSnapshot(snapshot).setRegion(ProtobufUtil.toRegionInfo(region));
226    return Optional
227      .of(new RSProcedureDispatcher.ServerOperation(this, getProcId(), SnapshotVerifyCallable.class,
228        builder.build().toByteArray(), env.getMasterServices().getMasterActiveTime()));
229  }
230
231  @Override
232  public TableName getTableName() {
233    return TableName.valueOf(snapshot.getTable());
234  }
235
236  @Override
237  public TableOperationType getTableOperationType() {
238    return TableOperationType.SNAPSHOT;
239  }
240
241  public ServerName getServerName() {
242    return targetServer;
243  }
244}