001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.Optional;
022import org.apache.hadoop.hbase.ServerName;
023import org.apache.hadoop.hbase.TableName;
024import org.apache.hadoop.hbase.client.RegionInfo;
025import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
026import org.apache.hadoop.hbase.procedure2.Procedure;
027import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
028import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
029import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
030import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
031import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
032import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
033import org.apache.hadoop.hbase.regionserver.SnapshotVerifyCallable;
034import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
035import org.apache.hadoop.hbase.util.RetryCounter;
036import org.apache.yetus.audience.InterfaceAudience;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
041import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyParameter;
042import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyProcedureStateData;
043import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
044import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
045
046/**
047 * A remote procedure which is used to send verify snapshot request to region server.
048 */
049@InterfaceAudience.Private
050public class SnapshotVerifyProcedure extends ServerRemoteProcedure
051  implements TableProcedureInterface {
052  private static final Logger LOG = LoggerFactory.getLogger(SnapshotVerifyProcedure.class);
053
054  private SnapshotDescription snapshot;
055  private RegionInfo region;
056
057  private RetryCounter retryCounter;
058
059  public SnapshotVerifyProcedure() {
060  }
061
062  public SnapshotVerifyProcedure(SnapshotDescription snapshot, RegionInfo region) {
063    this.snapshot = snapshot;
064    this.region = region;
065  }
066
067  @Override
068  protected void rollback(MasterProcedureEnv env) {
069    // nothing to rollback
070  }
071
072  @Override
073  protected boolean abort(MasterProcedureEnv env) {
074    return false;
075  }
076
077  @Override
078  protected synchronized void complete(MasterProcedureEnv env, Throwable error) {
079    try {
080      if (error != null) {
081        if (error instanceof RemoteProcedureException) {
082          // remote operation failed
083          Throwable remoteEx = unwrapRemoteProcedureException((RemoteProcedureException) error);
084          if (remoteEx instanceof CorruptedSnapshotException) {
085            // snapshot is corrupted, will touch a flag file and finish the procedure
086            succ = true;
087            SnapshotProcedure parent = env.getMasterServices().getMasterProcedureExecutor()
088              .getProcedure(SnapshotProcedure.class, getParentProcId());
089            if (parent != null) {
090              parent.markSnapshotCorrupted();
091            }
092          } else {
093            // unexpected exception in remote server, will retry on other servers
094            succ = false;
095          }
096        } else {
097          // the mostly like thing is that remote call failed, will retry on other servers
098          succ = false;
099        }
100      } else {
101        // remote operation finished without error
102        succ = true;
103      }
104    } catch (IOException e) {
105      // if we can't create the flag file, then mark the current procedure as FAILED
106      // and rollback the whole snapshot procedure stack.
107      LOG.warn("Failed create corrupted snapshot flag file for snapshot={}, region={}",
108        snapshot.getName(), region, e);
109      setFailure("verify-snapshot", e);
110    } finally {
111      // release the worker
112      env.getMasterServices().getSnapshotManager().releaseSnapshotVerifyWorker(this, targetServer,
113        env.getProcedureScheduler());
114    }
115  }
116
117  // we will wrap remote exception into a RemoteProcedureException,
118  // here we try to unwrap it
119  private Throwable unwrapRemoteProcedureException(RemoteProcedureException e) {
120    return e.getCause();
121  }
122
123  @Override
124  protected synchronized Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
125    throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
126    try {
127      // if we've already known the snapshot is corrupted, then stop scheduling
128      // the new procedures and the undispatched procedures
129      if (!dispatched) {
130        SnapshotProcedure parent = env.getMasterServices().getMasterProcedureExecutor()
131          .getProcedure(SnapshotProcedure.class, getParentProcId());
132        if (parent != null && parent.isSnapshotCorrupted()) {
133          return null;
134        }
135      }
136      // acquire a worker
137      if (!dispatched && targetServer == null) {
138        targetServer =
139          env.getMasterServices().getSnapshotManager().acquireSnapshotVerifyWorker(this);
140      }
141      // send remote request
142      Procedure<MasterProcedureEnv>[] res = super.execute(env);
143      // retry if necessary
144      if (!dispatched) {
145        // the mostly like thing is that a FailedRemoteDispatchException is thrown.
146        // we need to retry on another remote server
147        targetServer = null;
148        throw new FailedRemoteDispatchException("Failed sent request");
149      } else {
150        // the request was successfully dispatched
151        return res;
152      }
153    } catch (IOException e) {
154      // there are some cases we need to retry:
155      // 1. we can't get response from hdfs
156      // 2. the remote server crashed
157      if (retryCounter == null) {
158        retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
159      }
160      long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
161      LOG.warn("Failed to get snapshot verify result , wait {} ms to retry", backoff, e);
162      setTimeout(Math.toIntExact(backoff));
163      setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
164      skipPersistence();
165      throw new ProcedureSuspendedException();
166    }
167  }
168
169  @Override
170  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
171    setState(ProcedureProtos.ProcedureState.RUNNABLE);
172    env.getProcedureScheduler().addFront(this);
173    return false;
174  }
175
176  @Override
177  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
178    SnapshotVerifyProcedureStateData.Builder builder =
179      SnapshotVerifyProcedureStateData.newBuilder();
180    builder.setSnapshot(snapshot).setRegion(ProtobufUtil.toRegionInfo(region));
181    if (targetServer != null) {
182      builder.setTargetServer(ProtobufUtil.toServerName(targetServer));
183    }
184    serializer.serialize(builder.build());
185  }
186
187  @Override
188  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
189    SnapshotVerifyProcedureStateData data =
190      serializer.deserialize(SnapshotVerifyProcedureStateData.class);
191    this.snapshot = data.getSnapshot();
192    this.region = ProtobufUtil.toRegionInfo(data.getRegion());
193    if (data.hasTargetServer()) {
194      this.targetServer = ProtobufUtil.toServerName(data.getTargetServer());
195    }
196  }
197
198  @Override
199  protected void toStringClassDetails(StringBuilder builder) {
200    builder.append(getClass().getSimpleName()).append(", snapshot=").append(snapshot.getName());
201    if (targetServer != null) {
202      builder.append(", targetServer=").append(targetServer);
203    }
204  }
205
206  @Override
207  public Optional<RemoteOperation> remoteCallBuild(MasterProcedureEnv env, ServerName serverName) {
208    SnapshotVerifyParameter.Builder builder = SnapshotVerifyParameter.newBuilder();
209    builder.setSnapshot(snapshot).setRegion(ProtobufUtil.toRegionInfo(region));
210    return Optional.of(new RSProcedureDispatcher.ServerOperation(this, getProcId(),
211      SnapshotVerifyCallable.class, builder.build().toByteArray()));
212  }
213
214  @Override
215  public TableName getTableName() {
216    return TableName.valueOf(snapshot.getTable());
217  }
218
219  @Override
220  public TableOperationType getTableOperationType() {
221    return TableOperationType.SNAPSHOT;
222  }
223
224  public ServerName getServerName() {
225    return targetServer;
226  }
227}