001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import org.apache.hadoop.fs.Path;
022import org.apache.hadoop.hbase.ServerName;
023import org.apache.hadoop.hbase.master.SplitWALManager;
024import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
025import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
026import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
027import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
028import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
029import org.apache.hadoop.hbase.util.RetryCounter;
030import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
031import org.apache.yetus.audience.InterfaceAudience;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
036import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
037import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
038
039/**
040 * The procedure is to split a WAL. It will get an available region server and schedule a
041 * {@link SplitWALRemoteProcedure} to actually send the request to region server to split this WAL.
042 * It also check if the split wal task really succeed. If the WAL still exists, it will schedule
043 * another region server to split this WAL.
044 */
045@InterfaceAudience.Private
046public class SplitWALProcedure
047  extends StateMachineProcedure<MasterProcedureEnv, MasterProcedureProtos.SplitWALState>
048  implements ServerProcedureInterface {
049  private static final Logger LOG = LoggerFactory.getLogger(SplitWALProcedure.class);
050  private String walPath;
051  private ServerName worker;
052  private ServerName crashedServer;
053  private RetryCounter retryCounter;
054
055  public SplitWALProcedure() {
056  }
057
058  public SplitWALProcedure(String walPath, ServerName crashedServer) {
059    this.walPath = walPath;
060    this.crashedServer = crashedServer;
061  }
062
063  @Override
064  protected Flow executeFromState(MasterProcedureEnv env, MasterProcedureProtos.SplitWALState state)
065    throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
066    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
067    switch (state) {
068      case ACQUIRE_SPLIT_WAL_WORKER:
069        worker = splitWALManager.acquireSplitWALWorker(this);
070        setNextState(MasterProcedureProtos.SplitWALState.DISPATCH_WAL_TO_WORKER);
071        return Flow.HAS_MORE_STATE;
072      case DISPATCH_WAL_TO_WORKER:
073        assert worker != null;
074        addChildProcedure(new SplitWALRemoteProcedure(worker, crashedServer, walPath));
075        setNextState(MasterProcedureProtos.SplitWALState.RELEASE_SPLIT_WORKER);
076        return Flow.HAS_MORE_STATE;
077      case RELEASE_SPLIT_WORKER:
078        boolean finished;
079        try {
080          finished = splitWALManager.isSplitWALFinished(walPath);
081        } catch (IOException ioe) {
082          if (retryCounter == null) {
083            retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
084          }
085          long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
086          LOG.warn("Failed to check whether splitting wal {} success, wait {} seconds to retry",
087            walPath, backoff / 1000, ioe);
088          setTimeout(Math.toIntExact(backoff));
089          setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
090          skipPersistence();
091          throw new ProcedureSuspendedException();
092        }
093        splitWALManager.releaseSplitWALWorker(worker, env.getProcedureScheduler());
094        if (!finished) {
095          LOG.warn("Failed to split wal {} by server {}, retry...", walPath, worker);
096          setNextState(MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER);
097          return Flow.HAS_MORE_STATE;
098        }
099        ServerCrashProcedure.updateProgress(env, getParentProcId());
100        return Flow.NO_MORE_STATE;
101      default:
102        throw new UnsupportedOperationException("unhandled state=" + state);
103    }
104  }
105
106  @Override
107  protected void rollbackState(MasterProcedureEnv env,
108    MasterProcedureProtos.SplitWALState splitOneWalState) throws IOException, InterruptedException {
109    if (splitOneWalState == getInitialState()) {
110      return;
111    }
112    throw new UnsupportedOperationException();
113  }
114
115  @Override
116  protected MasterProcedureProtos.SplitWALState getState(int stateId) {
117    return MasterProcedureProtos.SplitWALState.forNumber(stateId);
118  }
119
120  @Override
121  protected int getStateId(MasterProcedureProtos.SplitWALState state) {
122    return state.getNumber();
123  }
124
125  @Override
126  protected MasterProcedureProtos.SplitWALState getInitialState() {
127    return MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER;
128  }
129
130  @Override
131  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
132    super.serializeStateData(serializer);
133    MasterProcedureProtos.SplitWALData.Builder builder =
134      MasterProcedureProtos.SplitWALData.newBuilder();
135    builder.setWalPath(walPath).setCrashedServer(ProtobufUtil.toServerName(crashedServer));
136    if (worker != null) {
137      builder.setWorker(ProtobufUtil.toServerName(worker));
138    }
139    serializer.serialize(builder.build());
140  }
141
142  @Override
143  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
144    super.deserializeStateData(serializer);
145    MasterProcedureProtos.SplitWALData data =
146      serializer.deserialize(MasterProcedureProtos.SplitWALData.class);
147    walPath = data.getWalPath();
148    crashedServer = ProtobufUtil.toServerName(data.getCrashedServer());
149    if (data.hasWorker()) {
150      worker = ProtobufUtil.toServerName(data.getWorker());
151    }
152  }
153
154  @Override
155  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
156    setState(ProcedureProtos.ProcedureState.RUNNABLE);
157    env.getProcedureScheduler().addFront(this);
158    return false;
159  }
160
161  public String getWAL() {
162    return walPath;
163  }
164
165  public ServerName getWorker() {
166    return worker;
167  }
168
169  @Override
170  public ServerName getServerName() {
171    return this.crashedServer;
172  }
173
174  @Override
175  public boolean hasMetaTableRegion() {
176    return AbstractFSWALProvider.isMetaFile(new Path(walPath));
177  }
178
179  @Override
180  public ServerOperationType getServerOperationType() {
181    return ServerOperationType.SPLIT_WAL;
182  }
183
184  @Override
185  protected void afterReplay(MasterProcedureEnv env) {
186    if (worker != null) {
187      if (
188        env != null && env.getMasterServices() != null
189          && env.getMasterServices().getSplitWALManager() != null
190      ) {
191        env.getMasterServices().getSplitWALManager().addUsedSplitWALWorker(worker);
192      }
193    }
194  }
195
196  @Override
197  protected void toStringClassDetails(StringBuilder builder) {
198    builder.append(getProcName());
199    if (this.worker != null) {
200      builder.append(", worker=");
201      builder.append(this.worker);
202    }
203    if (this.retryCounter != null) {
204      builder.append(", retry=");
205      builder.append(this.retryCounter);
206    }
207  }
208
209  @Override
210  public String getProcName() {
211    return getClass().getSimpleName() + " " + getWALNameFromStrPath(getWAL());
212  }
213
214  /**
215   * @return Return the WAL filename when given a Path-as-a-string; i.e. return the last path
216   *         component only.
217   */
218  static String getWALNameFromStrPath(String path) {
219    int slashIndex = path.lastIndexOf('/');
220    return slashIndex != -1 ? path.substring(slashIndex + 1) : path;
221  }
222}