001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import org.apache.hadoop.fs.Path;
022import org.apache.hadoop.hbase.ServerName;
023import org.apache.hadoop.hbase.master.SplitWALManager;
024import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
025import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
026import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
027import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
028import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
029import org.apache.hadoop.hbase.util.RetryCounter;
030import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
031import org.apache.yetus.audience.InterfaceAudience;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
036import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
037import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
038
039/**
040 * The procedure is to split a WAL. It will get an available region server and
041 * schedule a {@link SplitWALRemoteProcedure} to actually send the request to region
042 * server to split this WAL.
043 * It also check if the split wal task really succeed. If the WAL still exists, it will
044 * schedule another region server to split this WAL.
045 */
046@InterfaceAudience.Private
047public class SplitWALProcedure
048    extends StateMachineProcedure<MasterProcedureEnv, MasterProcedureProtos.SplitWALState>
049    implements ServerProcedureInterface {
050  private static final Logger LOG = LoggerFactory.getLogger(SplitWALProcedure.class);
051  private String walPath;
052  private ServerName worker;
053  private ServerName crashedServer;
054  private RetryCounter retryCounter;
055
056  public SplitWALProcedure() {
057  }
058
059  public SplitWALProcedure(String walPath, ServerName crashedServer) {
060    this.walPath = walPath;
061    this.crashedServer = crashedServer;
062  }
063
064  @Override
065  protected Flow executeFromState(MasterProcedureEnv env, MasterProcedureProtos.SplitWALState state)
066      throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
067    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
068    switch (state) {
069      case ACQUIRE_SPLIT_WAL_WORKER:
070        worker = splitWALManager.acquireSplitWALWorker(this);
071        setNextState(MasterProcedureProtos.SplitWALState.DISPATCH_WAL_TO_WORKER);
072        return Flow.HAS_MORE_STATE;
073      case DISPATCH_WAL_TO_WORKER:
074        assert worker != null;
075        addChildProcedure(new SplitWALRemoteProcedure(worker, crashedServer, walPath));
076        setNextState(MasterProcedureProtos.SplitWALState.RELEASE_SPLIT_WORKER);
077        return Flow.HAS_MORE_STATE;
078      case RELEASE_SPLIT_WORKER:
079        boolean finished;
080        try {
081          finished = splitWALManager.isSplitWALFinished(walPath);
082        } catch (IOException ioe) {
083          if (retryCounter == null) {
084            retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
085          }
086          long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
087          LOG.warn("Failed to check whether splitting wal {} success, wait {} seconds to retry",
088            walPath, backoff / 1000, ioe);
089          setTimeout(Math.toIntExact(backoff));
090          setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
091          skipPersistence();
092          throw new ProcedureSuspendedException();
093        }
094        splitWALManager.releaseSplitWALWorker(worker, env.getProcedureScheduler());
095        if (!finished) {
096          LOG.warn("Failed to split wal {} by server {}, retry...", walPath, worker);
097          setNextState(MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER);
098          return Flow.HAS_MORE_STATE;
099        }
100        ServerCrashProcedure.updateProgress(env, getParentProcId());
101        return Flow.NO_MORE_STATE;
102      default:
103        throw new UnsupportedOperationException("unhandled state=" + state);
104    }
105  }
106
107  @Override
108  protected void rollbackState(MasterProcedureEnv env,
109      MasterProcedureProtos.SplitWALState splitOneWalState)
110      throws IOException, InterruptedException {
111    if (splitOneWalState == getInitialState()) {
112      return;
113    }
114    throw new UnsupportedOperationException();
115  }
116
117  @Override
118  protected MasterProcedureProtos.SplitWALState getState(int stateId) {
119    return MasterProcedureProtos.SplitWALState.forNumber(stateId);
120  }
121
122  @Override
123  protected int getStateId(MasterProcedureProtos.SplitWALState state) {
124    return state.getNumber();
125  }
126
127  @Override
128  protected MasterProcedureProtos.SplitWALState getInitialState() {
129    return MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER;
130  }
131
132  @Override
133  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
134    super.serializeStateData(serializer);
135    MasterProcedureProtos.SplitWALData.Builder builder =
136        MasterProcedureProtos.SplitWALData.newBuilder();
137    builder.setWalPath(walPath).setCrashedServer(ProtobufUtil.toServerName(crashedServer));
138    if (worker != null) {
139      builder.setWorker(ProtobufUtil.toServerName(worker));
140    }
141    serializer.serialize(builder.build());
142  }
143
144  @Override
145  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
146    super.deserializeStateData(serializer);
147    MasterProcedureProtos.SplitWALData data =
148        serializer.deserialize(MasterProcedureProtos.SplitWALData.class);
149    walPath = data.getWalPath();
150    crashedServer = ProtobufUtil.toServerName(data.getCrashedServer());
151    if (data.hasWorker()) {
152      worker = ProtobufUtil.toServerName(data.getWorker());
153    }
154  }
155
156  @Override
157  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
158    setState(ProcedureProtos.ProcedureState.RUNNABLE);
159    env.getProcedureScheduler().addFront(this);
160    return false;
161  }
162
163  public String getWAL() {
164    return walPath;
165  }
166
167  public ServerName getWorker(){
168    return worker;
169  }
170
171  @Override
172  public ServerName getServerName() {
173    return this.crashedServer;
174  }
175
176  @Override
177  public boolean hasMetaTableRegion() {
178    return AbstractFSWALProvider.isMetaFile(new Path(walPath));
179  }
180
181  @Override
182  public ServerOperationType getServerOperationType() {
183    return ServerOperationType.SPLIT_WAL;
184  }
185
186  @Override
187  protected void afterReplay(MasterProcedureEnv env){
188    if (worker != null) {
189      if (env != null && env.getMasterServices() != null &&
190          env.getMasterServices().getSplitWALManager() != null) {
191        env.getMasterServices().getSplitWALManager().addUsedSplitWALWorker(worker);
192      }
193    }
194  }
195
196  @Override protected void toStringClassDetails(StringBuilder builder) {
197    builder.append(getProcName());
198    if (this.worker != null) {
199      builder.append(", worker=");
200      builder.append(this.worker);
201    }
202    if (this.retryCounter != null) {
203      builder.append(", retry=");
204      builder.append(this.retryCounter);
205    }
206  }
207
208  @Override public String getProcName() {
209    return getClass().getSimpleName() + " " + getWALNameFromStrPath(getWAL());
210  }
211
212  /**
213   * @return Return the WAL filename when given a Path-as-a-string; i.e. return the last path
214   *   component only.
215   */
216  static String getWALNameFromStrPath(String path) {
217    int slashIndex = path.lastIndexOf('/');
218    return slashIndex != -1? path.substring(slashIndex + 1): path;
219  }
220}