001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.master.procedure;
020
021import java.io.IOException;
022
023import org.apache.hadoop.fs.Path;
024import org.apache.hadoop.hbase.ServerName;
025import org.apache.hadoop.hbase.master.SplitWALManager;
026import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
027import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
028import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
029import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
030import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
031import org.apache.hadoop.hbase.util.RetryCounter;
032import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
033import org.apache.yetus.audience.InterfaceAudience;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036
037import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
038import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
039import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
040import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
041
042/**
043 * The procedure is to split a WAL. It will get an available region server and
044 * schedule a {@link SplitWALRemoteProcedure} to actually send the request to region
045 * server to split this WAL.
046 * It also check if the split wal task really succeed. If the WAL still exists, it will
047 * schedule another region server to split this WAL.
048 */
049@InterfaceAudience.Private
050public class SplitWALProcedure
051    extends StateMachineProcedure<MasterProcedureEnv, MasterProcedureProtos.SplitWALState>
052    implements ServerProcedureInterface {
053  private static final Logger LOG = LoggerFactory.getLogger(SplitWALProcedure.class);
054  private String walPath;
055  private ServerName worker;
056  private ServerName crashedServer;
057  private RetryCounter retryCounter;
058
059  public SplitWALProcedure() {
060  }
061
062  public SplitWALProcedure(String walPath, ServerName crashedServer) {
063    this.walPath = walPath;
064    this.crashedServer = crashedServer;
065  }
066
067  @Override
068  protected Flow executeFromState(MasterProcedureEnv env, MasterProcedureProtos.SplitWALState state)
069      throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
070    SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager();
071    switch (state) {
072      case ACQUIRE_SPLIT_WAL_WORKER:
073        worker = splitWALManager.acquireSplitWALWorker(this);
074        setNextState(MasterProcedureProtos.SplitWALState.DISPATCH_WAL_TO_WORKER);
075        return Flow.HAS_MORE_STATE;
076      case DISPATCH_WAL_TO_WORKER:
077        assert worker != null;
078        addChildProcedure(new SplitWALRemoteProcedure(worker, crashedServer, walPath));
079        setNextState(MasterProcedureProtos.SplitWALState.RELEASE_SPLIT_WORKER);
080        return Flow.HAS_MORE_STATE;
081      case RELEASE_SPLIT_WORKER:
082        boolean finished;
083        try {
084          finished = splitWALManager.isSplitWALFinished(walPath);
085        } catch (IOException ioe) {
086          if (retryCounter == null) {
087            retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
088          }
089          long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
090          LOG.warn("Failed to check whether splitting wal {} success, wait {} seconds to retry",
091            walPath, backoff / 1000, ioe);
092          setTimeout(Math.toIntExact(backoff));
093          setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
094          skipPersistence();
095          throw new ProcedureSuspendedException();
096        }
097        splitWALManager.releaseSplitWALWorker(worker, env.getProcedureScheduler());
098        if (!finished) {
099          LOG.warn("Failed to split wal {} by server {}, retry...", walPath, worker);
100          setNextState(MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER);
101          return Flow.HAS_MORE_STATE;
102        }
103        ServerCrashProcedure.updateProgress(env, getParentProcId());
104        return Flow.NO_MORE_STATE;
105      default:
106        throw new UnsupportedOperationException("unhandled state=" + state);
107    }
108  }
109
110  @Override
111  protected void rollbackState(MasterProcedureEnv env,
112      MasterProcedureProtos.SplitWALState splitOneWalState)
113      throws IOException, InterruptedException {
114    if (splitOneWalState == getInitialState()) {
115      return;
116    }
117    throw new UnsupportedOperationException();
118  }
119
120  @Override
121  protected MasterProcedureProtos.SplitWALState getState(int stateId) {
122    return MasterProcedureProtos.SplitWALState.forNumber(stateId);
123  }
124
125  @Override
126  protected int getStateId(MasterProcedureProtos.SplitWALState state) {
127    return state.getNumber();
128  }
129
130  @Override
131  protected MasterProcedureProtos.SplitWALState getInitialState() {
132    return MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER;
133  }
134
135  @Override
136  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
137    super.serializeStateData(serializer);
138    MasterProcedureProtos.SplitWALData.Builder builder =
139        MasterProcedureProtos.SplitWALData.newBuilder();
140    builder.setWalPath(walPath).setCrashedServer(ProtobufUtil.toServerName(crashedServer));
141    if (worker != null) {
142      builder.setWorker(ProtobufUtil.toServerName(worker));
143    }
144    serializer.serialize(builder.build());
145  }
146
147  @Override
148  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
149    super.deserializeStateData(serializer);
150    MasterProcedureProtos.SplitWALData data =
151        serializer.deserialize(MasterProcedureProtos.SplitWALData.class);
152    walPath = data.getWalPath();
153    crashedServer = ProtobufUtil.toServerName(data.getCrashedServer());
154    if (data.hasWorker()) {
155      worker = ProtobufUtil.toServerName(data.getWorker());
156    }
157  }
158
159  @Override
160  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
161    setState(ProcedureProtos.ProcedureState.RUNNABLE);
162    env.getProcedureScheduler().addFront(this);
163    return false;
164  }
165
166  public String getWAL() {
167    return walPath;
168  }
169
170  @VisibleForTesting
171  public ServerName getWorker(){
172    return worker;
173  }
174
175  @Override
176  public ServerName getServerName() {
177    return this.crashedServer;
178  }
179
180  @Override
181  public boolean hasMetaTableRegion() {
182    return AbstractFSWALProvider.isMetaFile(new Path(walPath));
183  }
184
185  @Override
186  public ServerOperationType getServerOperationType() {
187    return ServerOperationType.SPLIT_WAL;
188  }
189
190  @Override
191  protected void afterReplay(MasterProcedureEnv env){
192    if(worker != null){
193      env.getMasterServices().getSplitWALManager().addUsedSplitWALWorker(worker);
194    }
195
196  }
197}