001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.master.procedure; 020 021import java.io.IOException; 022 023import org.apache.hadoop.fs.Path; 024import org.apache.hadoop.hbase.ServerName; 025import org.apache.hadoop.hbase.master.SplitWALManager; 026import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 027import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; 028import org.apache.hadoop.hbase.procedure2.ProcedureUtil; 029import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; 030import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; 031import org.apache.hadoop.hbase.util.RetryCounter; 032import org.apache.hadoop.hbase.wal.AbstractFSWALProvider; 033import org.apache.yetus.audience.InterfaceAudience; 034import org.slf4j.Logger; 035import org.slf4j.LoggerFactory; 036 037import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 038import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 039import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 040import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; 041 042/** 043 * The procedure is to split a WAL. It will get an available region server and 044 * schedule a {@link SplitWALRemoteProcedure} to actually send the request to region 045 * server to split this WAL. 046 * It also check if the split wal task really succeed. If the WAL still exists, it will 047 * schedule another region server to split this WAL. 048 */ 049@InterfaceAudience.Private 050public class SplitWALProcedure 051 extends StateMachineProcedure<MasterProcedureEnv, MasterProcedureProtos.SplitWALState> 052 implements ServerProcedureInterface { 053 private static final Logger LOG = LoggerFactory.getLogger(SplitWALProcedure.class); 054 private String walPath; 055 private ServerName worker; 056 private ServerName crashedServer; 057 private RetryCounter retryCounter; 058 059 public SplitWALProcedure() { 060 } 061 062 public SplitWALProcedure(String walPath, ServerName crashedServer) { 063 this.walPath = walPath; 064 this.crashedServer = crashedServer; 065 } 066 067 @Override 068 protected Flow executeFromState(MasterProcedureEnv env, MasterProcedureProtos.SplitWALState state) 069 throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { 070 SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager(); 071 switch (state) { 072 case ACQUIRE_SPLIT_WAL_WORKER: 073 worker = splitWALManager.acquireSplitWALWorker(this); 074 setNextState(MasterProcedureProtos.SplitWALState.DISPATCH_WAL_TO_WORKER); 075 return Flow.HAS_MORE_STATE; 076 case DISPATCH_WAL_TO_WORKER: 077 assert worker != null; 078 addChildProcedure(new SplitWALRemoteProcedure(worker, crashedServer, walPath)); 079 setNextState(MasterProcedureProtos.SplitWALState.RELEASE_SPLIT_WORKER); 080 return Flow.HAS_MORE_STATE; 081 case RELEASE_SPLIT_WORKER: 082 boolean finished; 083 try { 084 finished = splitWALManager.isSplitWALFinished(walPath); 085 } catch (IOException ioe) { 086 if (retryCounter == null) { 087 retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration()); 088 } 089 long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); 090 LOG.warn("Failed to check whether splitting wal {} success, wait {} seconds to retry", 091 walPath, backoff / 1000, ioe); 092 setTimeout(Math.toIntExact(backoff)); 093 setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); 094 skipPersistence(); 095 throw new ProcedureSuspendedException(); 096 } 097 splitWALManager.releaseSplitWALWorker(worker, env.getProcedureScheduler()); 098 if (!finished) { 099 LOG.warn("Failed to split wal {} by server {}, retry...", walPath, worker); 100 setNextState(MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER); 101 return Flow.HAS_MORE_STATE; 102 } 103 ServerCrashProcedure.updateProgress(env, getParentProcId()); 104 return Flow.NO_MORE_STATE; 105 default: 106 throw new UnsupportedOperationException("unhandled state=" + state); 107 } 108 } 109 110 @Override 111 protected void rollbackState(MasterProcedureEnv env, 112 MasterProcedureProtos.SplitWALState splitOneWalState) 113 throws IOException, InterruptedException { 114 if (splitOneWalState == getInitialState()) { 115 return; 116 } 117 throw new UnsupportedOperationException(); 118 } 119 120 @Override 121 protected MasterProcedureProtos.SplitWALState getState(int stateId) { 122 return MasterProcedureProtos.SplitWALState.forNumber(stateId); 123 } 124 125 @Override 126 protected int getStateId(MasterProcedureProtos.SplitWALState state) { 127 return state.getNumber(); 128 } 129 130 @Override 131 protected MasterProcedureProtos.SplitWALState getInitialState() { 132 return MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER; 133 } 134 135 @Override 136 protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { 137 super.serializeStateData(serializer); 138 MasterProcedureProtos.SplitWALData.Builder builder = 139 MasterProcedureProtos.SplitWALData.newBuilder(); 140 builder.setWalPath(walPath).setCrashedServer(ProtobufUtil.toServerName(crashedServer)); 141 if (worker != null) { 142 builder.setWorker(ProtobufUtil.toServerName(worker)); 143 } 144 serializer.serialize(builder.build()); 145 } 146 147 @Override 148 protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { 149 super.deserializeStateData(serializer); 150 MasterProcedureProtos.SplitWALData data = 151 serializer.deserialize(MasterProcedureProtos.SplitWALData.class); 152 walPath = data.getWalPath(); 153 crashedServer = ProtobufUtil.toServerName(data.getCrashedServer()); 154 if (data.hasWorker()) { 155 worker = ProtobufUtil.toServerName(data.getWorker()); 156 } 157 } 158 159 @Override 160 protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) { 161 setState(ProcedureProtos.ProcedureState.RUNNABLE); 162 env.getProcedureScheduler().addFront(this); 163 return false; 164 } 165 166 public String getWAL() { 167 return walPath; 168 } 169 170 @VisibleForTesting 171 public ServerName getWorker(){ 172 return worker; 173 } 174 175 @Override 176 public ServerName getServerName() { 177 return this.crashedServer; 178 } 179 180 @Override 181 public boolean hasMetaTableRegion() { 182 return AbstractFSWALProvider.isMetaFile(new Path(walPath)); 183 } 184 185 @Override 186 public ServerOperationType getServerOperationType() { 187 return ServerOperationType.SPLIT_WAL; 188 } 189 190 @Override 191 protected void afterReplay(MasterProcedureEnv env){ 192 if(worker != null){ 193 env.getMasterServices().getSplitWALManager().addUsedSplitWALWorker(worker); 194 } 195 196 } 197}