/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import com.google.errorprone.annotations.RestrictedApi;
import java.io.IOException;
import java.util.Optional;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.assignment.ServerState;
import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
import org.apache.hadoop.hbase.regionserver.SnapshotRegionCallable;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotRegionProcedureStateData;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * A remote procedure which is used to send a region snapshot request to a region server. The
 * basic logic of SnapshotRegionProcedure is similar to {@link ServerRemoteProcedure}, with one
 * difference: when a {@link FailedRemoteDispatchException} is thrown, SnapshotRegionProcedure
 * sleeps for some time and keeps retrying until it succeeds.
 */
@InterfaceAudience.Private
public class SnapshotRegionProcedure extends Procedure<MasterProcedureEnv>
  implements TableProcedureInterface, RemoteProcedure<MasterProcedureEnv, ServerName> {
  private static final Logger LOG = LoggerFactory.getLogger(SnapshotRegionProcedure.class);

  /** Snapshot being taken; part of the persisted procedure state. */
  private SnapshotDescription snapshot;
  /**
   * Event this procedure suspends on while a dispatched request is outstanding. Created in
   * {@link #execute} after a successful dispatch and cleared by {@link #complete}.
   */
  private ProcedureEvent<?> event;
  /** Region to snapshot; part of the persisted procedure state. */
  private RegionInfo region;
  /** True once a request has been handed to the remote dispatcher. Not persisted. */
  private boolean dispatched;
  /** True once the remote side reported completion without an error. Not persisted. */
  private boolean succ;
  /** Lazily created on the first back-off; never reset once created. */
  private RetryCounter retryCounter;

  /** No-arg constructor; the fields are filled in by {@link #deserializeStateData}. */
  public SnapshotRegionProcedure() {
  }

  /**
   * @param snapshot description of the snapshot being taken
   * @param region   the region to snapshot
   */
  public SnapshotRegionProcedure(SnapshotDescription snapshot, RegionInfo region) {
    this.snapshot = snapshot;
    this.region = region;
  }

  /**
   * Asks the procedure scheduler for the region lock.
   * @return {@link LockState#LOCK_EVENT_WAIT} when the scheduler reports that this procedure has
   *         to wait for the region, {@link LockState#LOCK_ACQUIRED} otherwise
   */
  @Override
  protected LockState acquireLock(final MasterProcedureEnv env) {
    if (env.getProcedureScheduler().waitRegions(this, getTableName(), region)) {
      return LockState.LOCK_EVENT_WAIT;
    }
    return LockState.LOCK_ACQUIRED;
  }

  /** Hands the region back to the procedure scheduler. */
  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureScheduler().wakeRegions(this, getTableName(), region);
  }

  /** Always {@code false}. */
  @Override
  protected boolean holdLock(MasterProcedureEnv env) {
    return false;
  }

  /**
   * Builds the operation that is shipped to the region server: a
   * {@link SnapshotRegionCallable} whose parameter is a serialized
   * {@code SnapshotRegionParameter} carrying the region and the snapshot description.
   */
  @Override
  public Optional<RemoteOperation> remoteCallBuild(MasterProcedureEnv env, ServerName serverName) {
    byte[] parameter = MasterProcedureProtos.SnapshotRegionParameter.newBuilder()
      .setRegion(ProtobufUtil.toRegionInfo(region)).setSnapshot(snapshot).build().toByteArray();
    return Optional.of(new RSProcedureDispatcher.ServerOperation(this, getProcId(),
      SnapshotRegionCallable.class, parameter, env.getMasterServices().getMasterActiveTime()));
  }

  /** The remote call itself failed; wake the procedure so that it dispatches again. */
  @Override
  public void remoteCallFailed(MasterProcedureEnv env, ServerName serverName, IOException e) {
    complete(env, e);
  }

  /** The remote side finished successfully; wake the procedure so that it can finish. */
  @Override
  public void remoteOperationCompleted(MasterProcedureEnv env) {
    complete(env, null);
  }

  /** The remote side reported a failure; wake the procedure so that it dispatches again. */
  @Override
  public void remoteOperationFailed(MasterProcedureEnv env, RemoteProcedureException e) {
    complete(env, e);
  }

  /**
   * Records the outcome of a dispatched request and wakes this procedure up. On failure
   * {@code succ} stays {@code false}, so the next {@link #execute} call dispatches the request
   * again, i.e. we keep retrying until success.
   * @param env   the master procedure environment
   * @param error the failure reported for the remote request, or {@code null} on success
   */
  private void complete(MasterProcedureEnv env, Throwable error) {
    if (isFinished()) {
      LOG.info("This procedure {} is already finished, skip the rest processes", this.getProcId());
      return;
    }
    if (event == null) {
      LOG.warn("procedure event for {} is null, maybe the procedure is created when recovery",
        getProcId());
      return;
    }
    if (error == null) {
      LOG.info("finish snapshot {} on region {}", snapshot.getName(), region.getEncodedName());
      succ = true;
    } else {
      // Log the cause: the procedure retries indefinitely, and without this the reason for each
      // failed attempt would be lost.
      LOG.warn("failed snapshot {} on region {}, will retry", snapshot.getName(),
        region.getEncodedName(), error);
    }

    event.wake(env.getProcedureScheduler());
    event = null;
  }

  @Override
  public TableName getTableName() {
    return region.getTable();
  }

  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SNAPSHOT;
  }

  /**
   * Dispatches the snapshot request to the server currently hosting the region.
   * <p>
   * When called after a dispatch that succeeded, returns {@code null}. When called after a
   * dispatch that did not succeed, clears {@code dispatched} and dispatches again. Before
   * dispatching, the region must have no procedure attached, must be in state OPEN, must have a
   * location, and that server must be ONLINE. If any check fails, or the dispatch throws
   * {@link FailedRemoteDispatchException}, the procedure backs off via
   * {@link #setTimeoutForSuspend} and suspends.
   * @return {@code null} once the remote snapshot has succeeded
   * @throws ProcedureSuspendedException on every path except final success: either waiting for
   *                                     the remote result or waiting for the back-off timeout
   */
  @Override
  protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
    throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
    if (dispatched) {
      if (succ) {
        return null;
      }
      // The previous attempt failed; fall through and dispatch again.
      dispatched = false;
    }

    RegionStates regionStates = env.getAssignmentManager().getRegionStates();
    RegionStateNode regionNode = regionStates.getRegionStateNode(region);
    regionNode.lock();
    try {
      if (regionNode.getProcedure() != null) {
        setTimeoutForSuspend(env, String.format("region %s has a TRSP attached %s",
          region.getRegionNameAsString(), regionNode.getProcedure()));
        throw new ProcedureSuspendedException();
      }
      if (!regionNode.isInState(RegionState.State.OPEN)) {
        setTimeoutForSuspend(env, String.format("region state of %s is %s",
          region.getRegionNameAsString(), regionNode.getState()));
        throw new ProcedureSuspendedException();
      }
      ServerName targetServer = regionNode.getRegionLocation();
      if (targetServer == null) {
        setTimeoutForSuspend(env,
          String.format("target server of region %s is null", region.getRegionNameAsString()));
        throw new ProcedureSuspendedException();
      }
      // NOTE(review): getServerNode(...) is dereferenced without a null check; confirm it cannot
      // return null for a server that is still recorded as the region location.
      ServerState serverState = regionStates.getServerNode(targetServer).getState();
      if (serverState != ServerState.ONLINE) {
        setTimeoutForSuspend(env, String.format("target server of region %s %s is in state %s",
          region.getRegionNameAsString(), targetServer, serverState));
        throw new ProcedureSuspendedException();
      }
      try {
        env.getRemoteDispatcher().addOperationToNode(targetServer, this);
        dispatched = true;
        // complete() wakes this event once the remote side reports back.
        event = new ProcedureEvent<>(this);
        event.suspendIfNotReady(this);
        throw new ProcedureSuspendedException();
      } catch (FailedRemoteDispatchException e) {
        setTimeoutForSuspend(env, "Failed send request to " + targetServer);
        throw new ProcedureSuspendedException();
      }
    } finally {
      regionNode.unlock();
    }
  }

  /** Rollback is not supported; always throws {@link UnsupportedOperationException}. */
  @Override
  protected void rollback(MasterProcedureEnv env) {
    throw new UnsupportedOperationException();
  }

  /**
   * Puts this procedure into {@code WAITING_TIMEOUT} with the next back-off interval taken from
   * the retry counter, which is created on first use. The caller is expected to throw
   * {@link ProcedureSuspendedException} afterwards.
   * @param env    the master procedure environment, used to read the retry configuration
   * @param reason human readable explanation, only used for logging
   */
  private void setTimeoutForSuspend(MasterProcedureEnv env, String reason) {
    if (retryCounter == null) {
      retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
    }
    long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
    LOG.warn("{} can not run currently because {}, wait {} ms to retry", this, reason, backoff);
    // Math.toIntExact throws ArithmeticException if the back-off does not fit in an int.
    setTimeout(Math.toIntExact(backoff));
    setState(ProcedureState.WAITING_TIMEOUT);
    skipPersistence();
  }

  /**
   * Invoked when the back-off timeout set by {@link #setTimeoutForSuspend} expires: marks the
   * procedure RUNNABLE again and puts it at the front of the scheduler queue.
   * @return always {@code false}
   */
  @Override
  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
    setState(ProcedureState.RUNNABLE);
    env.getProcedureScheduler().addFront(this);
    return false;
  }

  /** Always {@code false}. */
  @Override
  protected boolean abort(MasterProcedureEnv env) {
    return false;
  }

  /**
   * Persists the snapshot description and the region. {@code dispatched}, {@code succ},
   * {@code event} and {@code retryCounter} are not written.
   */
  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
    SnapshotRegionProcedureStateData.Builder builder =
      SnapshotRegionProcedureStateData.newBuilder();
    builder.setSnapshot(snapshot);
    builder.setRegion(ProtobufUtil.toRegionInfo(region));
    serializer.serialize(builder.build());
  }

  /** Restores the snapshot description and the region written by {@link #serializeStateData}. */
  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
    SnapshotRegionProcedureStateData data =
      serializer.deserialize(SnapshotRegionProcedureStateData.class);
    this.snapshot = data.getSnapshot();
    this.region = ProtobufUtil.toRegionInfo(data.getRegion());
  }

  /** @return the simple class name followed by the encoded region name */
  @Override
  public String getProcName() {
    return getClass().getSimpleName() + " " + region.getEncodedName();
  }

  @Override
  protected void toStringClassDetails(StringBuilder builder) {
    builder.append(getProcName());
  }

  /** Delegates to {@link MasterProcedureEnv#waitInitialized}. */
  @Override
  protected boolean waitInitialized(MasterProcedureEnv env) {
    return env.waitInitialized(this);
  }

  /** @return the region this procedure snapshots */
  public RegionInfo getRegion() {
    return region;
  }

  /**
   * @return {@code true} once this procedure has backed off at least once, i.e. the retry counter
   *         has been created
   */
  @RestrictedApi(explanation = "Should only be called in tests", link = "",
      allowedOnPath = ".*(/src/test/.*|TestSnapshotProcedure).java")
  boolean inRetrying() {
    return retryCounter != null;
  }
}