/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import java.io.IOException;
import java.util.Optional;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
import org.apache.hadoop.hbase.regionserver.SnapshotVerifyCallable;
import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ErrorHandlingProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyParameter;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyProcedureStateData;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * Remote procedure that asks a region server to verify one region of a snapshot.
 * <p>
 * A worker server is obtained from the snapshot manager before the request is sent and handed back
 * once a result (or an error) has been reported. When the remote side reports a
 * {@link CorruptedSnapshotException}, the parent {@link SnapshotProcedure} (if it can be found) is
 * told that the snapshot is corrupted.
 */
@InterfaceAudience.Private
public class SnapshotVerifyProcedure extends ServerRemoteProcedure
  implements TableProcedureInterface {
  private static final Logger LOG = LoggerFactory.getLogger(SnapshotVerifyProcedure.class);

  /** Value of {@code state} while the request still has to be dispatched to a server. */
  private static final MasterProcedureProtos.ServerRemoteProcedureState DISPATCH_STATE =
    MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH;

  private SnapshotDescription snapshot;
  private RegionInfo region;

  /** Created lazily the first time {@link #execute(MasterProcedureEnv)} has to back off. */
  private RetryCounter retryCounter;

  /** No-arg constructor; the fields are filled in later by {@code deserializeStateData}. */
  public SnapshotVerifyProcedure() {
  }

  /**
   * @param snapshot description of the snapshot being verified
   * @param region   the region whose part of the snapshot is to be verified
   */
  public SnapshotVerifyProcedure(SnapshotDescription snapshot, RegionInfo region) {
    this.region = region;
    this.snapshot = snapshot;
  }

  /** This procedure has nothing to undo. */
  @Override
  protected void rollback(MasterProcedureEnv env) {
    // intentionally empty
  }

  /**
   * @return always {@code false}
   */
  @Override
  protected boolean abort(MasterProcedureEnv env) {
    return false;
  }

  /**
   * Evaluates the outcome reported for the remote operation and releases the verify worker.
   * @param env   master procedure environment
   * @param error {@code null} if the remote operation finished without error, otherwise the
   *              reported failure
   * @return {@code true} if there was no error or the failure was a corrupted snapshot;
   *         {@code false} for every other failure, so that the request can be retried on another
   *         server
   */
  @Override
  protected synchronized boolean complete(MasterProcedureEnv env, Throwable error) {
    boolean finished = false;
    try {
      if (error == null) {
        // the remote side reported success
        finished = true;
      } else if (isCorruptedSnapshotFailure(error)) {
        // a corrupted snapshot ends this procedure; let the parent record the corruption
        finished = true;
        SnapshotProcedure owner = findParentSnapshotProcedure(env);
        if (owner != null) {
          owner.markSnapshotCorrupted();
        }
      }
      // any other error (unexpected remote exception, or most likely a failed remote call)
      // leaves 'finished' false so that another server will be tried
    } catch (IOException ioe) {
      // the corrupted-snapshot flag file could not be created: fail this procedure, which
      // rolls back the whole snapshot procedure stack
      LOG.warn("Failed create corrupted snapshot flag file for snapshot={}, region={}",
        snapshot.getName(), region, ioe);
      setFailure("verify-snapshot", ioe);
    } finally {
      // the worker is handed back no matter how the evaluation went
      env.getMasterServices().getSnapshotManager().releaseSnapshotVerifyWorker(this, targetServer);
    }
    return finished;
  }

  /**
   * Remote failures arrive wrapped in a {@link RemoteProcedureException}; this looks at the
   * wrapped cause.
   * @return {@code true} if {@code error} is a {@link RemoteProcedureException} whose cause is a
   *         {@link CorruptedSnapshotException}
   */
  private static boolean isCorruptedSnapshotFailure(Throwable error) {
    return error instanceof RemoteProcedureException
      && error.getCause() instanceof CorruptedSnapshotException;
  }

  /**
   * @return the {@link SnapshotProcedure} registered under this procedure's parent id, as returned
   *         by the master procedure executor (callers check the result for {@code null})
   */
  private SnapshotProcedure findParentSnapshotProcedure(MasterProcedureEnv env) {
    return env.getMasterServices().getMasterProcedureExecutor()
      .getProcedure(SnapshotProcedure.class, getParentProcId());
  }

  /**
   * Acquires a verify worker if necessary and lets the superclass send the remote request. If the
   * procedure is still in the dispatch state afterwards, or any other {@link IOException} occurs,
   * the procedure is suspended with a backoff timeout so that it is retried later.
   * @return {@code null} if the parent already knows the snapshot is corrupted, otherwise whatever
   *         the superclass returned
   * @throws ProcedureSuspendedException when the attempt failed and a retry has been scheduled
   */
  @Override
  protected synchronized Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
    throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
    try {
      // once the snapshot is known to be corrupted there is no point in dispatching
      // new or not-yet-dispatched verify requests
      if (state == DISPATCH_STATE) {
        SnapshotProcedure owner = findParentSnapshotProcedure(env);
        if (owner != null && owner.isSnapshotCorrupted()) {
          return null;
        }
      }
      // pick a worker unless one is already assigned
      if (state == DISPATCH_STATE && targetServer == null) {
        targetServer =
          env.getMasterServices().getSnapshotManager().acquireSnapshotVerifyWorker(this);
      }
      // send the remote request
      Procedure<MasterProcedureEnv>[] dispatched = super.execute(env);
      if (state != DISPATCH_STATE) {
        // the request was dispatched successfully
        return dispatched;
      }
      // still waiting for dispatch: most likely a FailedRemoteDispatchException was raised,
      // so forget the current server and retry on another one
      targetServer = null;
      throw new FailedRemoteDispatchException("Failed sent request");
    } catch (IOException ioe) {
      // retry cases include: no response from hdfs, or the remote server crashed
      if (retryCounter == null) {
        retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
      }
      long waitMillis = retryCounter.getBackoffTimeAndIncrementAttempts();
      LOG.warn("Failed to get snapshot verify result , wait {} ms to retry", waitMillis, ioe);
      setTimeout(Math.toIntExact(waitMillis));
      setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
      skipPersistence();
      throw new ProcedureSuspendedException();
    }
  }

  /**
   * Timeout handler: marks the procedure runnable again and puts it at the front of the procedure
   * scheduler instead of failing it.
   * @return always {@code false}
   */
  @Override
  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
    setState(ProcedureProtos.ProcedureState.RUNNABLE);
    env.getProcedureScheduler().addFront(this);
    return false;
  }

  /**
   * Writes snapshot, region and state, plus the target server and the remote error when they are
   * set.
   */
  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
    SnapshotVerifyProcedureStateData.Builder stateData =
      SnapshotVerifyProcedureStateData.newBuilder().setSnapshot(snapshot)
        .setRegion(ProtobufUtil.toRegionInfo(region)).setState(state);
    if (targetServer != null) {
      stateData.setTargetServer(ProtobufUtil.toServerName(targetServer));
    }
    if (this.remoteError != null) {
      ErrorHandlingProtos.ForeignExceptionMessage errorMessage =
        ForeignExceptionUtil.toProtoForeignException(remoteError);
      stateData.setError(errorMessage);
    }
    serializer.serialize(stateData.build());
  }

  /**
   * Restores the fields written by {@link #serializeStateData(ProcedureStateSerializer)}; the
   * target server and remote error are only restored when present in the stored data.
   */
  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
    SnapshotVerifyProcedureStateData stored =
      serializer.deserialize(SnapshotVerifyProcedureStateData.class);
    this.snapshot = stored.getSnapshot();
    this.region = ProtobufUtil.toRegionInfo(stored.getRegion());
    this.state = stored.getState();
    if (stored.hasTargetServer()) {
      this.targetServer = ProtobufUtil.toServerName(stored.getTargetServer());
    }
    if (stored.hasError()) {
      this.remoteError = ForeignExceptionUtil.toException(stored.getError());
    }
  }

  /** Appends the simple class name, the snapshot name and, if assigned, the target server. */
  @Override
  protected void toStringClassDetails(StringBuilder builder) {
    builder.append(getClass().getSimpleName());
    builder.append(", snapshot=");
    builder.append(snapshot.getName());
    if (targetServer != null) {
      builder.append(", targetServer=");
      builder.append(targetServer);
    }
  }

  /**
   * Builds the operation sent to the region server: a {@link SnapshotVerifyCallable} invocation
   * carrying the serialized snapshot description and region.
   * @return a non-empty {@link Optional} holding the server operation
   */
  @Override
  public Optional<RemoteOperation> remoteCallBuild(MasterProcedureEnv env, ServerName serverName) {
    SnapshotVerifyParameter parameter = SnapshotVerifyParameter.newBuilder().setSnapshot(snapshot)
      .setRegion(ProtobufUtil.toRegionInfo(region)).build();
    RemoteOperation operation =
      new RSProcedureDispatcher.ServerOperation(this, getProcId(), SnapshotVerifyCallable.class,
        parameter.toByteArray(), env.getMasterServices().getMasterActiveTime());
    return Optional.of(operation);
  }

  /**
   * @return the table named in the snapshot description
   */
  @Override
  public TableName getTableName() {
    String tableName = snapshot.getTable();
    return TableName.valueOf(tableName);
  }

  /**
   * @return {@link TableProcedureInterface.TableOperationType#SNAPSHOT}
   */
  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.SNAPSHOT;
  }

  /**
   * @return the currently assigned target server; may be {@code null} when none is assigned
   */
  public ServerName getServerName() {
    return targetServer;
  }
}