001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import java.io.IOException; 021import org.apache.hadoop.hbase.ServerName; 022import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException; 023import org.apache.hadoop.hbase.procedure2.Procedure; 024import org.apache.hadoop.hbase.procedure2.ProcedureEvent; 025import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; 026import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; 027import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher; 028import org.apache.hadoop.hbase.procedure2.RemoteProcedureException; 029import org.apache.yetus.audience.InterfaceAudience; 030import org.slf4j.Logger; 031import org.slf4j.LoggerFactory; 032 033import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 034 035@InterfaceAudience.Private 036/** 037 * The base class for Procedures that run {@link java.util.concurrent.Callable}s on a (remote) 038 * RegionServer; e.g. asking a RegionServer to split a WAL file as a sub-procedure of the 039 * ServerCrashProcedure recovery process. 040 * <p> 041 * To implement a new Procedure type, extend this class and override remoteCallBuild() and 042 * complete(). The dispatch and callback will be handled for you here, internally. 043 * <p> 044 * The Procedure works as follows. It uses {@link RSProcedureDispatcher}, the same system used 045 * dispatching Region OPEN and CLOSE RPCs, to pass a Callable to a RegionServer. Examples include 046 * {@link org.apache.hadoop.hbase.regionserver.SplitWALCallable} and 047 * {@link org.apache.hadoop.hbase.replication.regionserver.RefreshPeerCallable}. Rather than 048 * assign/unassign, the Master calls #executeProcedures against the remote RegionServer wrapping a 049 * Callable in a {@link ExecuteProceduresRequest}. Upon successful dispatch, the Procedure then 050 * suspends itself on the Master-side and relinqushes its executor worker. On receipt, the 051 * RegionServer submits the Callable to its executor service. When the Callable completes, it adds 052 * itself to a queue on the RegionServer side for processing by a background thread, the 053 * {@link RemoteProcedureResultReporter}. It picks up the completed Callable from the queue and RPCs 054 * the master at #reportProcedureDone with the procedure id and whether success or failure. The 055 * master calls complete() setting success or failure state and then reschedules the suspended 056 * Procedure so it can finish. 057 * <p> 058 * Here are some details on operation: 059 * <p> 060 * If adding the operation to the dispatcher fails, addOperationToNode will throw 061 * FailedRemoteDispatchException, and this Procedure will return 'null'. The Procedure Executor will 062 * then mark this procedure as 'complete' (though we failed to dispatch our task). In this case, the 063 * upper layer of this procedure must have a way to check if this Procedure really succeeded or not 064 * and have appropriate handling. 065 * <p> 066 * If sending the operation to remote RS failed, dispatcher will call remoteCallFailed() to handle 067 * this which calls remoteOperationDone with the exception. If the targetServer crashed but this 068 * procedure has no response or if we receive failed response, then dispatcher will call 069 * remoteOperationFailed() which also calls remoteOperationDone with the exception. If the operation 070 * is successful, then remoteOperationCompleted will be called and actually calls the 071 * remoteOperationDone without exception. In remoteOperationDone, we'll check if the procedure is 072 * already get wake up by others. Then developer could implement complete() based on their own 073 * purpose. But basic logic is that if operation succeed, set succ to true and do the clean work. If 074 * operation failed and require to resend it to the same server, leave the succ as false. If 075 * operation failed and require to resend it to another server, set succ to true and upper layer 076 * should be able to find out this operation not work and send a operation to another server. 077 */ 078public abstract class ServerRemoteProcedure extends Procedure<MasterProcedureEnv> 079 implements RemoteProcedureDispatcher.RemoteProcedure<MasterProcedureEnv, ServerName> { 080 protected static final Logger LOG = LoggerFactory.getLogger(ServerRemoteProcedure.class); 081 protected ProcedureEvent<?> event; 082 protected ServerName targetServer; 083 // after remoteProcedureDone we require error field to decide the next state 084 protected Throwable remoteError; 085 protected MasterProcedureProtos.ServerRemoteProcedureState state = 086 MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH; 087 088 protected abstract boolean complete(MasterProcedureEnv env, Throwable error); 089 090 @Override 091 protected synchronized Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env) 092 throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException { 093 if ( 094 state != MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH 095 ) { 096 if (complete(env, this.remoteError)) { 097 return null; 098 } 099 state = MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH; 100 } 101 try { 102 env.getRemoteDispatcher().addOperationToNode(targetServer, this); 103 } catch (FailedRemoteDispatchException frde) { 104 LOG.warn("Can not send remote operation {} to {}, this operation will " 105 + "be retried to send to another server", this.getProcId(), targetServer); 106 return null; 107 } 108 event = new ProcedureEvent<>(this); 109 event.suspendIfNotReady(this); 110 throw new ProcedureSuspendedException(); 111 } 112 113 @Override 114 protected synchronized void completionCleanup(MasterProcedureEnv env) { 115 env.getRemoteDispatcher().removeCompletedOperation(targetServer, this); 116 } 117 118 @Override 119 public synchronized void remoteCallFailed(MasterProcedureEnv env, ServerName serverName, 120 IOException exception) { 121 state = MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH_FAIL; 122 remoteOperationDone(env, exception); 123 } 124 125 @Override 126 public synchronized void remoteOperationCompleted(MasterProcedureEnv env) { 127 state = MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_REPORT_SUCCEED; 128 remoteOperationDone(env, null); 129 } 130 131 @Override 132 public synchronized void remoteOperationFailed(MasterProcedureEnv env, 133 RemoteProcedureException error) { 134 state = MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_REPORT_FAILED; 135 remoteOperationDone(env, error); 136 } 137 138 synchronized void remoteOperationDone(MasterProcedureEnv env, Throwable error) { 139 if (this.isFinished()) { 140 LOG.info("This procedure {} is already finished, skip the rest processes", this.getProcId()); 141 return; 142 } 143 if (event == null) { 144 LOG.warn("procedure event for {} is null, maybe the procedure is created when recovery", 145 getProcId()); 146 return; 147 } 148 this.remoteError = error; 149 // below persistence is added so that if report goes to last active master, it throws exception 150 env.getMasterServices().getMasterProcedureExecutor().getStore().update(this); 151 event.wake(env.getProcedureScheduler()); 152 event = null; 153 } 154}