View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.client;
21  
22  import java.io.IOException;
23  import java.io.InterruptedIOException;
24  import java.lang.reflect.UndeclaredThrowableException;
25  import java.net.SocketTimeoutException;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.concurrent.atomic.AtomicBoolean;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.hbase.DoNotRetryIOException;
34  import org.apache.hadoop.hbase.exceptions.PreemptiveFastFailException;
35  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
36  import org.apache.hadoop.hbase.util.ExceptionUtil;
37  import org.apache.hadoop.ipc.RemoteException;
38  
39  import com.google.protobuf.ServiceException;
40  
41  /**
42   * Runs an rpc'ing {@link RetryingCallable}. Sets into rpc client
43   * threadlocal outstanding timeouts as so we don't persist too much.
44   * Dynamic rather than static so can set the generic appropriately.
45   *
46   * This object has a state. It should not be used by in parallel by different threads.
47   * Reusing it is possible however, even between multiple threads. However, the user will
48   *  have to manage the synchronization on its side: there is no synchronization inside the class.
49   */
50  @InterfaceAudience.Private
51  public class RpcRetryingCaller<T> {
52    public static final Log LOG = LogFactory.getLog(RpcRetryingCaller.class);
53    /**
54     * When we started making calls.
55     */
56    private long globalStartTime;
57    /**
58     * Start and end times for a single call.
59     */
60    private final static int MIN_RPC_TIMEOUT = 2000;
61    /** How many retries are allowed before we start to log */
62    private final int startLogErrorsCnt;
63  
64    private final long pause;
65    private final int retries;
66    private final int rpcTimeout;// timeout for each rpc request
67    private final AtomicBoolean cancelled = new AtomicBoolean(false);
68    private final RetryingCallerInterceptor interceptor;
69    private final RetryingCallerInterceptorContext context;
70  
71    public RpcRetryingCaller(long pause, int retries, int startLogErrorsCnt) {
72      this(pause, retries, RetryingCallerInterceptorFactory.NO_OP_INTERCEPTOR, startLogErrorsCnt, 0);
73    }
74  
75    public RpcRetryingCaller(long pause, int retries,
76        RetryingCallerInterceptor interceptor, int startLogErrorsCnt, int rpcTimeout) {
77      this.pause = pause;
78      this.retries = retries;
79      this.interceptor = interceptor;
80      context = interceptor.createEmptyContext();
81      this.startLogErrorsCnt = startLogErrorsCnt;
82      this.rpcTimeout = rpcTimeout;
83    }
84  
85    private int getRemainingTime(int callTimeout) {
86      if (callTimeout <= 0) {
87        return 0;
88      } else {
89        if (callTimeout == Integer.MAX_VALUE) return Integer.MAX_VALUE;
90        int remainingTime = (int) (callTimeout -
91            (EnvironmentEdgeManager.currentTime() - this.globalStartTime));
92        if (remainingTime < MIN_RPC_TIMEOUT) {
93          // If there is no time left, we're trying anyway. It's too late.
94          // 0 means no timeout, and it's not the intent here. So we secure both cases by
95          // resetting to the minimum.
96          remainingTime = MIN_RPC_TIMEOUT;
97        }
98        return remainingTime;
99      }
100   }
101 
102   private int getTimeout(int callTimeout){
103     int timeout = getRemainingTime(callTimeout);
104     if (timeout <= 0 || rpcTimeout > 0 && rpcTimeout < timeout){
105       timeout = rpcTimeout;
106     }
107     return timeout;
108   }
109 
110   public void cancel(){
111     synchronized (cancelled){
112       cancelled.set(true);
113       cancelled.notifyAll();
114     }
115   }
116 
117   /**
118    * Retries if invocation fails.
119    * @param callTimeout Timeout for this call
120    * @param callable The {@link RetryingCallable} to run.
121    * @return an object of type T
122    * @throws IOException if a remote or network exception occurs
123    * @throws RuntimeException other unspecified error
124    */
125   public T callWithRetries(RetryingCallable<T> callable, int callTimeout)
126   throws IOException, RuntimeException {
127     List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions =
128       new ArrayList<RetriesExhaustedException.ThrowableWithExtraContext>();
129     this.globalStartTime = EnvironmentEdgeManager.currentTime();
130     context.clear();
131     for (int tries = 0;; tries++) {
132       long expectedSleep;
133       try {
134         callable.prepare(tries != 0); // if called with false, check table status on ZK
135         interceptor.intercept(context.prepare(callable, tries));
136         return callable.call(getTimeout(callTimeout));
137       } catch (PreemptiveFastFailException e) {
138         throw e;
139       } catch (Throwable t) {
140         ExceptionUtil.rethrowIfInterrupt(t);
141         if (tries > startLogErrorsCnt) {
142           LOG.info("Call exception, tries=" + tries + ", retries=" + retries + ", started=" +
143               (EnvironmentEdgeManager.currentTime() - this.globalStartTime) + " ms ago, "
144               + "cancelled=" + cancelled.get() + ", msg="
145               + callable.getExceptionMessageAdditionalDetail());
146         }
147 
148         // translateException throws exception when should not retry: i.e. when request is bad.
149         interceptor.handleFailure(context, t);
150         t = translateException(t);
151         callable.throwable(t, retries != 1);
152         RetriesExhaustedException.ThrowableWithExtraContext qt =
153             new RetriesExhaustedException.ThrowableWithExtraContext(t,
154                 EnvironmentEdgeManager.currentTime(), toString());
155         exceptions.add(qt);
156         if (tries >= retries - 1) {
157           throw new RetriesExhaustedException(tries, exceptions);
158         }
159         // If the server is dead, we need to wait a little before retrying, to give
160         //  a chance to the regions to be
161         // get right pause time, start by RETRY_BACKOFF[0] * pause
162         expectedSleep = callable.sleep(pause, tries);
163 
164         // If, after the planned sleep, there won't be enough time left, we stop now.
165         long duration = singleCallDuration(expectedSleep);
166         if (duration > callTimeout) {
167           String msg = "callTimeout=" + callTimeout + ", callDuration=" + duration +
168               ": " + callable.getExceptionMessageAdditionalDetail();
169           throw (SocketTimeoutException)(new SocketTimeoutException(msg).initCause(t));
170         }
171       } finally {
172         interceptor.updateFailureInfo(context);
173       }
174       try {
175         if (expectedSleep > 0) {
176           synchronized (cancelled) {
177             if (cancelled.get()) return null;
178             cancelled.wait(expectedSleep);
179           }
180         }
181         if (cancelled.get()) return null;
182       } catch (InterruptedException e) {
183         throw new InterruptedIOException("Interrupted after " + tries + " tries  on " + retries);
184       }
185     }
186   }
187 
188   /**
189    * @return Calculate how long a single call took
190    */
191   private long singleCallDuration(final long expectedSleep) {
192     return (EnvironmentEdgeManager.currentTime() - this.globalStartTime) + expectedSleep;
193   }
194 
195   /**
196    * Call the server once only.
197    * {@link RetryingCallable} has a strange shape so we can do retrys.  Use this invocation if you
198    * want to do a single call only (A call to {@link RetryingCallable#call(int)} will not likely
199    * succeed).
200    * @return an object of type T
201    * @throws IOException if a remote or network exception occurs
202    * @throws RuntimeException other unspecified error
203    */
204   public T callWithoutRetries(RetryingCallable<T> callable, int callTimeout)
205   throws IOException, RuntimeException {
206     // The code of this method should be shared with withRetries.
207     this.globalStartTime = EnvironmentEdgeManager.currentTime();
208     try {
209       callable.prepare(false);
210       return callable.call(callTimeout);
211     } catch (Throwable t) {
212       Throwable t2 = translateException(t);
213       ExceptionUtil.rethrowIfInterrupt(t2);
214       // It would be nice to clear the location cache here.
215       if (t2 instanceof IOException) {
216         throw (IOException)t2;
217       } else {
218         throw new RuntimeException(t2);
219       }
220     }
221   }
222 
223   /**
224    * Get the good or the remote exception if any, throws the DoNotRetryIOException.
225    * @param t the throwable to analyze
226    * @return the translated exception, if it's not a DoNotRetryIOException
227    * @throws DoNotRetryIOException - if we find it, we throw it instead of translating.
228    */
229   static Throwable translateException(Throwable t) throws DoNotRetryIOException {
230     if (t instanceof UndeclaredThrowableException) {
231       if (t.getCause() != null) {
232         t = t.getCause();
233       }
234     }
235     if (t instanceof RemoteException) {
236       t = ((RemoteException)t).unwrapRemoteException();
237     }
238     if (t instanceof LinkageError) {
239       throw new DoNotRetryIOException(t);
240     }
241     if (t instanceof ServiceException) {
242       ServiceException se = (ServiceException)t;
243       Throwable cause = se.getCause();
244       if (cause != null) {
245         if (cause instanceof DoNotRetryIOException) {
246           throw (DoNotRetryIOException)cause;
247         } else if (cause instanceof NeedUnmanagedConnectionException) {
248           throw new DoNotRetryIOException(cause);
249         }
250       }
251       // Don't let ServiceException out; its rpc specific.
252       t = cause;
253       // t could be a RemoteException so go aaround again.
254       translateException(t);
255     } else if (t instanceof DoNotRetryIOException) {
256       throw (DoNotRetryIOException)t;
257     } else if (t instanceof NeedUnmanagedConnectionException) {
258       throw new DoNotRetryIOException(t);
259     }
260     return t;
261   }
262 
263   @Override
264   public String toString() {
265     return "RpcRetryingCaller{" + "globalStartTime=" + globalStartTime +
266         ", pause=" + pause + ", retries=" + retries + '}';
267   }
268 }