001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.procedure;
019
020import java.io.IOException;
021import java.util.Collection;
022import java.util.HashSet;
023import java.util.List;
024import java.util.Set;
025import java.util.concurrent.ConcurrentMap;
026import java.util.concurrent.ExecutorService;
027import java.util.concurrent.RejectedExecutionException;
028import java.util.concurrent.SynchronousQueue;
029import java.util.concurrent.ThreadPoolExecutor;
030import java.util.concurrent.TimeUnit;
031import org.apache.hadoop.hbase.errorhandling.ForeignException;
032import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
033import org.apache.hadoop.hbase.util.Threads;
034import org.apache.yetus.audience.InterfaceAudience;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import org.apache.hbase.thirdparty.com.google.common.collect.MapMaker;
039import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
040
041/**
042 * This is the master side of a distributed complex procedure execution.
043 * <p>
044 * The {@link Procedure} is generic and subclassing or customization shouldn't be necessary -- any
045 * customization should happen just in {@link Subprocedure}s.
046 */
047@InterfaceAudience.Private
048public class ProcedureCoordinator {
049  private static final Logger LOG = LoggerFactory.getLogger(ProcedureCoordinator.class);
050
051  final static long KEEP_ALIVE_MILLIS_DEFAULT = 5000;
052  final static long TIMEOUT_MILLIS_DEFAULT = 60000;
053  final static long WAKE_MILLIS_DEFAULT = 500;
054
055  private final ProcedureCoordinatorRpcs rpcs;
056  private final ExecutorService pool;
057  private final long wakeTimeMillis;
058  private final long timeoutMillis;
059
060  // Running procedure table. Maps procedure name to running procedure reference
061  private final ConcurrentMap<String, Procedure> procedures =
062    new MapMaker().concurrencyLevel(4).weakValues().makeMap();
063
064  /**
065   * Create and start a ProcedureCoordinator. The rpc object registers the ProcedureCoordinator and
066   * starts any threads in this constructor.
067   * @param pool Used for executing procedures.
068   */
069  // Only used in tests. SimpleMasterProcedureManager is a test class.
070  public ProcedureCoordinator(ProcedureCoordinatorRpcs rpcs, ThreadPoolExecutor pool) {
071    this(rpcs, pool, TIMEOUT_MILLIS_DEFAULT, WAKE_MILLIS_DEFAULT);
072  }
073
074  /**
075   * Create and start a ProcedureCoordinator. The rpc object registers the ProcedureCoordinator and
076   * starts any threads in this constructor.
077   * @param pool Used for executing procedures.
078   */
079  public ProcedureCoordinator(ProcedureCoordinatorRpcs rpcs, ThreadPoolExecutor pool,
080    long timeoutMillis, long wakeTimeMillis) {
081    this.timeoutMillis = timeoutMillis;
082    this.wakeTimeMillis = wakeTimeMillis;
083    this.rpcs = rpcs;
084    this.pool = pool;
085    this.rpcs.start(this);
086  }
087
088  /**
089   * Default thread pool for the procedure n * @param opThreads the maximum number of threads to
090   * allow in the pool
091   */
092  public static ThreadPoolExecutor defaultPool(String coordName, int opThreads) {
093    return defaultPool(coordName, opThreads, KEEP_ALIVE_MILLIS_DEFAULT);
094  }
095
096  /**
097   * Default thread pool for the procedure n * @param opThreads the maximum number of threads to
098   * allow in the pool
099   * @param keepAliveMillis the maximum time (ms) that excess idle threads will wait for new tasks
100   */
101  public static ThreadPoolExecutor defaultPool(String coordName, int opThreads,
102    long keepAliveMillis) {
103    return new ThreadPoolExecutor(1, opThreads, keepAliveMillis, TimeUnit.MILLISECONDS,
104      new SynchronousQueue<>(),
105      new ThreadFactoryBuilder().setNameFormat("(" + coordName + ")-proc-coordinator-pool-%d")
106        .setDaemon(true).setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
107  }
108
109  /**
110   * Shutdown the thread pools and release rpc resources n
111   */
112  public void close() throws IOException {
113    // have to use shutdown now to break any latch waiting
114    pool.shutdownNow();
115    rpcs.close();
116  }
117
118  /**
119   * Submit an procedure to kick off its dependent subprocedures.
120   * @param proc Procedure to execute
121   * @return <tt>true</tt> if the procedure was started correctly, <tt>false</tt> if the procedure
122   *         or any subprocedures could not be started. Failure could be due to submitting a
123   *         procedure multiple times (or one with the same name), or some sort of IO problem. On
124   *         errors, the procedure's monitor holds a reference to the exception that caused the
125   *         failure.
126   */
127  boolean submitProcedure(Procedure proc) {
128    // if the submitted procedure was null, then we don't want to run it
129    if (proc == null) {
130      return false;
131    }
132    String procName = proc.getName();
133
134    // make sure we aren't already running a procedure of that name
135    Procedure oldProc = procedures.get(procName);
136    if (oldProc != null) {
137      // procedures are always eventually completed on both successful and failed execution
138      try {
139        if (!oldProc.isCompleted()) {
140          LOG.warn("Procedure " + procName + " currently running.  Rejecting new request");
141          return false;
142        } else {
143          LOG.debug("Procedure " + procName
144            + " was in running list but was completed.  Accepting new attempt.");
145          if (!procedures.remove(procName, oldProc)) {
146            LOG.warn("Procedure " + procName
147              + " has been resubmitted by another thread. Rejecting this request.");
148            return false;
149          }
150        }
151      } catch (ForeignException e) {
152        LOG.debug("Procedure " + procName
153          + " was in running list but has exception.  Accepting new attempt.");
154        if (!procedures.remove(procName, oldProc)) {
155          LOG.warn("Procedure " + procName
156            + " has been resubmitted by another thread. Rejecting this request.");
157          return false;
158        }
159      }
160    }
161
162    // kick off the procedure's execution in a separate thread
163    try {
164      if (this.procedures.putIfAbsent(procName, proc) == null) {
165        LOG.debug("Submitting procedure " + procName);
166        this.pool.submit(proc);
167        return true;
168      } else {
169        LOG.error(
170          "Another thread has submitted procedure '" + procName + "'. Ignoring this attempt.");
171        return false;
172      }
173    } catch (RejectedExecutionException e) {
174      LOG.warn("Procedure " + procName + " rejected by execution pool.  Propagating error.", e);
175      // Remove the procedure from the list since is not started
176      this.procedures.remove(procName, proc);
177      // the thread pool is full and we can't run the procedure
178      proc.receive(new ForeignException(procName, e));
179    }
180    return false;
181  }
182
183  /**
184   * The connection to the rest of the procedure group (members and coordinator) has been
185   * broken/lost/failed. This should fail any interested procedures, but not attempt to notify other
186   * members since we cannot reach them anymore.
187   * @param message description of the error
188   * @param cause   the actual cause of the failure
189   */
190  void rpcConnectionFailure(final String message, final IOException cause) {
191    Collection<Procedure> toNotify = procedures.values();
192
193    boolean isTraceEnabled = LOG.isTraceEnabled();
194    LOG.debug("received connection failure: " + message, cause);
195    for (Procedure proc : toNotify) {
196      if (proc == null) {
197        continue;
198      }
199      // notify the elements, if they aren't null
200      if (isTraceEnabled) {
201        LOG.trace("connection failure - notify procedure: " + proc.getName());
202      }
203      proc.receive(new ForeignException(proc.getName(), cause));
204    }
205  }
206
207  /**
208   * Abort the procedure with the given name
209   * @param procName name of the procedure to abort
210   * @param reason   serialized information about the abort
211   */
212  public void abortProcedure(String procName, ForeignException reason) {
213    LOG.debug("abort procedure " + procName, reason);
214    // if we know about the Procedure, notify it
215    Procedure proc = procedures.get(procName);
216    if (proc == null) {
217      return;
218    }
219    proc.receive(reason);
220  }
221
222  /**
223   * Exposed for hooking with unit tests. nnn * @return the newly created procedure
224   */
225  Procedure createProcedure(ForeignExceptionDispatcher fed, String procName, byte[] procArgs,
226    List<String> expectedMembers) {
227    // build the procedure
228    return new Procedure(this, fed, wakeTimeMillis, timeoutMillis, procName, procArgs,
229      expectedMembers);
230  }
231
232  /**
233   * Kick off the named procedure Currently only one procedure with the same type and name is
234   * allowed to run at a time.
235   * @param procName        name of the procedure to start
236   * @param procArgs        arguments for the procedure
237   * @param expectedMembers expected members to start
238   * @return handle to the running procedure, if it was started correctly, <tt>null</tt> otherwise.
239   *         Null could be due to submitting a procedure multiple times (or one with the same name),
240   *         or runtime exception. Check the procedure's monitor that holds a reference to the
241   *         exception that caused the failure.
242   */
243  public Procedure startProcedure(ForeignExceptionDispatcher fed, String procName, byte[] procArgs,
244    List<String> expectedMembers) {
245    Procedure proc = createProcedure(fed, procName, procArgs, expectedMembers);
246    if (!this.submitProcedure(proc)) {
247      LOG.error("Failed to submit procedure '" + procName + "'");
248      return null;
249    }
250    return proc;
251  }
252
253  /**
254   * Notification that the procedure had the specified member acquired its part of the barrier via
255   * {@link Subprocedure#acquireBarrier()}.
256   * @param procName name of the procedure that acquired
257   * @param member   name of the member that acquired
258   */
259  void memberAcquiredBarrier(String procName, final String member) {
260    Procedure proc = procedures.get(procName);
261    if (proc == null) {
262      LOG.warn(
263        "Member '" + member + "' is trying to acquire an unknown procedure '" + procName + "'");
264      return;
265    }
266    if (LOG.isTraceEnabled()) {
267      LOG.trace("Member '" + member + "' acquired procedure '" + procName + "'");
268    }
269    proc.barrierAcquiredByMember(member);
270  }
271
272  /**
273   * Notification that the procedure had another member finished executing its in-barrier subproc
274   * via {@link Subprocedure#insideBarrier()}.
275   * @param procName       name of the subprocedure that finished
276   * @param member         name of the member that executed and released its barrier
277   * @param dataFromMember the data that the member returned along with the notification
278   */
279  void memberFinishedBarrier(String procName, final String member, byte[] dataFromMember) {
280    Procedure proc = procedures.get(procName);
281    if (proc == null) {
282      LOG.warn(
283        "Member '" + member + "' is trying to release an unknown procedure '" + procName + "'");
284      return;
285    }
286    if (LOG.isTraceEnabled()) {
287      LOG.trace("Member '" + member + "' released procedure '" + procName + "'");
288    }
289    proc.barrierReleasedByMember(member, dataFromMember);
290  }
291
292  /** Returns the rpcs implementation for all current procedures */
293  ProcedureCoordinatorRpcs getRpcs() {
294    return rpcs;
295  }
296
297  /**
298   * Returns the procedure. This Procedure is a live instance so should not be modified but can be
299   * inspected.
300   * @param name Name of the procedure
301   * @return Procedure or null if not present any more
302   */
303  public Procedure getProcedure(String name) {
304    return procedures.get(name);
305  }
306
307  /** Returns Return set of all procedure names. */
308  public Set<String> getProcedureNames() {
309    return new HashSet<>(procedures.keySet());
310  }
311}