/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
018package org.apache.hadoop.hbase.http;
019
020import java.io.File;
021import java.io.IOException;
022import java.util.List;
023import java.util.concurrent.TimeUnit;
024import org.apache.hadoop.hbase.util.ProcessUtils;
025import org.apache.yetus.audience.InterfaceAudience;
026import org.slf4j.Logger;
027import org.slf4j.LoggerFactory;
028
029/**
030 * Abstraction over async-profiler execution. Implementations handle either the in-process Java API
031 * ({@link LibraryBackend}, when the maven dependency is on the classpath) or the external binary
032 * ({@link BinaryBackend}, when {@code ASYNC_PROFILER_HOME} is set).
033 * <p>
034 * This file deliberately contains no import of {@code one.profiler.AsyncProfiler}. That import is
035 * isolated in {@link LibraryBackend} so that binary-only deployments never trigger a
036 * {@code NoClassDefFoundError} when this class is loaded.
037 */
038@InterfaceAudience.Private
039interface ProfilerBackend {
040
041  Logger LOG = LoggerFactory.getLogger(ProfilerBackend.class);
042
043  /**
044   * Executes a profiling start command and returns the profiler's response.
045   */
046  String executeStart(ProfileServlet.ProfileRequest request, File outputFile) throws IOException;
047
048  /**
049   * Executes a profiling stop/dump command.
050   */
051  String executeStop(ProfileServlet.ProfileRequest request, File outputFile) throws IOException;
052
053  /**
054   * Cleans up any resources (e.g. kills a running process). Called on servlet destroy.
055   */
056  default void destroy() {
057  }
058
059  /**
060   * Detects which backend is available. Prefers {@link LibraryBackend} over {@link BinaryBackend}.
061   * Returns {@code null} if neither is available.
062   * <p>
063   * Detection runs <b>once</b> at class-load time (via {@code DETECTED_BACKEND} in
064   * {@link ProfileServlet}). A library that becomes loadable after the JVM starts requires a
065   * restart to be detected. A library that resolves at class-load but whose native binary is
066   * incompatible with the OS/kernel will not surface the error here — it will throw an
067   * {@code Error} or {@code RuntimeException} on the first {@code execute()} call at request time.
068   * <p>
069   * When both the library and a binary home are available, {@link LibraryBackend} is preferred and
070   * {@code ASYNC_PROFILER_HOME} is ignored.
071   * <p>
072   * {@link LibraryBackend} is instantiated reflectively so that its class — and therefore
073   * {@code one.profiler.AsyncProfiler} — is never loaded on systems where the JAR is absent.
074   */
075  static ProfilerBackend detect(String asyncProfilerHome) {
076    // 1. Try in-process Java API (optional maven dependency).
077    // Use Class.forName to probe without triggering a hard class-load of LibraryBackend,
078    // which would pull in one.profiler.AsyncProfiler and fail on binary-only systems.
079    try {
080      // Use the classloader that loaded this class so that isolated-classloader tests
081      // (which block one.profiler.*) correctly see the library as absent.
082      ClassLoader cl = ProfilerBackend.class.getClassLoader();
083      Class.forName("one.profiler.AsyncProfiler", false, cl);
084      // AsyncProfiler resolved — now safe to load LibraryBackend through the same loader
085      return (ProfilerBackend) Class
086        .forName("org.apache.hadoop.hbase.http.LibraryBackend", true, cl).getDeclaredConstructor()
087        .newInstance();
088    } catch (UnsatisfiedLinkError | ExceptionInInitializerError | ReflectiveOperationException e) {
089      // library not on classpath, native lib missing/incompatible, or static initializer failure
090      LOG.warn("async-profiler library not available ({}); falling back to BinaryBackend or"
091        + " DisabledServlet. Cause: {}", e.getClass().getSimpleName(), e.getMessage());
092    } catch (Throwable e) {
093      // Guard against any other unexpected failure during reflective instantiation so that a
094      // broken async-profiler installation falls back to DisabledServlet rather than crashing
095      // the daemon at DETECTED_BACKEND static-field initialization time.
096      LOG.warn("Unexpected error during async-profiler backend detection; "
097        + "profiling will be unavailable. Cause: {}", e.toString());
098    }
099    // 2. Try external binary
100    if (asyncProfilerHome != null && !asyncProfilerHome.trim().isEmpty()) {
101      return new BinaryBackend(asyncProfilerHome);
102    }
103    return null;
104  }
105}
106
107/**
108 * Backend that invokes the async-profiler binary ({@code asprof} / {@code profiler.sh}) as an
109 * external process. Requires {@code ASYNC_PROFILER_HOME} to be set.
110 */
111@InterfaceAudience.Private
112final class BinaryBackend implements ProfilerBackend {
113
114  private static final Logger LOG = LoggerFactory.getLogger(BinaryBackend.class);
115
116  private final String profilerHome;
117  private volatile Process process;
118
119  BinaryBackend(String profilerHome) {
120    this.profilerHome = profilerHome;
121  }
122
123  @Override
124  public String executeStart(ProfileServlet.ProfileRequest request, File outputFile)
125    throws IOException {
126    // Prefer the caller-supplied ?pid= param; fall back to ProcessHandle (Java 9+, always correct)
127    // rather than ProcessUtils.getPid() which reads the potentially-stale JVM_PID env variable.
128    int pid = request.getPid() != null ? request.getPid() : (int) ProcessHandle.current().pid();
129    List<String> cmd = ProfilerCommandMapper.toCliCommand(request, outputFile, profilerHome, pid);
130    process = ProcessUtils.runCmdAsync(cmd);
131    return "";
132  }
133
134  @Override
135  public String executeStop(ProfileServlet.ProfileRequest request, File outputFile)
136    throws IOException {
137    // The binary runs for the requested duration and exits on its own. Wait for it so the
138    // profiling flag is not cleared before the process has actually finished writing the output
139    // file — otherwise a new request could start a second asprof while the first is still running.
140    Process p = process;
141    if (p != null) {
142      // C6: waitFor() without a timeout can block indefinitely if asprof hangs (e.g. waiting for
143      // perf_event_open). Allow duration + 30 s slack; forcibly kill on timeout so the stopper
144      // thread can exit and profiling=false is eventually restored.
145      int timeoutSecs = request.getDuration() + 30;
146      try {
147        boolean finished = p.waitFor(timeoutSecs, TimeUnit.SECONDS);
148        if (!finished) {
149          LOG.warn("async-profiler process did not exit within {} s; forcibly killing it.",
150            timeoutSecs);
151          p.destroyForcibly();
152          throw new IOException(
153            "async-profiler process timed out after " + timeoutSecs + " seconds and was killed.");
154        }
155      } catch (InterruptedException e) {
156        Thread.currentThread().interrupt();
157        LOG.warn("Interrupted while waiting for async-profiler process to finish.", e);
158      }
159    }
160    return "";
161  }
162
163  @Override
164  public void destroy() {
165    Process p = process;
166    if (p != null && p.isAlive()) {
167      LOG.info("Destroying async-profiler process on servlet shutdown.");
168      p.destroy();
169    }
170  }
171}