001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import io.opentelemetry.api.trace.Span;
021import io.opentelemetry.api.trace.StatusCode;
022import io.opentelemetry.context.Scope;
023import java.io.File;
024import java.io.IOException;
025import java.util.List;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.LocalHBaseCluster;
029import org.apache.hadoop.hbase.MasterNotRunningException;
030import org.apache.hadoop.hbase.ZNodeClearer;
031import org.apache.hadoop.hbase.ZooKeeperConnectionException;
032import org.apache.hadoop.hbase.client.Admin;
033import org.apache.hadoop.hbase.client.Connection;
034import org.apache.hadoop.hbase.client.ConnectionFactory;
035import org.apache.hadoop.hbase.regionserver.HRegionServer;
036import org.apache.hadoop.hbase.trace.TraceUtil;
037import org.apache.hadoop.hbase.util.JVMClusterUtil;
038import org.apache.hadoop.hbase.util.ServerCommandLine;
039import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
040import org.apache.hadoop.hbase.zookeeper.ZKAuthentication;
041import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
042import org.apache.yetus.audience.InterfaceAudience;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045
046import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
047import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
048import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
049import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
050
051@InterfaceAudience.Private
052public class HMasterCommandLine extends ServerCommandLine {
053  private static final Logger LOG = LoggerFactory.getLogger(HMasterCommandLine.class);
054
055  private static final String USAGE = "Usage: Master [opts] start|stop|clear\n"
056    + " start  Start Master. If local mode, start Master and RegionServer in same JVM\n"
057    + " stop   Start cluster shutdown; Master signals RegionServer shutdown\n"
058    + " clear  Delete the master znode in ZooKeeper after a master crashes\n "
059    + " where [opts] are:\n"
060    + "   --minRegionServers=<servers>   Minimum RegionServers needed to host user tables.\n"
061    + "   --localRegionServers=<servers> "
062    + "RegionServers to start in master process when in standalone mode.\n"
063    + "   --masters=<servers>            Masters to start in this process.\n"
064    + "   --backup                       Master should start in backup mode\n"
065    + "   --shutDownCluster                    "
066    + "Start Cluster shutdown; Master signals RegionServer shutdown";
067
  /**
   * Master implementation passed to {@code HMaster.constructMaster} by {@code startMaster()} when
   * the configuration is not in local mode. Local mode always uses {@code LocalHMaster} instead.
   */
  private final Class<? extends HMaster> masterClass;

  /**
   * Creates a command-line driver for the given master implementation.
   * @param masterClass master class to instantiate for the {@code start} command outside local
   *                    mode
   */
  public HMasterCommandLine(Class<? extends HMaster> masterClass) {
    this.masterClass = masterClass;
  }
073
  /**
   * Returns the help text for the master command line.
   * @return the {@code USAGE} string listing the supported commands and options
   */
  @Override
  protected String getUsage() {
    return USAGE;
  }
078
079  @Override
080  public int run(String args[]) throws Exception {
081    boolean shutDownCluster = false;
082    Options opt = new Options();
083    opt.addOption("localRegionServers", true,
084      "RegionServers to start in master process when running standalone");
085    opt.addOption("masters", true, "Masters to start in this process");
086    opt.addOption("minRegionServers", true, "Minimum RegionServers needed to host user tables");
087    opt.addOption("backup", false, "Do not try to become HMaster until the primary fails");
088    opt.addOption("shutDownCluster", false,
089      "`hbase master stop --shutDownCluster` shuts down cluster");
090
091    CommandLine cmd;
092    try {
093      cmd = new GnuParser().parse(opt, args);
094    } catch (ParseException e) {
095      LOG.error("Could not parse: ", e);
096      usage(null);
097      return 1;
098    }
099
100    if (cmd.hasOption("minRegionServers")) {
101      String val = cmd.getOptionValue("minRegionServers");
102      getConf().setInt("hbase.regions.server.count.min", Integer.parseInt(val));
103      LOG.debug("minRegionServers set to " + val);
104    }
105
106    // minRegionServers used to be minServers. Support it too.
107    if (cmd.hasOption("minServers")) {
108      String val = cmd.getOptionValue("minServers");
109      getConf().setInt("hbase.regions.server.count.min", Integer.parseInt(val));
110      LOG.debug("minServers set to " + val);
111    }
112
113    // check if we are the backup master - override the conf if so
114    if (cmd.hasOption("backup")) {
115      getConf().setBoolean(HConstants.MASTER_TYPE_BACKUP, true);
116    }
117
118    // How many regionservers to startup in this process (we run regionservers in same process as
119    // master when we are in local/standalone mode. Useful testing)
120    if (cmd.hasOption("localRegionServers")) {
121      String val = cmd.getOptionValue("localRegionServers");
122      getConf().setInt("hbase.regionservers", Integer.parseInt(val));
123      LOG.debug("localRegionServers set to " + val);
124    }
125    // How many masters to startup inside this process; useful testing
126    if (cmd.hasOption("masters")) {
127      String val = cmd.getOptionValue("masters");
128      getConf().setInt("hbase.masters", Integer.parseInt(val));
129      LOG.debug("masters set to " + val);
130    }
131
132    // Checking whether to shut down cluster or not
133    if (cmd.hasOption("shutDownCluster")) {
134      shutDownCluster = true;
135    }
136
137    @SuppressWarnings("unchecked")
138    List<String> remainingArgs = cmd.getArgList();
139    if (remainingArgs.size() != 1) {
140      usage(null);
141      return 1;
142    }
143
144    String command = remainingArgs.get(0);
145
146    if ("start".equals(command)) {
147      return startMaster();
148    } else if ("stop".equals(command)) {
149      if (shutDownCluster) {
150        return stopMaster();
151      }
152      System.err.println("To shutdown the master run "
153        + "hbase-daemon.sh stop master or send a kill signal to the HMaster pid, "
154        + "and to stop HBase Cluster run \"stop-hbase.sh\" or \"hbase master "
155        + "stop --shutDownCluster\"");
156      return 1;
157    } else if ("clear".equals(command)) {
158      return (ZNodeClearer.clear(getConf()) ? 0 : 1);
159    } else {
160      usage("Invalid command: " + command);
161      return 1;
162    }
163  }
164
165  private int startMaster() {
166    Configuration conf = getConf();
167    final Span span = TraceUtil.createSpan("HMasterCommandLine.startMaster");
168    try (Scope ignored = span.makeCurrent()) {
169      // If 'local', defer to LocalHBaseCluster instance. Starts master
170      // and regionserver both in the one JVM.
171      if (LocalHBaseCluster.isLocal(conf)) {
172        DefaultMetricsSystem.setMiniClusterMode(true);
173        final MiniZooKeeperCluster zooKeeperCluster = new MiniZooKeeperCluster(conf);
174        File zkDataPath = new File(conf.get(HConstants.ZOOKEEPER_DATA_DIR));
175
176        // find out the default client port
177        int zkClientPort = 0;
178
179        // If the zookeeper client port is specified in server quorum, use it.
180        String zkserver = conf.get(HConstants.ZOOKEEPER_QUORUM);
181        if (zkserver != null) {
182          String[] zkservers = zkserver.split(",");
183
184          if (zkservers.length > 1) {
185            // In local mode deployment, we have the master + a region server and zookeeper server
186            // started in the same process. Therefore, we only support one zookeeper server.
187            String errorMsg = "Could not start ZK with " + zkservers.length
188              + " ZK servers in local mode deployment. Aborting as clients (e.g. shell) will not "
189              + "be able to find this ZK quorum.";
190            System.err.println(errorMsg);
191            throw new IOException(errorMsg);
192          }
193
194          String[] parts = zkservers[0].split(":");
195
196          if (parts.length == 2) {
197            // the second part is the client port
198            zkClientPort = Integer.parseInt(parts[1]);
199          }
200        }
201        // If the client port could not be find in server quorum conf, try another conf
202        if (zkClientPort == 0) {
203          zkClientPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, 0);
204          // The client port has to be set by now; if not, throw exception.
205          if (zkClientPort == 0) {
206            throw new IOException("No config value for " + HConstants.ZOOKEEPER_CLIENT_PORT);
207          }
208        }
209        zooKeeperCluster.setDefaultClientPort(zkClientPort);
210        // set the ZK tick time if specified
211        int zkTickTime = conf.getInt(HConstants.ZOOKEEPER_TICK_TIME, 0);
212        if (zkTickTime > 0) {
213          zooKeeperCluster.setTickTime(zkTickTime);
214        }
215
216        // login the zookeeper server principal (if using security)
217        ZKAuthentication.loginServer(conf, HConstants.ZK_SERVER_KEYTAB_FILE,
218          HConstants.ZK_SERVER_KERBEROS_PRINCIPAL, null);
219        int localZKClusterSessionTimeout =
220          conf.getInt(HConstants.ZK_SESSION_TIMEOUT + ".localHBaseCluster", 10 * 1000);
221        conf.setInt(HConstants.ZK_SESSION_TIMEOUT, localZKClusterSessionTimeout);
222        LOG.info("Starting a zookeeper cluster");
223        int clientPort = zooKeeperCluster.startup(zkDataPath);
224        if (clientPort != zkClientPort) {
225          String errorMsg = "Could not start ZK at requested port of " + zkClientPort
226            + ".  ZK was started at port: " + clientPort
227            + ".  Aborting as clients (e.g. shell) will not be able to find " + "this ZK quorum.";
228          System.err.println(errorMsg);
229          throw new IOException(errorMsg);
230        }
231        conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
232
233        // Need to have the zk cluster shutdown when master is shutdown.
234        // Run a subclass that does the zk cluster shutdown on its way out.
235        int mastersCount = conf.getInt("hbase.masters", 1);
236        int regionServersCount = conf.getInt("hbase.regionservers", 1);
237        // Set start timeout to 5 minutes for cmd line start operations
238        conf.setIfUnset("hbase.master.start.timeout.localHBaseCluster", "300000");
239        LOG.info("Starting up instance of localHBaseCluster; master=" + mastersCount
240          + ", regionserversCount=" + regionServersCount);
241        LocalHBaseCluster cluster = new LocalHBaseCluster(conf, mastersCount, regionServersCount,
242          LocalHMaster.class, HRegionServer.class);
243        ((LocalHMaster) cluster.getMaster(0)).setZKCluster(zooKeeperCluster);
244        cluster.startup();
245        waitOnMasterThreads(cluster);
246      } else {
247        logProcessInfo(getConf());
248        HMaster master = HMaster.constructMaster(masterClass, conf);
249        if (master.isStopped()) {
250          LOG.info("Won't bring the Master up as a shutdown is requested");
251          return 1;
252        }
253        master.start();
254        master.join();
255        if (master.isAborted()) throw new RuntimeException("HMaster Aborted");
256      }
257      span.setStatus(StatusCode.OK);
258    } catch (Throwable t) {
259      TraceUtil.setError(span, t);
260      LOG.error("Master exiting", t);
261      return 1;
262    } finally {
263      span.end();
264    }
265    return 0;
266  }
267
268  @SuppressWarnings("resource")
269  private int stopMaster() {
270    Configuration conf = getConf();
271    // Don't try more than once
272    conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 0);
273    try (Connection connection = ConnectionFactory.createConnection(conf)) {
274      try (Admin admin = connection.getAdmin()) {
275        admin.shutdown();
276      } catch (Throwable t) {
277        LOG.error("Failed to stop master", t);
278        return 1;
279      }
280    } catch (MasterNotRunningException e) {
281      LOG.error("Master not running");
282      return 1;
283    } catch (ZooKeeperConnectionException e) {
284      LOG.error("ZooKeeper not available");
285      return 1;
286    } catch (IOException e) {
287      LOG.error("Got IOException: " + e.getMessage(), e);
288      return 1;
289    }
290    return 0;
291  }
292
293  private void waitOnMasterThreads(LocalHBaseCluster cluster) throws InterruptedException {
294    List<JVMClusterUtil.MasterThread> masters = cluster.getMasters();
295    List<JVMClusterUtil.RegionServerThread> regionservers = cluster.getRegionServers();
296
297    if (masters != null) {
298      for (JVMClusterUtil.MasterThread t : masters) {
299        t.join();
300        if (t.getMaster().isAborted()) {
301          closeAllRegionServerThreads(regionservers);
302          throw new RuntimeException("HMaster Aborted");
303        }
304      }
305    }
306  }
307
308  private static void
309    closeAllRegionServerThreads(List<JVMClusterUtil.RegionServerThread> regionservers) {
310    for (JVMClusterUtil.RegionServerThread t : regionservers) {
311      t.getRegionServer().stop("HMaster Aborted; Bringing down regions servers");
312    }
313  }
314
315  /*
316   * Version of master that will shutdown the passed zk cluster on its way out.
317   */
318  public static class LocalHMaster extends HMaster {
319    private MiniZooKeeperCluster zkcluster = null;
320
321    public LocalHMaster(Configuration conf) throws IOException {
322      super(conf);
323    }
324
325    @Override
326    public void run() {
327      super.run();
328      if (this.zkcluster != null) {
329        try {
330          this.zkcluster.shutdown();
331        } catch (IOException e) {
332          LOG.error("Failed to shutdown MiniZooKeeperCluster", e);
333        }
334      }
335    }
336
337    void setZKCluster(final MiniZooKeeperCluster zkcluster) {
338      this.zkcluster = zkcluster;
339    }
340  }
341}