001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import io.opentelemetry.api.trace.Span; 021import io.opentelemetry.api.trace.StatusCode; 022import io.opentelemetry.context.Scope; 023import java.io.File; 024import java.io.IOException; 025import java.util.List; 026import org.apache.hadoop.conf.Configuration; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.LocalHBaseCluster; 029import org.apache.hadoop.hbase.MasterNotRunningException; 030import org.apache.hadoop.hbase.ZNodeClearer; 031import org.apache.hadoop.hbase.ZooKeeperConnectionException; 032import org.apache.hadoop.hbase.client.Admin; 033import org.apache.hadoop.hbase.client.Connection; 034import org.apache.hadoop.hbase.client.ConnectionFactory; 035import org.apache.hadoop.hbase.regionserver.HRegionServer; 036import org.apache.hadoop.hbase.trace.TraceUtil; 037import org.apache.hadoop.hbase.util.JVMClusterUtil; 038import org.apache.hadoop.hbase.util.ServerCommandLine; 039import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster; 040import org.apache.hadoop.hbase.zookeeper.ZKAuthentication; 041import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 042import org.apache.yetus.audience.InterfaceAudience; 043import org.slf4j.Logger; 044import org.slf4j.LoggerFactory; 045 046import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 047import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser; 048import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; 049import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; 050 051@InterfaceAudience.Private 052public class HMasterCommandLine extends ServerCommandLine { 053 private static final Logger LOG = LoggerFactory.getLogger(HMasterCommandLine.class); 054 055 private static final String USAGE = "Usage: Master [opts] start|stop|clear\n" 056 + " start Start Master. If local mode, start Master and RegionServer in same JVM\n" 057 + " stop Start cluster shutdown; Master signals RegionServer shutdown\n" 058 + " clear Delete the master znode in ZooKeeper after a master crashes\n " 059 + " where [opts] are:\n" 060 + " --minRegionServers=<servers> Minimum RegionServers needed to host user tables.\n" 061 + " --localRegionServers=<servers> " 062 + "RegionServers to start in master process when in standalone mode.\n" 063 + " --masters=<servers> Masters to start in this process.\n" 064 + " --backup Master should start in backup mode\n" 065 + " --shutDownCluster " 066 + "Start Cluster shutdown; Master signals RegionServer shutdown"; 067 068 private final Class<? extends HMaster> masterClass; 069 070 public HMasterCommandLine(Class<? extends HMaster> masterClass) { 071 this.masterClass = masterClass; 072 } 073 074 @Override 075 protected String getUsage() { 076 return USAGE; 077 } 078 079 @Override 080 public int run(String args[]) throws Exception { 081 boolean shutDownCluster = false; 082 Options opt = new Options(); 083 opt.addOption("localRegionServers", true, 084 "RegionServers to start in master process when running standalone"); 085 opt.addOption("masters", true, "Masters to start in this process"); 086 opt.addOption("minRegionServers", true, "Minimum RegionServers needed to host user tables"); 087 opt.addOption("backup", false, "Do not try to become HMaster until the primary fails"); 088 opt.addOption("shutDownCluster", false, 089 "`hbase master stop --shutDownCluster` shuts down cluster"); 090 091 CommandLine cmd; 092 try { 093 cmd = new GnuParser().parse(opt, args); 094 } catch (ParseException e) { 095 LOG.error("Could not parse: ", e); 096 usage(null); 097 return 1; 098 } 099 100 if (cmd.hasOption("minRegionServers")) { 101 String val = cmd.getOptionValue("minRegionServers"); 102 getConf().setInt("hbase.regions.server.count.min", Integer.parseInt(val)); 103 LOG.debug("minRegionServers set to " + val); 104 } 105 106 // minRegionServers used to be minServers. Support it too. 107 if (cmd.hasOption("minServers")) { 108 String val = cmd.getOptionValue("minServers"); 109 getConf().setInt("hbase.regions.server.count.min", Integer.parseInt(val)); 110 LOG.debug("minServers set to " + val); 111 } 112 113 // check if we are the backup master - override the conf if so 114 if (cmd.hasOption("backup")) { 115 getConf().setBoolean(HConstants.MASTER_TYPE_BACKUP, true); 116 } 117 118 // How many regionservers to startup in this process (we run regionservers in same process as 119 // master when we are in local/standalone mode. Useful testing) 120 if (cmd.hasOption("localRegionServers")) { 121 String val = cmd.getOptionValue("localRegionServers"); 122 getConf().setInt("hbase.regionservers", Integer.parseInt(val)); 123 LOG.debug("localRegionServers set to " + val); 124 } 125 // How many masters to startup inside this process; useful testing 126 if (cmd.hasOption("masters")) { 127 String val = cmd.getOptionValue("masters"); 128 getConf().setInt("hbase.masters", Integer.parseInt(val)); 129 LOG.debug("masters set to " + val); 130 } 131 132 // Checking whether to shut down cluster or not 133 if (cmd.hasOption("shutDownCluster")) { 134 shutDownCluster = true; 135 } 136 137 @SuppressWarnings("unchecked") 138 List<String> remainingArgs = cmd.getArgList(); 139 if (remainingArgs.size() != 1) { 140 usage(null); 141 return 1; 142 } 143 144 String command = remainingArgs.get(0); 145 146 if ("start".equals(command)) { 147 return startMaster(); 148 } else if ("stop".equals(command)) { 149 if (shutDownCluster) { 150 return stopMaster(); 151 } 152 System.err.println("To shutdown the master run " 153 + "hbase-daemon.sh stop master or send a kill signal to the HMaster pid, " 154 + "and to stop HBase Cluster run \"stop-hbase.sh\" or \"hbase master " 155 + "stop --shutDownCluster\""); 156 return 1; 157 } else if ("clear".equals(command)) { 158 return (ZNodeClearer.clear(getConf()) ? 0 : 1); 159 } else { 160 usage("Invalid command: " + command); 161 return 1; 162 } 163 } 164 165 private int startMaster() { 166 Configuration conf = getConf(); 167 final Span span = TraceUtil.createSpan("HMasterCommandLine.startMaster"); 168 try (Scope ignored = span.makeCurrent()) { 169 // If 'local', defer to LocalHBaseCluster instance. Starts master 170 // and regionserver both in the one JVM. 171 if (LocalHBaseCluster.isLocal(conf)) { 172 DefaultMetricsSystem.setMiniClusterMode(true); 173 final MiniZooKeeperCluster zooKeeperCluster = new MiniZooKeeperCluster(conf); 174 File zkDataPath = new File(conf.get(HConstants.ZOOKEEPER_DATA_DIR)); 175 176 // find out the default client port 177 int zkClientPort = 0; 178 179 // If the zookeeper client port is specified in server quorum, use it. 180 String zkserver = conf.get(HConstants.ZOOKEEPER_QUORUM); 181 if (zkserver != null) { 182 String[] zkservers = zkserver.split(","); 183 184 if (zkservers.length > 1) { 185 // In local mode deployment, we have the master + a region server and zookeeper server 186 // started in the same process. Therefore, we only support one zookeeper server. 187 String errorMsg = "Could not start ZK with " + zkservers.length 188 + " ZK servers in local mode deployment. Aborting as clients (e.g. shell) will not " 189 + "be able to find this ZK quorum."; 190 System.err.println(errorMsg); 191 throw new IOException(errorMsg); 192 } 193 194 String[] parts = zkservers[0].split(":"); 195 196 if (parts.length == 2) { 197 // the second part is the client port 198 zkClientPort = Integer.parseInt(parts[1]); 199 } 200 } 201 // If the client port could not be find in server quorum conf, try another conf 202 if (zkClientPort == 0) { 203 zkClientPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, 0); 204 // The client port has to be set by now; if not, throw exception. 205 if (zkClientPort == 0) { 206 throw new IOException("No config value for " + HConstants.ZOOKEEPER_CLIENT_PORT); 207 } 208 } 209 zooKeeperCluster.setDefaultClientPort(zkClientPort); 210 // set the ZK tick time if specified 211 int zkTickTime = conf.getInt(HConstants.ZOOKEEPER_TICK_TIME, 0); 212 if (zkTickTime > 0) { 213 zooKeeperCluster.setTickTime(zkTickTime); 214 } 215 216 // login the zookeeper server principal (if using security) 217 ZKAuthentication.loginServer(conf, HConstants.ZK_SERVER_KEYTAB_FILE, 218 HConstants.ZK_SERVER_KERBEROS_PRINCIPAL, null); 219 int localZKClusterSessionTimeout = 220 conf.getInt(HConstants.ZK_SESSION_TIMEOUT + ".localHBaseCluster", 10 * 1000); 221 conf.setInt(HConstants.ZK_SESSION_TIMEOUT, localZKClusterSessionTimeout); 222 LOG.info("Starting a zookeeper cluster"); 223 int clientPort = zooKeeperCluster.startup(zkDataPath); 224 if (clientPort != zkClientPort) { 225 String errorMsg = "Could not start ZK at requested port of " + zkClientPort 226 + ". ZK was started at port: " + clientPort 227 + ". Aborting as clients (e.g. shell) will not be able to find " + "this ZK quorum."; 228 System.err.println(errorMsg); 229 throw new IOException(errorMsg); 230 } 231 conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort)); 232 233 // Need to have the zk cluster shutdown when master is shutdown. 234 // Run a subclass that does the zk cluster shutdown on its way out. 235 int mastersCount = conf.getInt("hbase.masters", 1); 236 int regionServersCount = conf.getInt("hbase.regionservers", 1); 237 // Set start timeout to 5 minutes for cmd line start operations 238 conf.setIfUnset("hbase.master.start.timeout.localHBaseCluster", "300000"); 239 LOG.info("Starting up instance of localHBaseCluster; master=" + mastersCount 240 + ", regionserversCount=" + regionServersCount); 241 LocalHBaseCluster cluster = new LocalHBaseCluster(conf, mastersCount, regionServersCount, 242 LocalHMaster.class, HRegionServer.class); 243 ((LocalHMaster) cluster.getMaster(0)).setZKCluster(zooKeeperCluster); 244 cluster.startup(); 245 waitOnMasterThreads(cluster); 246 } else { 247 logProcessInfo(getConf()); 248 HMaster master = HMaster.constructMaster(masterClass, conf); 249 if (master.isStopped()) { 250 LOG.info("Won't bring the Master up as a shutdown is requested"); 251 return 1; 252 } 253 master.start(); 254 master.join(); 255 if (master.isAborted()) throw new RuntimeException("HMaster Aborted"); 256 } 257 span.setStatus(StatusCode.OK); 258 } catch (Throwable t) { 259 TraceUtil.setError(span, t); 260 LOG.error("Master exiting", t); 261 return 1; 262 } finally { 263 span.end(); 264 } 265 return 0; 266 } 267 268 @SuppressWarnings("resource") 269 private int stopMaster() { 270 Configuration conf = getConf(); 271 // Don't try more than once 272 conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 0); 273 try (Connection connection = ConnectionFactory.createConnection(conf)) { 274 try (Admin admin = connection.getAdmin()) { 275 admin.shutdown(); 276 } catch (Throwable t) { 277 LOG.error("Failed to stop master", t); 278 return 1; 279 } 280 } catch (MasterNotRunningException e) { 281 LOG.error("Master not running"); 282 return 1; 283 } catch (ZooKeeperConnectionException e) { 284 LOG.error("ZooKeeper not available"); 285 return 1; 286 } catch (IOException e) { 287 LOG.error("Got IOException: " + e.getMessage(), e); 288 return 1; 289 } 290 return 0; 291 } 292 293 private void waitOnMasterThreads(LocalHBaseCluster cluster) throws InterruptedException { 294 List<JVMClusterUtil.MasterThread> masters = cluster.getMasters(); 295 List<JVMClusterUtil.RegionServerThread> regionservers = cluster.getRegionServers(); 296 297 if (masters != null) { 298 for (JVMClusterUtil.MasterThread t : masters) { 299 t.join(); 300 if (t.getMaster().isAborted()) { 301 closeAllRegionServerThreads(regionservers); 302 throw new RuntimeException("HMaster Aborted"); 303 } 304 } 305 } 306 } 307 308 private static void 309 closeAllRegionServerThreads(List<JVMClusterUtil.RegionServerThread> regionservers) { 310 for (JVMClusterUtil.RegionServerThread t : regionservers) { 311 t.getRegionServer().stop("HMaster Aborted; Bringing down regions servers"); 312 } 313 } 314 315 /* 316 * Version of master that will shutdown the passed zk cluster on its way out. 317 */ 318 public static class LocalHMaster extends HMaster { 319 private MiniZooKeeperCluster zkcluster = null; 320 321 public LocalHMaster(Configuration conf) throws IOException { 322 super(conf); 323 } 324 325 @Override 326 public void run() { 327 super.run(); 328 if (this.zkcluster != null) { 329 try { 330 this.zkcluster.shutdown(); 331 } catch (IOException e) { 332 e.printStackTrace(); 333 } 334 } 335 } 336 337 void setZKCluster(final MiniZooKeeperCluster zkcluster) { 338 this.zkcluster = zkcluster; 339 } 340 } 341}