001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import java.io.Closeable; 021import java.io.IOException; 022import org.apache.hadoop.conf.Configurable; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.hbase.client.RegionInfoBuilder; 025import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 026import org.apache.hadoop.hbase.util.Threads; 027import org.apache.yetus.audience.InterfaceAudience; 028import org.apache.yetus.audience.InterfaceStability; 029 030/** 031 * This class defines methods that can help with managing HBase clusters from unit tests and system 032 * tests. There are 3 types of cluster deployments: 033 * <ul> 034 * <li><b>SingleProcessHBaseCluster:</b> each server is run in the same JVM in separate threads, 035 * used by unit tests</li> 036 * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can 037 * interact with the cluster.</li> 038 * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate JVMs. 039 * </li> 040 * </ul> 041 * <p> 042 * HBaseCluster unifies the way tests interact with the cluster, so that the same test can be run 043 * against a mini-cluster during unit test execution, or a distributed cluster having tens/hundreds 044 * of nodes during execution of integration tests. 045 * <p> 046 * HBaseCluster exposes client-side public interfaces to tests, so that tests does not assume 047 * running in a particular mode. Not all the tests are suitable to be run on an actual cluster, and 048 * some tests will still need to mock stuff and introspect internal state. For those use cases from 049 * unit tests, or if more control is needed, you can use the subclasses directly. In that sense, 050 * this class does not abstract away <strong>every</strong> interface that SingleProcessHBaseCluster 051 * or DistributedHBaseCluster provide. 052 */ 053@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.PHOENIX) 054@InterfaceStability.Evolving 055public abstract class HBaseClusterInterface implements Closeable, Configurable { 056 057 protected Configuration conf; 058 059 /** the status of the cluster before we begin */ 060 protected ClusterMetrics initialClusterStatus; 061 062 /** 063 * Construct an HBaseCluster 064 * @param conf Configuration to be used for cluster 065 */ 066 public HBaseClusterInterface(Configuration conf) { 067 setConf(conf); 068 } 069 070 @Override 071 public void setConf(Configuration conf) { 072 this.conf = conf; 073 } 074 075 @Override 076 public Configuration getConf() { 077 return conf; 078 } 079 080 /** 081 * Returns a ClusterMetrics for this HBase cluster. 082 * @see #getInitialClusterMetrics() 083 */ 084 public abstract ClusterMetrics getClusterMetrics() throws IOException; 085 086 /** 087 * Returns a ClusterStatus for this HBase cluster as observed at the starting of the HBaseCluster 088 */ 089 public ClusterMetrics getInitialClusterMetrics() throws IOException { 090 return initialClusterStatus; 091 } 092 093 /** 094 * Starts a new region server on the given hostname or if this is a mini/local cluster, starts a 095 * region server locally. 096 * @param hostname the hostname to start the regionserver on 097 * @throws IOException if something goes wrong 098 */ 099 public abstract void startRegionServer(String hostname, int port) throws IOException; 100 101 /** 102 * Kills the region server process if this is a distributed cluster, otherwise this causes the 103 * region server to exit doing basic clean up only. 104 * @throws IOException if something goes wrong 105 */ 106 public abstract void killRegionServer(ServerName serverName) throws IOException; 107 108 /** 109 * Keeping track of killed servers and being able to check if a particular server was killed makes 110 * it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete 111 * example of such case is - killing servers and waiting for all regions of a particular table to 112 * be assigned. We can check for server column in META table and that its value is not one of the 113 * killed servers. 114 */ 115 public abstract boolean isKilledRS(ServerName serverName); 116 117 /** 118 * Stops the given region server, by attempting a gradual stop. 119 * @throws IOException if something goes wrong 120 */ 121 public abstract void stopRegionServer(ServerName serverName) throws IOException; 122 123 /** 124 * Wait for the specified region server to join the cluster 125 * @throws IOException if something goes wrong or timeout occurs 126 */ 127 public void waitForRegionServerToStart(String hostname, int port, long timeout) 128 throws IOException { 129 long start = EnvironmentEdgeManager.currentTime(); 130 while ((EnvironmentEdgeManager.currentTime() - start) < timeout) { 131 for (ServerName server : getClusterMetrics().getLiveServerMetrics().keySet()) { 132 if (server.getHostname().equals(hostname) && server.getPort() == port) { 133 return; 134 } 135 } 136 Threads.sleep(100); 137 } 138 throw new IOException( 139 "did timeout " + timeout + "ms waiting for region server to start: " + hostname); 140 } 141 142 /** 143 * Wait for the specified region server to stop the thread / process. 144 * @throws IOException if something goes wrong or timeout occurs 145 */ 146 public abstract void waitForRegionServerToStop(ServerName serverName, long timeout) 147 throws IOException; 148 149 /** 150 * Suspend the region server 151 * @param serverName the hostname to suspend the regionserver on 152 * @throws IOException if something goes wrong 153 */ 154 public abstract void suspendRegionServer(ServerName serverName) throws IOException; 155 156 /** 157 * Resume the region server 158 * @param serverName the hostname to resume the regionserver on 159 * @throws IOException if something goes wrong 160 */ 161 public abstract void resumeRegionServer(ServerName serverName) throws IOException; 162 163 /** 164 * Wait for the specified region server to suspend the thread / process. 165 * @throws IOException if something goes wrong or timeout occurs 166 */ 167 public abstract void waitForRegionServerToSuspend(ServerName serverName, long timeout) 168 throws IOException; 169 170 /** 171 * Wait for the specified region server to resume the thread / process. 172 * @throws IOException if something goes wrong or timeout occurs 173 */ 174 public abstract void waitForRegionServerToResume(ServerName serverName, long timeout) 175 throws IOException; 176 177 /** 178 * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster, silently 179 * logs warning message. 180 * @param hostname the hostname to start the regionserver on 181 * @throws IOException if something goes wrong 182 */ 183 public abstract void startZkNode(String hostname, int port) throws IOException; 184 185 /** 186 * Kills the zookeeper node process if this is a distributed cluster, otherwise, this causes 187 * master to exit doing basic clean up only. 188 * @throws IOException if something goes wrong 189 */ 190 public abstract void killZkNode(ServerName serverName) throws IOException; 191 192 /** 193 * Stops the region zookeeper if this is a distributed cluster, otherwise silently logs warning 194 * message. 195 * @throws IOException if something goes wrong 196 */ 197 public abstract void stopZkNode(ServerName serverName) throws IOException; 198 199 /** 200 * Wait for the specified zookeeper node to join the cluster 201 * @throws IOException if something goes wrong or timeout occurs 202 */ 203 public abstract void waitForZkNodeToStart(ServerName serverName, long timeout) throws IOException; 204 205 /** 206 * Wait for the specified zookeeper node to stop the thread / process. 207 * @throws IOException if something goes wrong or timeout occurs 208 */ 209 public abstract void waitForZkNodeToStop(ServerName serverName, long timeout) throws IOException; 210 211 /** 212 * Starts a new datanode on the given hostname or if this is a mini/local cluster, silently logs 213 * warning message. 214 * @throws IOException if something goes wrong 215 */ 216 public abstract void startDataNode(ServerName serverName) throws IOException; 217 218 /** 219 * Kills the datanode process if this is a distributed cluster, otherwise, this causes master to 220 * exit doing basic clean up only. 221 * @throws IOException if something goes wrong 222 */ 223 public abstract void killDataNode(ServerName serverName) throws IOException; 224 225 /** 226 * Stops the datanode if this is a distributed cluster, otherwise silently logs warning message. 227 * @throws IOException if something goes wrong 228 */ 229 public abstract void stopDataNode(ServerName serverName) throws IOException; 230 231 /** 232 * Wait for the specified datanode to join the cluster 233 * @throws IOException if something goes wrong or timeout occurs 234 */ 235 public abstract void waitForDataNodeToStart(ServerName serverName, long timeout) 236 throws IOException; 237 238 /** 239 * Wait for the specified datanode to stop the thread / process. 240 * @throws IOException if something goes wrong or timeout occurs 241 */ 242 public abstract void waitForDataNodeToStop(ServerName serverName, long timeout) 243 throws IOException; 244 245 /** 246 * Starts a new namenode on the given hostname or if this is a mini/local cluster, silently logs 247 * warning message. 248 * @throws IOException if something goes wrong 249 */ 250 public abstract void startNameNode(ServerName serverName) throws IOException; 251 252 /** 253 * Kills the namenode process if this is a distributed cluster, otherwise, this causes master to 254 * exit doing basic clean up only. 255 * @throws IOException if something goes wrong 256 */ 257 public abstract void killNameNode(ServerName serverName) throws IOException; 258 259 /** 260 * Stops the namenode if this is a distributed cluster, otherwise silently logs warning message. 261 * @throws IOException if something goes wrong 262 */ 263 public abstract void stopNameNode(ServerName serverName) throws IOException; 264 265 /** 266 * Wait for the specified namenode to join the cluster 267 * @throws IOException if something goes wrong or timeout occurs 268 */ 269 public abstract void waitForNameNodeToStart(ServerName serverName, long timeout) 270 throws IOException; 271 272 /** 273 * Wait for the specified namenode to stop 274 * @throws IOException if something goes wrong or timeout occurs 275 */ 276 public abstract void waitForNameNodeToStop(ServerName serverName, long timeout) 277 throws IOException; 278 279 /** 280 * Starts a new journalnode on the given hostname or if this is a mini/local cluster, silently 281 * logs warning message. 282 * @throws IOException if something goes wrong 283 */ 284 public abstract void startJournalNode(ServerName serverName) throws IOException; 285 286 /** 287 * Kills the journalnode process if this is a distributed cluster, otherwise, this causes master 288 * to exit doing basic clean up only. 289 * @throws IOException if something goes wrong 290 */ 291 public abstract void killJournalNode(ServerName serverName) throws IOException; 292 293 /** 294 * Stops the journalnode if this is a distributed cluster, otherwise silently logs warning 295 * message. 296 * @throws IOException if something goes wrong 297 */ 298 public abstract void stopJournalNode(ServerName serverName) throws IOException; 299 300 /** 301 * Wait for the specified journalnode to join the cluster 302 * @throws IOException if something goes wrong or timeout occurs 303 */ 304 public abstract void waitForJournalNodeToStart(ServerName serverName, long timeout) 305 throws IOException; 306 307 /** 308 * Wait for the specified journalnode to stop 309 * @throws IOException if something goes wrong or timeout occurs 310 */ 311 public abstract void waitForJournalNodeToStop(ServerName serverName, long timeout) 312 throws IOException; 313 314 /** 315 * Starts a new master on the given hostname or if this is a mini/local cluster, starts a master 316 * locally. 317 * @param hostname the hostname to start the master on 318 * @throws IOException if something goes wrong 319 */ 320 public abstract void startMaster(String hostname, int port) throws IOException; 321 322 /** 323 * Kills the master process if this is a distributed cluster, otherwise, this causes master to 324 * exit doing basic clean up only. 325 * @throws IOException if something goes wrong 326 */ 327 public abstract void killMaster(ServerName serverName) throws IOException; 328 329 /** 330 * Stops the given master, by attempting a gradual stop. 331 * @throws IOException if something goes wrong 332 */ 333 public abstract void stopMaster(ServerName serverName) throws IOException; 334 335 /** 336 * Wait for the specified master to stop the thread / process. 337 * @throws IOException if something goes wrong or timeout occurs 338 */ 339 public abstract void waitForMasterToStop(ServerName serverName, long timeout) throws IOException; 340 341 /** 342 * Blocks until there is an active master and that master has completed initialization. 343 * @return true if an active master becomes available. false if there are no masters left. 344 * @throws IOException if something goes wrong or timeout occurs 345 */ 346 public boolean waitForActiveAndReadyMaster() throws IOException { 347 return waitForActiveAndReadyMaster(Long.MAX_VALUE); 348 } 349 350 /** 351 * Blocks until there is an active master and that master has completed initialization. 352 * @param timeout the timeout limit in ms 353 * @return true if an active master becomes available. false if there are no masters left. 354 */ 355 public abstract boolean waitForActiveAndReadyMaster(long timeout) throws IOException; 356 357 /** 358 * Wait for HBase Cluster to shut down. 359 */ 360 public abstract void waitUntilShutDown() throws IOException; 361 362 /** 363 * Shut down the HBase cluster 364 */ 365 public abstract void shutdown() throws IOException; 366 367 /** 368 * Restores the cluster to it's initial state if this is a real cluster, otherwise does nothing. 369 * This is a best effort restore. If the servers are not reachable, or insufficient permissions, 370 * etc. restoration might be partial. 371 * @return whether restoration is complete 372 */ 373 public boolean restoreInitialStatus() throws IOException { 374 return restoreClusterMetrics(getInitialClusterMetrics()); 375 } 376 377 /** 378 * Restores the cluster to given state if this is a real cluster, otherwise does nothing. This is 379 * a best effort restore. If the servers are not reachable, or insufficient permissions, etc. 380 * restoration might be partial. 381 * @return whether restoration is complete 382 */ 383 public boolean restoreClusterMetrics(ClusterMetrics desiredStatus) throws IOException { 384 return true; 385 } 386 387 /** 388 * Get the ServerName of region server serving the first hbase:meta region 389 */ 390 public ServerName getServerHoldingMeta() throws IOException { 391 return getServerHoldingRegion(TableName.META_TABLE_NAME, 392 RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName()); 393 } 394 395 /** 396 * Get the ServerName of region server serving the specified region 397 * @param regionName Name of the region in bytes 398 * @param tn Table name that has the region. 399 * @return ServerName that hosts the region or null 400 */ 401 public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName) 402 throws IOException; 403 404 /** 405 * @return whether we are interacting with a distributed cluster as opposed to an in-process 406 * mini/local cluster. 407 */ 408 public boolean isDistributedCluster() { 409 return false; 410 } 411 412 /** 413 * Closes all the resources held open for this cluster. Note that this call does not shutdown the 414 * cluster. 415 * @see #shutdown() 416 */ 417 @Override 418 public abstract void close() throws IOException; 419 420 /** 421 * Wait for the namenode. 422 */ 423 public void waitForNamenodeAvailable() throws InterruptedException { 424 } 425 426 public void waitForDatanodesRegistered(int nbDN) throws Exception { 427 } 428}