001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import java.io.Closeable;
021import java.io.IOException;
022import org.apache.hadoop.conf.Configurable;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.client.RegionInfoBuilder;
025import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
026import org.apache.hadoop.hbase.util.Threads;
027import org.apache.yetus.audience.InterfaceAudience;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031/**
032 * This class defines methods that can help with managing HBase clusters from unit tests and system
033 * tests. There are 3 types of cluster deployments:
034 * <ul>
035 * <li><b>MiniHBaseCluster:</b> each server is run in the same JVM in separate threads, used by unit
036 * tests</li>
037 * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can
038 * interact with the cluster.</li>
039 * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate JVMs.
040 * </li>
041 * </ul>
042 * <p>
043 * HBaseCluster unifies the way tests interact with the cluster, so that the same test can be run
044 * against a mini-cluster during unit test execution, or a distributed cluster having tens/hundreds
045 * of nodes during execution of integration tests.
046 * <p>
047 * HBaseCluster exposes client-side public interfaces to tests, so that tests does not assume
048 * running in a particular mode. Not all the tests are suitable to be run on an actual cluster, and
049 * some tests will still need to mock stuff and introspect internal state. For those use cases from
050 * unit tests, or if more control is needed, you can use the subclasses directly. In that sense,
051 * this class does not abstract away <strong>every</strong> interface that MiniHBaseCluster or
052 * DistributedHBaseCluster provide.
053 * @deprecated since 3.0.0, will be removed in 4.0.0. Use
054 *             {@link org.apache.hadoop.hbase.testing.TestingHBaseCluster} instead.
055 */
056@InterfaceAudience.Public
057@Deprecated
058public abstract class HBaseCluster implements Closeable, Configurable {
059  // Log is being used in DistributedHBaseCluster class, hence keeping it as package scope
060  static final Logger LOG = LoggerFactory.getLogger(HBaseCluster.class.getName());
061  protected Configuration conf;
062
063  /** the status of the cluster before we begin */
064  protected ClusterMetrics initialClusterStatus;
065
066  /**
067   * Construct an HBaseCluster
068   * @param conf Configuration to be used for cluster
069   */
070  public HBaseCluster(Configuration conf) {
071    setConf(conf);
072  }
073
074  @Override
075  public void setConf(Configuration conf) {
076    this.conf = conf;
077  }
078
079  @Override
080  public Configuration getConf() {
081    return conf;
082  }
083
084  /**
085   * Returns a ClusterMetrics for this HBase cluster.
086   * @see #getInitialClusterMetrics()
087   */
088  public abstract ClusterMetrics getClusterMetrics() throws IOException;
089
090  /**
091   * Returns a ClusterStatus for this HBase cluster as observed at the starting of the HBaseCluster
092   */
093  public ClusterMetrics getInitialClusterMetrics() throws IOException {
094    return initialClusterStatus;
095  }
096
097  /**
098   * Starts a new region server on the given hostname or if this is a mini/local cluster, starts a
099   * region server locally.
100   * @param hostname the hostname to start the regionserver on
101   * @throws IOException if something goes wrong
102   */
103  public abstract void startRegionServer(String hostname, int port) throws IOException;
104
105  /**
106   * Kills the region server process if this is a distributed cluster, otherwise this causes the
107   * region server to exit doing basic clean up only.
108   * @throws IOException if something goes wrong
109   */
110  public abstract void killRegionServer(ServerName serverName) throws IOException;
111
112  /**
113   * Keeping track of killed servers and being able to check if a particular server was killed makes
114   * it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete
115   * example of such case is - killing servers and waiting for all regions of a particular table to
116   * be assigned. We can check for server column in META table and that its value is not one of the
117   * killed servers.
118   */
119  public abstract boolean isKilledRS(ServerName serverName);
120
121  /**
122   * Stops the given region server, by attempting a gradual stop.
123   * @throws IOException if something goes wrong
124   */
125  public abstract void stopRegionServer(ServerName serverName) throws IOException;
126
127  /**
128   * Wait for the specified region server to join the cluster
129   * @throws IOException if something goes wrong or timeout occurs
130   */
131  public void waitForRegionServerToStart(String hostname, int port, long timeout)
132    throws IOException {
133    long start = EnvironmentEdgeManager.currentTime();
134    while ((EnvironmentEdgeManager.currentTime() - start) < timeout) {
135      for (ServerName server : getClusterMetrics().getLiveServerMetrics().keySet()) {
136        if (server.getHostname().equals(hostname) && server.getPort() == port) {
137          return;
138        }
139      }
140      Threads.sleep(100);
141    }
142    throw new IOException(
143      "did timeout " + timeout + "ms waiting for region server to start: " + hostname);
144  }
145
146  /**
147   * Wait for the specified region server to stop the thread / process.
148   * @throws IOException if something goes wrong or timeout occurs
149   */
150  public abstract void waitForRegionServerToStop(ServerName serverName, long timeout)
151    throws IOException;
152
153  /**
154   * Suspend the region server
155   * @param serverName the hostname to suspend the regionserver on
156   * @throws IOException if something goes wrong
157   */
158  public abstract void suspendRegionServer(ServerName serverName) throws IOException;
159
160  /**
161   * Resume the region server
162   * @param serverName the hostname to resume the regionserver on
163   * @throws IOException if something goes wrong
164   */
165  public abstract void resumeRegionServer(ServerName serverName) throws IOException;
166
167  /**
168   * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster, silently
169   * logs warning message.
170   * @param hostname the hostname to start the regionserver on
171   * @throws IOException if something goes wrong
172   */
173  public abstract void startZkNode(String hostname, int port) throws IOException;
174
175  /**
176   * Kills the zookeeper node process if this is a distributed cluster, otherwise, this causes
177   * master to exit doing basic clean up only.
178   * @throws IOException if something goes wrong
179   */
180  public abstract void killZkNode(ServerName serverName) throws IOException;
181
182  /**
183   * Stops the region zookeeper if this is a distributed cluster, otherwise silently logs warning
184   * message.
185   * @throws IOException if something goes wrong
186   */
187  public abstract void stopZkNode(ServerName serverName) throws IOException;
188
189  /**
190   * Wait for the specified zookeeper node to join the cluster
191   * @throws IOException if something goes wrong or timeout occurs
192   */
193  public abstract void waitForZkNodeToStart(ServerName serverName, long timeout) throws IOException;
194
195  /**
196   * Wait for the specified zookeeper node to stop the thread / process.
197   * @throws IOException if something goes wrong or timeout occurs
198   */
199  public abstract void waitForZkNodeToStop(ServerName serverName, long timeout) throws IOException;
200
201  /**
202   * Starts a new datanode on the given hostname or if this is a mini/local cluster, silently logs
203   * warning message.
204   * @throws IOException if something goes wrong
205   */
206  public abstract void startDataNode(ServerName serverName) throws IOException;
207
208  /**
209   * Kills the datanode process if this is a distributed cluster, otherwise, this causes master to
210   * exit doing basic clean up only.
211   * @throws IOException if something goes wrong
212   */
213  public abstract void killDataNode(ServerName serverName) throws IOException;
214
215  /**
216   * Stops the datanode if this is a distributed cluster, otherwise silently logs warning message.
217   * @throws IOException if something goes wrong
218   */
219  public abstract void stopDataNode(ServerName serverName) throws IOException;
220
221  /**
222   * Wait for the specified datanode to join the cluster
223   * @throws IOException if something goes wrong or timeout occurs
224   */
225  public abstract void waitForDataNodeToStart(ServerName serverName, long timeout)
226    throws IOException;
227
228  /**
229   * Wait for the specified datanode to stop the thread / process.
230   * @throws IOException if something goes wrong or timeout occurs
231   */
232  public abstract void waitForDataNodeToStop(ServerName serverName, long timeout)
233    throws IOException;
234
235  /**
236   * Starts a new namenode on the given hostname or if this is a mini/local cluster, silently logs
237   * warning message.
238   * @throws IOException if something goes wrong
239   */
240  public abstract void startNameNode(ServerName serverName) throws IOException;
241
242  /**
243   * Kills the namenode process if this is a distributed cluster, otherwise, this causes master to
244   * exit doing basic clean up only.
245   * @throws IOException if something goes wrong
246   */
247  public abstract void killNameNode(ServerName serverName) throws IOException;
248
249  /**
250   * Stops the namenode if this is a distributed cluster, otherwise silently logs warning message.
251   * @throws IOException if something goes wrong
252   */
253  public abstract void stopNameNode(ServerName serverName) throws IOException;
254
255  /**
256   * Wait for the specified namenode to join the cluster
257   * @throws IOException if something goes wrong or timeout occurs
258   */
259  public abstract void waitForNameNodeToStart(ServerName serverName, long timeout)
260    throws IOException;
261
262  /**
263   * Wait for the specified namenode to stop
264   * @throws IOException if something goes wrong or timeout occurs
265   */
266  public abstract void waitForNameNodeToStop(ServerName serverName, long timeout)
267    throws IOException;
268
269  /**
270   * Starts a new master on the given hostname or if this is a mini/local cluster, starts a master
271   * locally.
272   * @param hostname the hostname to start the master on
273   * @throws IOException if something goes wrong
274   */
275  public abstract void startMaster(String hostname, int port) throws IOException;
276
277  /**
278   * Kills the master process if this is a distributed cluster, otherwise, this causes master to
279   * exit doing basic clean up only.
280   * @throws IOException if something goes wrong
281   */
282  public abstract void killMaster(ServerName serverName) throws IOException;
283
284  /**
285   * Stops the given master, by attempting a gradual stop.
286   * @throws IOException if something goes wrong
287   */
288  public abstract void stopMaster(ServerName serverName) throws IOException;
289
290  /**
291   * Wait for the specified master to stop the thread / process.
292   * @throws IOException if something goes wrong or timeout occurs
293   */
294  public abstract void waitForMasterToStop(ServerName serverName, long timeout) throws IOException;
295
296  /**
297   * Blocks until there is an active master and that master has completed initialization.
298   * @return true if an active master becomes available. false if there are no masters left.
299   * @throws IOException if something goes wrong or timeout occurs
300   */
301  public boolean waitForActiveAndReadyMaster() throws IOException {
302    return waitForActiveAndReadyMaster(Long.MAX_VALUE);
303  }
304
305  /**
306   * Blocks until there is an active master and that master has completed initialization.
307   * @param timeout the timeout limit in ms
308   * @return true if an active master becomes available. false if there are no masters left.
309   */
310  public abstract boolean waitForActiveAndReadyMaster(long timeout) throws IOException;
311
312  /**
313   * Wait for HBase Cluster to shut down.
314   */
315  public abstract void waitUntilShutDown() throws IOException;
316
317  /**
318   * Shut down the HBase cluster
319   */
320  public abstract void shutdown() throws IOException;
321
322  /**
323   * Restores the cluster to it's initial state if this is a real cluster, otherwise does nothing.
324   * This is a best effort restore. If the servers are not reachable, or insufficient permissions,
325   * etc. restoration might be partial.
326   * @return whether restoration is complete
327   */
328  public boolean restoreInitialStatus() throws IOException {
329    return restoreClusterMetrics(getInitialClusterMetrics());
330  }
331
332  /**
333   * Restores the cluster to given state if this is a real cluster, otherwise does nothing. This is
334   * a best effort restore. If the servers are not reachable, or insufficient permissions, etc.
335   * restoration might be partial.
336   * @return whether restoration is complete
337   */
338  public boolean restoreClusterMetrics(ClusterMetrics desiredStatus) throws IOException {
339    return true;
340  }
341
342  /**
343   * Get the ServerName of region server serving the first hbase:meta region
344   */
345  public ServerName getServerHoldingMeta() throws IOException {
346    return getServerHoldingRegion(TableName.META_TABLE_NAME,
347      RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName());
348  }
349
350  /**
351   * Get the ServerName of region server serving the specified region
352   * @param regionName Name of the region in bytes
353   * @param tn         Table name that has the region.
354   * @return ServerName that hosts the region or null
355   */
356  public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName)
357    throws IOException;
358
359  /**
360   * @return whether we are interacting with a distributed cluster as opposed to an in-process
361   *         mini/local cluster.
362   */
363  public boolean isDistributedCluster() {
364    return false;
365  }
366
367  /**
368   * Closes all the resources held open for this cluster. Note that this call does not shutdown the
369   * cluster.
370   * @see #shutdown()
371   */
372  @Override
373  public abstract void close() throws IOException;
374
375  /**
376   * Wait for the namenode.
377   */
378  public void waitForNamenodeAvailable() throws InterruptedException {
379  }
380
381  public void waitForDatanodesRegistered(int nbDN) throws Exception {
382  }
383}