001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import java.io.Closeable;
021import java.io.IOException;
022import org.apache.hadoop.conf.Configurable;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.client.RegionInfoBuilder;
025import org.apache.hadoop.hbase.util.Threads;
026import org.apache.yetus.audience.InterfaceAudience;
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029
030import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService;
031import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ClientService;
032import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterService;
033
034/**
035 * This class defines methods that can help with managing HBase clusters
036 * from unit tests and system tests. There are 3 types of cluster deployments:
037 * <ul>
038 * <li><b>MiniHBaseCluster:</b> each server is run in the same JVM in separate threads,
039 * used by unit tests</li>
040 * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can
041 * interact with the cluster. </li>
042 * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate
043 * JVMs. </li>
044 * </ul>
045 * <p>
046 * HBaseCluster unifies the way tests interact with the cluster, so that the same test can
047 * be run against a mini-cluster during unit test execution, or a distributed cluster having
048 * tens/hundreds of nodes during execution of integration tests.
049 *
050 * <p>
051 * HBaseCluster exposes client-side public interfaces to tests, so that tests does not assume
052 * running in a particular mode. Not all the tests are suitable to be run on an actual cluster,
053 * and some tests will still need to mock stuff and introspect internal state. For those use
054 * cases from unit tests, or if more control is needed, you can use the subclasses directly.
055 * In that sense, this class does not abstract away <strong>every</strong> interface that
056 * MiniHBaseCluster or DistributedHBaseCluster provide.
057 */
058@InterfaceAudience.Private
059public abstract class HBaseCluster implements Closeable, Configurable {
060  // Log is being used in DistributedHBaseCluster class, hence keeping it as package scope
061  static final Logger LOG = LoggerFactory.getLogger(HBaseCluster.class.getName());
062  protected Configuration conf;
063
064  /** the status of the cluster before we begin */
065  protected ClusterMetrics initialClusterStatus;
066
067  /**
068   * Construct an HBaseCluster
069   * @param conf Configuration to be used for cluster
070   */
071  public HBaseCluster(Configuration conf) {
072    setConf(conf);
073  }
074
075  @Override
076  public void setConf(Configuration conf) {
077    this.conf = conf;
078  }
079
080  @Override
081  public Configuration getConf() {
082    return conf;
083  }
084
085  /**
086   * Returns a ClusterMetrics for this HBase cluster.
087   * @see #getInitialClusterMetrics()
088   */
089  public abstract ClusterMetrics getClusterMetrics() throws IOException;
090
091  /**
092   * Returns a ClusterStatus for this HBase cluster as observed at the
093   * starting of the HBaseCluster
094   */
095  public ClusterMetrics getInitialClusterMetrics() throws IOException {
096    return initialClusterStatus;
097  }
098
099  /**
100   * Returns an {@link MasterService.BlockingInterface} to the active master
101   */
102  public abstract MasterService.BlockingInterface getMasterAdminService()
103  throws IOException;
104
105  /**
106   * Returns an AdminProtocol interface to the regionserver
107   */
108  public abstract AdminService.BlockingInterface getAdminProtocol(ServerName serverName)
109  throws IOException;
110
111  /**
112   * Returns a ClientProtocol interface to the regionserver
113   */
114  public abstract ClientService.BlockingInterface getClientProtocol(ServerName serverName)
115  throws IOException;
116
117  /**
118   * Starts a new region server on the given hostname or if this is a mini/local cluster,
119   * starts a region server locally.
120   * @param hostname the hostname to start the regionserver on
121   * @throws IOException if something goes wrong
122   */
123  public abstract void startRegionServer(String hostname, int port) throws IOException;
124
125  /**
126   * Kills the region server process if this is a distributed cluster, otherwise
127   * this causes the region server to exit doing basic clean up only.
128   * @throws IOException if something goes wrong
129   */
130  public abstract void killRegionServer(ServerName serverName) throws IOException;
131
132  /**
133   * Keeping track of killed servers and being able to check if a particular server was killed makes
134   * it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete
135   * example of such case is - killing servers and waiting for all regions of a particular table
136   * to be assigned. We can check for server column in META table and that its value is not one
137   * of the killed servers.
138   */
139  public abstract boolean isKilledRS(ServerName serverName);
140
141  /**
142   * Stops the given region server, by attempting a gradual stop.
143   * @throws IOException if something goes wrong
144   */
145  public abstract void stopRegionServer(ServerName serverName) throws IOException;
146
147  /**
148   * Wait for the specified region server to join the cluster
149   * @throws IOException if something goes wrong or timeout occurs
150   */
151  public void waitForRegionServerToStart(String hostname, int port, long timeout)
152      throws IOException {
153    long start = System.currentTimeMillis();
154    while ((System.currentTimeMillis() - start) < timeout) {
155      for (ServerName server : getClusterMetrics().getLiveServerMetrics().keySet()) {
156        if (server.getHostname().equals(hostname) && server.getPort() == port) {
157          return;
158        }
159      }
160      Threads.sleep(100);
161    }
162    throw new IOException("did timeout " + timeout + "ms waiting for region server to start: "
163        + hostname);
164  }
165
166  /**
167   * Wait for the specified region server to stop the thread / process.
168   * @throws IOException if something goes wrong or timeout occurs
169   */
170  public abstract void waitForRegionServerToStop(ServerName serverName, long timeout)
171      throws IOException;
172
173  /**
174   * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster,
175   * silently logs warning message.
176   * @param hostname the hostname to start the regionserver on
177   * @throws IOException if something goes wrong
178   */
179  public abstract void startZkNode(String hostname, int port) throws IOException;
180
181  /**
182   * Kills the zookeeper node process if this is a distributed cluster, otherwise,
183   * this causes master to exit doing basic clean up only.
184   * @throws IOException if something goes wrong
185   */
186  public abstract void killZkNode(ServerName serverName) throws IOException;
187
188  /**
189   * Stops the region zookeeper if this is a distributed cluster, otherwise
190   * silently logs warning message.
191   * @throws IOException if something goes wrong
192   */
193  public abstract void stopZkNode(ServerName serverName) throws IOException;
194
195  /**
196   * Wait for the specified zookeeper node to join the cluster
197   * @throws IOException if something goes wrong or timeout occurs
198   */
199  public abstract void waitForZkNodeToStart(ServerName serverName, long timeout)
200    throws IOException;
201
202  /**
203   * Wait for the specified zookeeper node to stop the thread / process.
204   * @throws IOException if something goes wrong or timeout occurs
205   */
206  public abstract void waitForZkNodeToStop(ServerName serverName, long timeout)
207    throws IOException;
208
209  /**
210   * Starts a new datanode on the given hostname or if this is a mini/local cluster,
211   * silently logs warning message.
212   * @throws IOException if something goes wrong
213   */
214  public abstract void startDataNode(ServerName serverName) throws IOException;
215
216  /**
217   * Kills the datanode process if this is a distributed cluster, otherwise,
218   * this causes master to exit doing basic clean up only.
219   * @throws IOException if something goes wrong
220   */
221  public abstract void killDataNode(ServerName serverName) throws IOException;
222
223  /**
224   * Stops the datanode if this is a distributed cluster, otherwise
225   * silently logs warning message.
226   * @throws IOException if something goes wrong
227   */
228  public abstract void stopDataNode(ServerName serverName) throws IOException;
229
230  /**
231   * Wait for the specified datanode to join the cluster
232   * @throws IOException if something goes wrong or timeout occurs
233   */
234  public abstract void waitForDataNodeToStart(ServerName serverName, long timeout)
235    throws IOException;
236
237  /**
238   * Wait for the specified datanode to stop the thread / process.
239   * @throws IOException if something goes wrong or timeout occurs
240   */
241  public abstract void waitForDataNodeToStop(ServerName serverName, long timeout)
242    throws IOException;
243
244  /**
245   * Starts a new master on the given hostname or if this is a mini/local cluster,
246   * starts a master locally.
247   * @param hostname the hostname to start the master on
248   * @throws IOException if something goes wrong
249   */
250  public abstract void startMaster(String hostname, int port) throws IOException;
251
252  /**
253   * Kills the master process if this is a distributed cluster, otherwise,
254   * this causes master to exit doing basic clean up only.
255   * @throws IOException if something goes wrong
256   */
257  public abstract void killMaster(ServerName serverName) throws IOException;
258
259  /**
260   * Stops the given master, by attempting a gradual stop.
261   * @throws IOException if something goes wrong
262   */
263  public abstract void stopMaster(ServerName serverName) throws IOException;
264
265  /**
266   * Wait for the specified master to stop the thread / process.
267   * @throws IOException if something goes wrong or timeout occurs
268   */
269  public abstract void waitForMasterToStop(ServerName serverName, long timeout)
270      throws IOException;
271
272  /**
273   * Blocks until there is an active master and that master has completed
274   * initialization.
275   *
276   * @return true if an active master becomes available.  false if there are no
277   *         masters left.
278   * @throws IOException if something goes wrong or timeout occurs
279   */
280  public boolean waitForActiveAndReadyMaster()
281      throws IOException {
282    return waitForActiveAndReadyMaster(Long.MAX_VALUE);
283  }
284
285  /**
286   * Blocks until there is an active master and that master has completed
287   * initialization.
288   * @param timeout the timeout limit in ms
289   * @return true if an active master becomes available.  false if there are no
290   *         masters left.
291   */
292  public abstract boolean waitForActiveAndReadyMaster(long timeout)
293      throws IOException;
294
295  /**
296   * Wait for HBase Cluster to shut down.
297   */
298  public abstract void waitUntilShutDown() throws IOException;
299
300  /**
301   * Shut down the HBase cluster
302   */
303  public abstract void shutdown() throws IOException;
304
305  /**
306   * Restores the cluster to it's initial state if this is a real cluster,
307   * otherwise does nothing.
308   * This is a best effort restore. If the servers are not reachable, or insufficient
309   * permissions, etc. restoration might be partial.
310   * @return whether restoration is complete
311   */
312  public boolean restoreInitialStatus() throws IOException {
313    return restoreClusterMetrics(getInitialClusterMetrics());
314  }
315
316  /**
317   * Restores the cluster to given state if this is a real cluster,
318   * otherwise does nothing.
319   * This is a best effort restore. If the servers are not reachable, or insufficient
320   * permissions, etc. restoration might be partial.
321   * @return whether restoration is complete
322   */
323  public boolean restoreClusterMetrics(ClusterMetrics desiredStatus) throws IOException {
324    return true;
325  }
326
327  /**
328   * Get the ServerName of region server serving the first hbase:meta region
329   */
330  public ServerName getServerHoldingMeta() throws IOException {
331    return getServerHoldingRegion(TableName.META_TABLE_NAME,
332      RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName());
333  }
334
335  /**
336   * Get the ServerName of region server serving the specified region
337   * @param regionName Name of the region in bytes
338   * @param tn Table name that has the region.
339   * @return ServerName that hosts the region or null
340   */
341  public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName)
342      throws IOException;
343
344  /**
345   * @return whether we are interacting with a distributed cluster as opposed to an
346   * in-process mini/local cluster.
347   */
348  public boolean isDistributedCluster() {
349    return false;
350  }
351
352  /**
353   * Closes all the resources held open for this cluster. Note that this call does not shutdown
354   * the cluster.
355   * @see #shutdown()
356   */
357  @Override
358  public abstract void close() throws IOException;
359
360  /**
361   * Wait for the namenode.
362   *
363   * @throws InterruptedException
364   */
365  public void waitForNamenodeAvailable() throws InterruptedException {
366  }
367
368  public void waitForDatanodesRegistered(int nbDN) throws Exception {
369  }
370}