001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.zookeeper;
019
020import java.io.EOFException;
021import java.io.IOException;
022import java.net.ConnectException;
023import java.net.NoRouteToHostException;
024import java.net.SocketException;
025import java.net.SocketTimeoutException;
026import java.net.UnknownHostException;
027import java.util.ArrayList;
028import java.util.Collections;
029import java.util.List;
030import java.util.Locale;
031
032import org.apache.hadoop.conf.Configuration;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
035import org.apache.hadoop.hbase.ServerName;
036import org.apache.hadoop.hbase.client.ClusterConnection;
037import org.apache.hadoop.hbase.client.RegionInfo;
038import org.apache.hadoop.hbase.client.RegionInfoBuilder;
039import org.apache.hadoop.hbase.client.RegionReplicaUtil;
040import org.apache.hadoop.hbase.client.RetriesExhaustedException;
041import org.apache.hadoop.hbase.exceptions.DeserializationException;
042import org.apache.hadoop.hbase.ipc.FailedServerException;
043import org.apache.hadoop.hbase.ipc.HBaseRpcController;
044import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
045import org.apache.hadoop.hbase.master.RegionState;
046import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.Pair;
049import org.apache.hadoop.ipc.RemoteException;
050import org.apache.yetus.audience.InterfaceAudience;
051import org.apache.zookeeper.KeeperException;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
055import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
056import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos;
057import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService;
058import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
059import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos;
060import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
061
/**
 * Utility class to perform operations (get/wait for/verify/set/delete) on the znode in ZooKeeper
 * which keeps the hbase:meta region server location.
 *
 * Stateless class with a bunch of static methods. Doesn't manage resources passed in
 * (e.g. Connection, ZKWatcher etc).
 *
 * Meta region location is set by <code>RegionServerServices</code>.
 * This class doesn't use ZK watchers, rather accesses ZK directly.
 *
 * This class is stateless. The only reason it's not made a non-instantiable util class
 * with a collection of static methods is that it'd be rather hard to mock properly in tests.
 *
 * TODO: rewrite using RPC calls to master to find out about hbase:meta.
 */
077@InterfaceAudience.Private
078public class MetaTableLocator {
079  private static final Logger LOG = LoggerFactory.getLogger(MetaTableLocator.class);
080
081  // only needed to allow non-timeout infinite waits to stop when cluster shuts down
082  private volatile boolean stopped = false;
083
084  /**
085   * Checks if the meta region location is available.
086   * @return true if meta region location is available, false if not
087   */
088  public boolean isLocationAvailable(ZKWatcher zkw) {
089    return getMetaRegionLocation(zkw) != null;
090  }
091
092  /**
093   * @param zkw ZooKeeper watcher to be used
094   * @return meta table regions and their locations.
095   */
096  public List<Pair<RegionInfo, ServerName>> getMetaRegionsAndLocations(ZKWatcher zkw) {
097    return getMetaRegionsAndLocations(zkw, RegionInfo.DEFAULT_REPLICA_ID);
098  }
099
100  /**
101   * Gets the meta regions and their locations for the given path and replica ID.
102   *
103   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
104   * @param replicaId the ID of the replica
105   * @return meta table regions and their locations.
106   */
107  public List<Pair<RegionInfo, ServerName>> getMetaRegionsAndLocations(ZKWatcher zkw,
108      int replicaId) {
109    ServerName serverName = getMetaRegionLocation(zkw, replicaId);
110    List<Pair<RegionInfo, ServerName>> list = new ArrayList<>(1);
111    list.add(new Pair<>(RegionReplicaUtil.getRegionInfoForReplica(
112        RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId), serverName));
113    return list;
114  }
115
116  /**
117   * Gets the meta regions for the given path with the default replica ID.
118   *
119   * @param zkw ZooKeeper watcher to be used
120   * @return List of meta regions
121   */
122  public List<RegionInfo> getMetaRegions(ZKWatcher zkw) {
123    return getMetaRegions(zkw, RegionInfo.DEFAULT_REPLICA_ID);
124  }
125
126  /**
127   * Gets the meta regions for the given path and replica ID.
128   *
129   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
130   * @param replicaId the ID of the replica
131   * @return List of meta regions
132   */
133  public List<RegionInfo> getMetaRegions(ZKWatcher zkw, int replicaId) {
134    List<Pair<RegionInfo, ServerName>> result;
135    result = getMetaRegionsAndLocations(zkw, replicaId);
136    return getListOfRegionInfos(result);
137  }
138
139  private List<RegionInfo> getListOfRegionInfos(final List<Pair<RegionInfo, ServerName>> pairs) {
140    if (pairs == null || pairs.isEmpty()) {
141      return Collections.EMPTY_LIST;
142    }
143
144    List<RegionInfo> result = new ArrayList<>(pairs.size());
145    for (Pair<RegionInfo, ServerName> pair: pairs) {
146      result.add(pair.getFirst());
147    }
148    return result;
149  }
150
151  /**
152   * Gets the meta region location, if available.  Does not block.
153   * @param zkw zookeeper connection to use
154   * @return server name or null if we failed to get the data.
155   */
156  public ServerName getMetaRegionLocation(final ZKWatcher zkw) {
157    try {
158      RegionState state = getMetaRegionState(zkw);
159      return state.isOpened() ? state.getServerName() : null;
160    } catch (KeeperException ke) {
161      return null;
162    }
163  }
164
165  /**
166   * Gets the meta region location, if available.  Does not block.
167   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
168   * @param replicaId the ID of the replica
169   * @return server name
170   */
171  public ServerName getMetaRegionLocation(final ZKWatcher zkw, int replicaId) {
172    try {
173      RegionState state = getMetaRegionState(zkw, replicaId);
174      return state.isOpened() ? state.getServerName() : null;
175    } catch (KeeperException ke) {
176      return null;
177    }
178  }
179
180  /**
181   * Gets the meta region location, if available, and waits for up to the
182   * specified timeout if not immediately available.
183   * Given the zookeeper notification could be delayed, we will try to
184   * get the latest data.
185   *
186   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
187   * @param timeout maximum time to wait, in millis
188   * @return server name for server hosting meta region formatted as per
189   * {@link ServerName}, or null if none available
190   * @throws InterruptedException if interrupted while waiting
191   * @throws NotAllMetaRegionsOnlineException if a meta or root region is not online
192   */
193  public ServerName waitMetaRegionLocation(ZKWatcher zkw, long timeout)
194  throws InterruptedException, NotAllMetaRegionsOnlineException {
195    return waitMetaRegionLocation(zkw, RegionInfo.DEFAULT_REPLICA_ID, timeout);
196  }
197
198  /**
199   * Gets the meta region location, if available, and waits for up to the specified timeout if not
200   * immediately available. Given the zookeeper notification could be delayed, we will try to
201   * get the latest data.
202   *
203   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
204   * @param replicaId the ID of the replica
205   * @param timeout maximum time to wait, in millis
206   * @return server name for server hosting meta region formatted as per
207   * {@link ServerName}, or null if none available
208   * @throws InterruptedException if waiting for the socket operation fails
209   * @throws NotAllMetaRegionsOnlineException if a meta or root region is not online
210   */
211  public ServerName waitMetaRegionLocation(ZKWatcher zkw, int replicaId, long timeout)
212  throws InterruptedException, NotAllMetaRegionsOnlineException {
213    try {
214      if (ZKUtil.checkExists(zkw, zkw.znodePaths.baseZNode) == -1) {
215        String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
216            + "There could be a mismatch with the one configured in the master.";
217        LOG.error(errorMsg);
218        throw new IllegalArgumentException(errorMsg);
219      }
220    } catch (KeeperException e) {
221      throw new IllegalStateException("KeeperException while trying to check baseZNode:", e);
222    }
223    ServerName sn = blockUntilAvailable(zkw, replicaId, timeout);
224
225    if (sn == null) {
226      throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
227    }
228
229    return sn;
230  }
231
232  /**
233   * Waits indefinitely for availability of <code>hbase:meta</code>.  Used during
234   * cluster startup.  Does not verify meta, just that something has been
235   * set up in zk.
236   * @see #waitMetaRegionLocation(ZKWatcher, long)
237   * @throws InterruptedException if interrupted while waiting
238   */
239  public void waitMetaRegionLocation(ZKWatcher zkw) throws InterruptedException {
240    long startTime = System.currentTimeMillis();
241    while (!stopped) {
242      try {
243        if (waitMetaRegionLocation(zkw, 100) != null) {
244          break;
245        }
246
247        long sleepTime = System.currentTimeMillis() - startTime;
248        // +1 in case sleepTime=0
249        if ((sleepTime + 1) % 10000 == 0) {
250          LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
251        }
252      } catch (NotAllMetaRegionsOnlineException e) {
253        if (LOG.isTraceEnabled()) {
254          LOG.trace("hbase:meta still not available, sleeping and retrying." +
255            " Reason: " + e.getMessage());
256        }
257      }
258    }
259  }
260
261  /**
262   * Verify <code>hbase:meta</code> is deployed and accessible.
263   *
264   * @param hConnection the connection to use
265   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
266   * @param timeout How long to wait on zk for meta address (passed through to
267   *                the internal call to {@link #getMetaServerConnection}.
268   * @return True if the <code>hbase:meta</code> location is healthy.
269   * @throws IOException if the number of retries for getting the connection is exceeded
270   * @throws InterruptedException if waiting for the socket operation fails
271   */
272  public boolean verifyMetaRegionLocation(ClusterConnection hConnection, ZKWatcher zkw,
273      final long timeout) throws InterruptedException, IOException {
274    return verifyMetaRegionLocation(hConnection, zkw, timeout, RegionInfo.DEFAULT_REPLICA_ID);
275  }
276
277  /**
278   * Verify <code>hbase:meta</code> is deployed and accessible.
279   *
280   * @param connection the connection to use
281   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
282   * @param timeout How long to wait on zk for meta address (passed through to
283   * @param replicaId the ID of the replica
284   * @return True if the <code>hbase:meta</code> location is healthy.
285   * @throws InterruptedException if waiting for the socket operation fails
286   * @throws IOException if the number of retries for getting the connection is exceeded
287   */
288  public boolean verifyMetaRegionLocation(ClusterConnection connection, ZKWatcher zkw,
289      final long timeout, int replicaId) throws InterruptedException, IOException {
290    AdminProtos.AdminService.BlockingInterface service = null;
291    try {
292      service = getMetaServerConnection(connection, zkw, timeout, replicaId);
293    } catch (NotAllMetaRegionsOnlineException e) {
294      // Pass
295    } catch (ServerNotRunningYetException e) {
296      // Pass -- remote server is not up so can't be carrying root
297    } catch (UnknownHostException e) {
298      // Pass -- server name doesn't resolve so it can't be assigned anything.
299    } catch (RegionServerStoppedException e) {
300      // Pass -- server name sends us to a server that is dying or already dead.
301    }
302    return (service != null) && verifyRegionLocation(connection, service,
303            getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica(
304                RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId).getRegionName());
305  }
306
307  /**
308   * Verify we can connect to <code>hostingServer</code> and that its carrying
309   * <code>regionName</code>.
310   * @param hostingServer Interface to the server hosting <code>regionName</code>
311   * @param address The servername that goes with the <code>metaServer</code> interface.
312   *                Used logging.
313   * @param regionName The regionname we are interested in.
314   * @return True if we were able to verify the region located at other side of the interface.
315   */
316  // TODO: We should be able to get the ServerName from the AdminProtocol
317  // rather than have to pass it in.  Its made awkward by the fact that the
318  // HRI is likely a proxy against remote server so the getServerName needs
319  // to be fixed to go to a local method or to a cache before we can do this.
320  private boolean verifyRegionLocation(final ClusterConnection connection,
321      AdminService.BlockingInterface hostingServer, final ServerName address,
322      final byte [] regionName) {
323    if (hostingServer == null) {
324      LOG.info("Passed hostingServer is null");
325      return false;
326    }
327    Throwable t;
328    HBaseRpcController controller = connection.getRpcControllerFactory().newController();
329    try {
330      // Try and get regioninfo from the hosting server.
331      return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
332    } catch (ConnectException e) {
333      t = e;
334    } catch (RetriesExhaustedException e) {
335      t = e;
336    } catch (RemoteException e) {
337      IOException ioe = e.unwrapRemoteException();
338      t = ioe;
339    } catch (IOException e) {
340      Throwable cause = e.getCause();
341      if (cause != null && cause instanceof EOFException) {
342        t = cause;
343      } else if (cause != null && cause.getMessage() != null
344          && cause.getMessage().contains("Connection reset")) {
345        t = cause;
346      } else {
347        t = e;
348      }
349    }
350    LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
351      " at address=" + address + ", exception=" + t.getMessage());
352    return false;
353  }
354
355  /**
356   * Gets a connection to the server hosting meta, as reported by ZooKeeper, waiting up to the
357   * specified timeout for availability.
358   *
359   * <p>WARNING: Does not retry.  Use an {@link org.apache.hadoop.hbase.client.HTable} instead.
360   *
361   * @param connection the connection to use
362   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
363   * @param timeout How long to wait on meta location
364   * @param replicaId the ID of the replica
365   * @return connection to server hosting meta
366   * @throws InterruptedException if waiting for the socket operation fails
367   * @throws IOException if the number of retries for getting the connection is exceeded
368   */
369  private AdminService.BlockingInterface getMetaServerConnection(ClusterConnection connection,
370      ZKWatcher zkw, long timeout, int replicaId) throws InterruptedException, IOException {
371    return getCachedConnection(connection, waitMetaRegionLocation(zkw, replicaId, timeout));
372  }
373
374  /**
375   * @param sn ServerName to get a connection against.
376   * @return The AdminProtocol we got when we connected to <code>sn</code>
377   *         May have come from cache, may not be good, may have been setup by this invocation, or
378   *         may be null.
379   * @throws IOException if the number of retries for getting the connection is exceeded
380   */
381  private static AdminService.BlockingInterface getCachedConnection(ClusterConnection connection,
382      ServerName sn) throws IOException {
383    if (sn == null) {
384      return null;
385    }
386    AdminService.BlockingInterface service = null;
387    try {
388      service = connection.getAdmin(sn);
389    } catch (RetriesExhaustedException e) {
390      if (e.getCause() != null && e.getCause() instanceof ConnectException) {
391        // Catch this; presume it means the cached connection has gone bad.
392      } else {
393        throw e;
394      }
395    } catch (SocketTimeoutException e) {
396      LOG.debug("Timed out connecting to " + sn);
397    } catch (NoRouteToHostException e) {
398      LOG.debug("Connecting to " + sn, e);
399    } catch (SocketException e) {
400      LOG.debug("Exception connecting to " + sn);
401    } catch (UnknownHostException e) {
402      LOG.debug("Unknown host exception connecting to  " + sn);
403    } catch (FailedServerException e) {
404      if (LOG.isDebugEnabled()) {
405        LOG.debug("Server " + sn + " is in failed server list.");
406      }
407    } catch (IOException ioe) {
408      Throwable cause = ioe.getCause();
409      if (ioe instanceof ConnectException) {
410        // Catch. Connect refused.
411      } else if (cause != null && cause instanceof EOFException) {
412        // Catch. Other end disconnected us.
413      } else if (cause != null && cause.getMessage() != null &&
414        cause.getMessage().toLowerCase(Locale.ROOT).contains("connection reset")) {
415        // Catch. Connection reset.
416      } else {
417        throw ioe;
418      }
419
420    }
421    return service;
422  }
423
424  /**
425   * Sets the location of <code>hbase:meta</code> in ZooKeeper to the
426   * specified server address.
427   * @param zookeeper zookeeper reference
428   * @param serverName The server hosting <code>hbase:meta</code>
429   * @param state The region transition state
430   * @throws KeeperException unexpected zookeeper exception
431   */
432  public static void setMetaLocation(ZKWatcher zookeeper,
433      ServerName serverName, RegionState.State state) throws KeeperException {
434    setMetaLocation(zookeeper, serverName, RegionInfo.DEFAULT_REPLICA_ID, state);
435  }
436
437  /**
438   * Sets the location of <code>hbase:meta</code> in ZooKeeper to the specified server address.
439   * @param zookeeper reference to the {@link ZKWatcher} which also contains configuration and
440   *                  operation
441   * @param serverName the name of the server
442   * @param replicaId the ID of the replica
443   * @param state the state of the region
444   * @throws KeeperException if a ZooKeeper operation fails
445   */
446  public static void setMetaLocation(ZKWatcher zookeeper, ServerName serverName, int replicaId,
447      RegionState.State state) throws KeeperException {
448    if (serverName == null) {
449      LOG.warn("Tried to set null ServerName in hbase:meta; skipping -- ServerName required");
450      return;
451    }
452    LOG.info("Setting hbase:meta (replicaId=" + replicaId + ") location in ZooKeeper as " +
453      serverName);
454    // Make the MetaRegionServer pb and then get its bytes and save this as
455    // the znode content.
456    MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
457      .setServer(ProtobufUtil.toServerName(serverName))
458      .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
459      .setState(state.convert()).build();
460    byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
461    try {
462      ZKUtil.setData(zookeeper,
463          zookeeper.znodePaths.getZNodeForReplica(replicaId), data);
464    } catch(KeeperException.NoNodeException nne) {
465      if (replicaId == RegionInfo.DEFAULT_REPLICA_ID) {
466        LOG.debug("META region location doesn't exist, create it");
467      } else {
468        LOG.debug("META region location doesn't exist for replicaId=" + replicaId +
469            ", create it");
470      }
471      ZKUtil.createAndWatch(zookeeper, zookeeper.znodePaths.getZNodeForReplica(replicaId), data);
472    }
473  }
474
475  /**
476   * Load the meta region state from the meta server ZNode.
477   */
478  public static RegionState getMetaRegionState(ZKWatcher zkw) throws KeeperException {
479    return getMetaRegionState(zkw, RegionInfo.DEFAULT_REPLICA_ID);
480  }
481
482  /**
483   * Load the meta region state from the meta server ZNode.
484   *
485   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
486   * @param replicaId the ID of the replica
487   * @return regionstate
488   * @throws KeeperException if a ZooKeeper operation fails
489   */
490  public static RegionState getMetaRegionState(ZKWatcher zkw, int replicaId)
491          throws KeeperException {
492    RegionState.State state = RegionState.State.OPEN;
493    ServerName serverName = null;
494    try {
495      byte[] data = ZKUtil.getData(zkw, zkw.znodePaths.getZNodeForReplica(replicaId));
496      if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
497        try {
498          int prefixLen = ProtobufUtil.lengthOfPBMagic();
499          ZooKeeperProtos.MetaRegionServer rl =
500            ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom(data, prefixLen,
501                    data.length - prefixLen);
502          if (rl.hasState()) {
503            state = RegionState.State.convert(rl.getState());
504          }
505          HBaseProtos.ServerName sn = rl.getServer();
506          serverName = ServerName.valueOf(
507            sn.getHostName(), sn.getPort(), sn.getStartCode());
508        } catch (InvalidProtocolBufferException e) {
509          throw new DeserializationException("Unable to parse meta region location");
510        }
511      } else {
512        // old style of meta region location?
513        serverName = ProtobufUtil.parseServerNameFrom(data);
514      }
515    } catch (DeserializationException e) {
516      throw ZKUtil.convert(e);
517    } catch (InterruptedException e) {
518      Thread.currentThread().interrupt();
519    }
520    if (serverName == null) {
521      state = RegionState.State.OFFLINE;
522    }
523    return new RegionState(
524        RegionReplicaUtil.getRegionInfoForReplica(
525            RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId),
526        state, serverName);
527  }
528
529  /**
530   * Deletes the location of <code>hbase:meta</code> in ZooKeeper.
531   * @param zookeeper zookeeper reference
532   * @throws KeeperException unexpected zookeeper exception
533   */
534  public void deleteMetaLocation(ZKWatcher zookeeper)
535  throws KeeperException {
536    deleteMetaLocation(zookeeper, RegionInfo.DEFAULT_REPLICA_ID);
537  }
538
539  public void deleteMetaLocation(ZKWatcher zookeeper, int replicaId)
540  throws KeeperException {
541    if (replicaId == RegionInfo.DEFAULT_REPLICA_ID) {
542      LOG.info("Deleting hbase:meta region location in ZooKeeper");
543    } else {
544      LOG.info("Deleting hbase:meta for " + replicaId + " region location in ZooKeeper");
545    }
546    try {
547      // Just delete the node.  Don't need any watches.
548      ZKUtil.deleteNode(zookeeper, zookeeper.znodePaths.getZNodeForReplica(replicaId));
549    } catch(KeeperException.NoNodeException nne) {
550      // Has already been deleted
551    }
552  }
553  /**
554   * Wait until the primary meta region is available. Get the secondary locations as well but don't
555   * block for those.
556   *
557   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
558   * @param timeout maximum time to wait in millis
559   * @param conf the {@link Configuration} to use
560   * @return ServerName or null if we timed out.
561   * @throws InterruptedException if waiting for the socket operation fails
562   */
563  public List<ServerName> blockUntilAvailable(final ZKWatcher zkw, final long timeout,
564      Configuration conf) throws InterruptedException {
565    int numReplicasConfigured = 1;
566
567    List<ServerName> servers = new ArrayList<>();
568    // Make the blocking call first so that we do the wait to know
569    // the znodes are all in place or timeout.
570    ServerName server = blockUntilAvailable(zkw, timeout);
571
572    if (server == null) {
573      return null;
574    }
575
576    servers.add(server);
577
578    try {
579      List<String> metaReplicaNodes = zkw.getMetaReplicaNodes();
580      numReplicasConfigured = metaReplicaNodes.size();
581    } catch (KeeperException e) {
582      LOG.warn("Got ZK exception " + e);
583    }
584    for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) {
585      // return all replica locations for the meta
586      servers.add(getMetaRegionLocation(zkw, replicaId));
587    }
588    return servers;
589  }
590
591  /**
592   * Wait until the meta region is available and is not in transition.
593   * @param zkw zookeeper connection to use
594   * @param timeout maximum time to wait, in millis
595   * @return ServerName or null if we timed out.
596   * @throws InterruptedException if waiting for the socket operation fails
597   */
598  public ServerName blockUntilAvailable(final ZKWatcher zkw, final long timeout)
599          throws InterruptedException {
600    return blockUntilAvailable(zkw, RegionInfo.DEFAULT_REPLICA_ID, timeout);
601  }
602
603  /**
604   * Wait until the meta region is available and is not in transition.
605   *
606   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and constants
607   * @param replicaId the ID of the replica
608   * @param timeout maximum time to wait in millis
609   * @return ServerName or null if we timed out.
610   * @throws InterruptedException if waiting for the socket operation fails
611   */
612  public ServerName blockUntilAvailable(final ZKWatcher zkw, int replicaId, final long timeout)
613          throws InterruptedException {
614    if (timeout < 0) {
615      throw new IllegalArgumentException();
616    }
617
618    if (zkw == null) {
619      throw new IllegalArgumentException();
620    }
621
622    long startTime = System.currentTimeMillis();
623    ServerName sn = null;
624    while (true) {
625      sn = getMetaRegionLocation(zkw, replicaId);
626      if (sn != null || (System.currentTimeMillis() - startTime)
627          > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
628        break;
629      }
630      Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
631    }
632    return sn;
633  }
634
635  /**
636   * Stop working.
637   * Interrupts any ongoing waits.
638   */
639  public void stop() {
640    if (!stopped) {
641      LOG.debug("Stopping MetaTableLocator");
642      stopped = true;
643    }
644  }
645}