001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.zookeeper;
019
020import java.io.EOFException;
021import java.io.IOException;
022import java.net.ConnectException;
023import java.net.NoRouteToHostException;
024import java.net.SocketException;
025import java.net.SocketTimeoutException;
026import java.net.UnknownHostException;
027import java.util.ArrayList;
028import java.util.Collections;
029import java.util.List;
030import java.util.Locale;
031
032import org.apache.hadoop.conf.Configuration;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
035import org.apache.hadoop.hbase.ServerName;
036import org.apache.hadoop.hbase.client.ClusterConnection;
037import org.apache.hadoop.hbase.client.RegionInfo;
038import org.apache.hadoop.hbase.client.RegionInfoBuilder;
039import org.apache.hadoop.hbase.client.RegionReplicaUtil;
040import org.apache.hadoop.hbase.client.RetriesExhaustedException;
041import org.apache.hadoop.hbase.exceptions.DeserializationException;
042import org.apache.hadoop.hbase.ipc.FailedServerException;
043import org.apache.hadoop.hbase.ipc.HBaseRpcController;
044import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
045import org.apache.hadoop.hbase.master.RegionState;
046import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.Pair;
049import org.apache.hadoop.ipc.RemoteException;
050import org.apache.yetus.audience.InterfaceAudience;
051import org.apache.zookeeper.KeeperException;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
055import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
056import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos;
057import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService;
058import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
059import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos;
060import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
061
062/**
063 * Utility class to perform operation (get/wait for/verify/set/delete) on znode in ZooKeeper
064 * which keeps hbase:meta region server location.
065 *
066 * Stateless class with a bunch of static methods. Doesn't manage resources passed in
067 * (e.g. Connection, ZKWatcher etc).
068 *
069 * Meta region location is set by <code>RegionServerServices</code>.
070 * This class doesn't use ZK watchers, rather accesses ZK directly.
071 *
072 * This class it stateless. The only reason it's not made a non-instantiable util class
073 * with a collection of static methods is that it'd be rather hard to mock properly in tests.
074 *
075 * TODO: rewrite using RPC calls to master to find out about hbase:meta.
076 */
077@InterfaceAudience.Private
078public class MetaTableLocator {
079  private static final Logger LOG = LoggerFactory.getLogger(MetaTableLocator.class);
080
081  // only needed to allow non-timeout infinite waits to stop when cluster shuts down
082  private volatile boolean stopped = false;
083
084  /**
085   * Checks if the meta region location is available.
086   * @return true if meta region location is available, false if not
087   */
088  public boolean isLocationAvailable(ZKWatcher zkw) {
089    return getMetaRegionLocation(zkw) != null;
090  }
091
092  /**
093   * @param zkw ZooKeeper watcher to be used
094   * @return meta table regions and their locations.
095   */
096  public List<Pair<RegionInfo, ServerName>> getMetaRegionsAndLocations(ZKWatcher zkw) {
097    return getMetaRegionsAndLocations(zkw, RegionInfo.DEFAULT_REPLICA_ID);
098  }
099
100  /**
101   * Gets the meta regions and their locations for the given path and replica ID.
102   *
103   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
104   * @param replicaId the ID of the replica
105   * @return meta table regions and their locations.
106   */
107  public List<Pair<RegionInfo, ServerName>> getMetaRegionsAndLocations(ZKWatcher zkw,
108      int replicaId) {
109    ServerName serverName = getMetaRegionLocation(zkw, replicaId);
110    List<Pair<RegionInfo, ServerName>> list = new ArrayList<>(1);
111    list.add(new Pair<>(RegionReplicaUtil.getRegionInfoForReplica(
112        RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId), serverName));
113    return list;
114  }
115
116  /**
117   * Gets the meta regions for the given path with the default replica ID.
118   *
119   * @param zkw ZooKeeper watcher to be used
120   * @return List of meta regions
121   */
122  public List<RegionInfo> getMetaRegions(ZKWatcher zkw) {
123    return getMetaRegions(zkw, RegionInfo.DEFAULT_REPLICA_ID);
124  }
125
126  /**
127   * Gets the meta regions for the given path and replica ID.
128   *
129   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
130   * @param replicaId the ID of the replica
131   * @return List of meta regions
132   */
133  public List<RegionInfo> getMetaRegions(ZKWatcher zkw, int replicaId) {
134    List<Pair<RegionInfo, ServerName>> result;
135    result = getMetaRegionsAndLocations(zkw, replicaId);
136    return getListOfRegionInfos(result);
137  }
138
139  private List<RegionInfo> getListOfRegionInfos(final List<Pair<RegionInfo, ServerName>> pairs) {
140    if (pairs == null || pairs.isEmpty()) {
141      return Collections.EMPTY_LIST;
142    }
143
144    List<RegionInfo> result = new ArrayList<>(pairs.size());
145    for (Pair<RegionInfo, ServerName> pair: pairs) {
146      result.add(pair.getFirst());
147    }
148    return result;
149  }
150
151  /**
152   * Gets the meta region location, if available.  Does not block.
153   * @param zkw zookeeper connection to use
154   * @return server name or null if we failed to get the data.
155   */
156  public ServerName getMetaRegionLocation(final ZKWatcher zkw) {
157    try {
158      RegionState state = getMetaRegionState(zkw);
159      return state.isOpened() ? state.getServerName() : null;
160    } catch (KeeperException ke) {
161      return null;
162    }
163  }
164
165  /**
166   * Gets the meta region location, if available.  Does not block.
167   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
168   * @param replicaId the ID of the replica
169   * @return server name
170   */
171  public ServerName getMetaRegionLocation(final ZKWatcher zkw, int replicaId) {
172    try {
173      RegionState state = getMetaRegionState(zkw, replicaId);
174      return state.isOpened() ? state.getServerName() : null;
175    } catch (KeeperException ke) {
176      return null;
177    }
178  }
179
180  /**
181   * Gets the meta region location, if available, and waits for up to the
182   * specified timeout if not immediately available.
183   * Given the zookeeper notification could be delayed, we will try to
184   * get the latest data.
185   *
186   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
187   * @param timeout maximum time to wait, in millis
188   * @return server name for server hosting meta region formatted as per
189   * {@link ServerName}, or null if none available
190   * @throws InterruptedException if interrupted while waiting
191   * @throws NotAllMetaRegionsOnlineException if a meta or root region is not online
192   */
193  public ServerName waitMetaRegionLocation(ZKWatcher zkw, long timeout)
194    throws InterruptedException, NotAllMetaRegionsOnlineException {
195    return waitMetaRegionLocation(zkw, RegionInfo.DEFAULT_REPLICA_ID, timeout);
196  }
197
198  /**
199   * Gets the meta region location, if available, and waits for up to the specified timeout if not
200   * immediately available. Given the zookeeper notification could be delayed, we will try to
201   * get the latest data.
202   *
203   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
204   * @param replicaId the ID of the replica
205   * @param timeout maximum time to wait, in millis
206   * @return server name for server hosting meta region formatted as per
207   * {@link ServerName}, or null if none available
208   * @throws InterruptedException if waiting for the socket operation fails
209   * @throws NotAllMetaRegionsOnlineException if a meta or root region is not online
210   */
211  public ServerName waitMetaRegionLocation(ZKWatcher zkw, int replicaId, long timeout)
212    throws InterruptedException, NotAllMetaRegionsOnlineException {
213    try {
214      if (ZKUtil.checkExists(zkw, zkw.getZNodePaths().baseZNode) == -1) {
215        String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
216            + "There could be a mismatch with the one configured in the master.";
217        LOG.error(errorMsg);
218        throw new IllegalArgumentException(errorMsg);
219      }
220    } catch (KeeperException e) {
221      throw new IllegalStateException("KeeperException while trying to check baseZNode:", e);
222    }
223    ServerName sn = blockUntilAvailable(zkw, replicaId, timeout);
224
225    if (sn == null) {
226      throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
227    }
228
229    return sn;
230  }
231
232  /**
233   * Waits indefinitely for availability of <code>hbase:meta</code>.  Used during
234   * cluster startup.  Does not verify meta, just that something has been
235   * set up in zk.
236   * @see #waitMetaRegionLocation(ZKWatcher, long)
237   * @throws InterruptedException if interrupted while waiting
238   */
239  public void waitMetaRegionLocation(ZKWatcher zkw) throws InterruptedException {
240    long startTime = System.currentTimeMillis();
241    while (!stopped) {
242      try {
243        if (waitMetaRegionLocation(zkw, 100) != null) {
244          break;
245        }
246
247        long sleepTime = System.currentTimeMillis() - startTime;
248        // +1 in case sleepTime=0
249        if ((sleepTime + 1) % 10000 == 0) {
250          LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
251        }
252      } catch (NotAllMetaRegionsOnlineException e) {
253        if (LOG.isTraceEnabled()) {
254          LOG.trace("hbase:meta still not available, sleeping and retrying." +
255            " Reason: " + e.getMessage());
256        }
257      }
258    }
259  }
260
261  /**
262   * Verify <code>hbase:meta</code> is deployed and accessible.
263   *
264   * @param hConnection the connection to use
265   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
266   * @param timeout How long to wait on zk for meta address (passed through to
267   *                the internal call to {@link #getMetaServerConnection}.
268   * @return True if the <code>hbase:meta</code> location is healthy.
269   * @throws IOException if the number of retries for getting the connection is exceeded
270   * @throws InterruptedException if waiting for the socket operation fails
271   */
272  public boolean verifyMetaRegionLocation(ClusterConnection hConnection, ZKWatcher zkw,
273      final long timeout) throws InterruptedException, IOException {
274    return verifyMetaRegionLocation(hConnection, zkw, timeout, RegionInfo.DEFAULT_REPLICA_ID);
275  }
276
277  /**
278   * Verify <code>hbase:meta</code> is deployed and accessible.
279   *
280   * @param connection the connection to use
281   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
282   * @param timeout How long to wait on zk for meta address (passed through to
283   * @param replicaId the ID of the replica
284   * @return True if the <code>hbase:meta</code> location is healthy.
285   * @throws InterruptedException if waiting for the socket operation fails
286   * @throws IOException if the number of retries for getting the connection is exceeded
287   */
288  public boolean verifyMetaRegionLocation(ClusterConnection connection, ZKWatcher zkw,
289      final long timeout, int replicaId) throws InterruptedException, IOException {
290    AdminProtos.AdminService.BlockingInterface service = null;
291    try {
292      service = getMetaServerConnection(connection, zkw, timeout, replicaId);
293    } catch (NotAllMetaRegionsOnlineException e) {
294      // Pass
295    } catch (ServerNotRunningYetException e) {
296      // Pass -- remote server is not up so can't be carrying root
297    } catch (UnknownHostException e) {
298      // Pass -- server name doesn't resolve so it can't be assigned anything.
299    } catch (RegionServerStoppedException e) {
300      // Pass -- server name sends us to a server that is dying or already dead.
301    }
302    return (service != null) && verifyRegionLocation(connection, service,
303            getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica(
304                RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId).getRegionName());
305  }
306
307  /**
308   * Verify we can connect to <code>hostingServer</code> and that its carrying
309   * <code>regionName</code>.
310   * @param hostingServer Interface to the server hosting <code>regionName</code>
311   * @param address The servername that goes with the <code>metaServer</code> interface.
312   *                Used logging.
313   * @param regionName The regionname we are interested in.
314   * @return True if we were able to verify the region located at other side of the interface.
315   */
316  // TODO: We should be able to get the ServerName from the AdminProtocol
317  // rather than have to pass it in.  Its made awkward by the fact that the
318  // HRI is likely a proxy against remote server so the getServerName needs
319  // to be fixed to go to a local method or to a cache before we can do this.
320  private boolean verifyRegionLocation(final ClusterConnection connection,
321      AdminService.BlockingInterface hostingServer, final ServerName address,
322      final byte [] regionName) {
323    if (hostingServer == null) {
324      LOG.info("Passed hostingServer is null");
325      return false;
326    }
327    Throwable t;
328    HBaseRpcController controller = connection.getRpcControllerFactory().newController();
329    try {
330      // Try and get regioninfo from the hosting server.
331      return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
332    } catch (ConnectException e) {
333      t = e;
334    } catch (RetriesExhaustedException e) {
335      t = e;
336    } catch (RemoteException e) {
337      IOException ioe = e.unwrapRemoteException();
338      t = ioe;
339    } catch (IOException e) {
340      Throwable cause = e.getCause();
341      if (cause != null && cause instanceof EOFException) {
342        t = cause;
343      } else if (cause != null && cause.getMessage() != null
344          && cause.getMessage().contains("Connection reset")) {
345        t = cause;
346      } else {
347        t = e;
348      }
349    }
350    LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
351      " at address=" + address + ", exception=" + t.getMessage());
352    return false;
353  }
354
355  /**
356   * Gets a connection to the server hosting meta, as reported by ZooKeeper, waiting up to the
357   * specified timeout for availability.
358   *
359   * <p>WARNING: Does not retry.  Use an {@link org.apache.hadoop.hbase.client.HTable} instead.
360   *
361   * @param connection the connection to use
362   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
363   * @param timeout How long to wait on meta location
364   * @param replicaId the ID of the replica
365   * @return connection to server hosting meta
366   * @throws InterruptedException if waiting for the socket operation fails
367   * @throws IOException if the number of retries for getting the connection is exceeded
368   */
369  private AdminService.BlockingInterface getMetaServerConnection(ClusterConnection connection,
370      ZKWatcher zkw, long timeout, int replicaId) throws InterruptedException, IOException {
371    return getCachedConnection(connection, waitMetaRegionLocation(zkw, replicaId, timeout));
372  }
373
374  /**
375   * @param sn ServerName to get a connection against.
376   * @return The AdminProtocol we got when we connected to <code>sn</code>
377   *         May have come from cache, may not be good, may have been setup by this invocation, or
378   *         may be null.
379   * @throws IOException if the number of retries for getting the connection is exceeded
380   */
381  private static AdminService.BlockingInterface getCachedConnection(ClusterConnection connection,
382      ServerName sn) throws IOException {
383    if (sn == null) {
384      return null;
385    }
386    AdminService.BlockingInterface service = null;
387    try {
388      service = connection.getAdmin(sn);
389    } catch (RetriesExhaustedException e) {
390      if (e.getCause() != null && e.getCause() instanceof ConnectException) {
391        LOG.debug("Catch this; presume it means the cached connection has gone bad.");
392      } else {
393        throw e;
394      }
395    } catch (SocketTimeoutException e) {
396      LOG.debug("Timed out connecting to " + sn);
397    } catch (NoRouteToHostException e) {
398      LOG.debug("Connecting to " + sn, e);
399    } catch (SocketException e) {
400      LOG.debug("Exception connecting to " + sn);
401    } catch (UnknownHostException e) {
402      LOG.debug("Unknown host exception connecting to  " + sn);
403    } catch (FailedServerException e) {
404      if (LOG.isDebugEnabled()) {
405        LOG.debug("Server " + sn + " is in failed server list.");
406      }
407    } catch (IOException ioe) {
408      Throwable cause = ioe.getCause();
409      if (ioe instanceof ConnectException) {
410        LOG.debug("Catch. Connect refused.");
411      } else if (cause != null && cause instanceof EOFException) {
412        LOG.debug("Catch. Other end disconnected us.");
413      } else if (cause != null && cause.getMessage() != null &&
414        cause.getMessage().toLowerCase(Locale.ROOT).contains("connection reset")) {
415        LOG.debug("Catch. Connection reset.");
416      } else {
417        throw ioe;
418      }
419
420    }
421    return service;
422  }
423
424  /**
425   * Sets the location of <code>hbase:meta</code> in ZooKeeper to the
426   * specified server address.
427   * @param zookeeper zookeeper reference
428   * @param serverName The server hosting <code>hbase:meta</code>
429   * @param state The region transition state
430   * @throws KeeperException unexpected zookeeper exception
431   */
432  public static void setMetaLocation(ZKWatcher zookeeper,
433      ServerName serverName, RegionState.State state) throws KeeperException {
434    setMetaLocation(zookeeper, serverName, RegionInfo.DEFAULT_REPLICA_ID, state);
435  }
436
437  /**
438   * Sets the location of <code>hbase:meta</code> in ZooKeeper to the specified server address.
439   * @param zookeeper reference to the {@link ZKWatcher} which also contains configuration and
440   *                  operation
441   * @param serverName the name of the server
442   * @param replicaId the ID of the replica
443   * @param state the state of the region
444   * @throws KeeperException if a ZooKeeper operation fails
445   */
446  public static void setMetaLocation(ZKWatcher zookeeper, ServerName serverName, int replicaId,
447      RegionState.State state) throws KeeperException {
448    if (serverName == null) {
449      LOG.warn("Tried to set null ServerName in hbase:meta; skipping -- ServerName required");
450      return;
451    }
452    LOG.info("Setting hbase:meta (replicaId={}) location in ZooKeeper as {}", replicaId,
453      serverName);
454    // Make the MetaRegionServer pb and then get its bytes and save this as
455    // the znode content.
456    MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
457      .setServer(ProtobufUtil.toServerName(serverName))
458      .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
459      .setState(state.convert()).build();
460    byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
461    try {
462      ZKUtil.setData(zookeeper,
463          zookeeper.getZNodePaths().getZNodeForReplica(replicaId), data);
464    } catch(KeeperException.NoNodeException nne) {
465      if (replicaId == RegionInfo.DEFAULT_REPLICA_ID) {
466        LOG.debug("META region location doesn't exist, create it");
467      } else {
468        LOG.debug("META region location doesn't exist for replicaId=" + replicaId +
469            ", create it");
470      }
471      ZKUtil.createAndWatch(zookeeper, zookeeper.getZNodePaths().getZNodeForReplica(replicaId),
472              data);
473    }
474  }
475
476  /**
477   * Load the meta region state from the meta server ZNode.
478   */
479  public static RegionState getMetaRegionState(ZKWatcher zkw) throws KeeperException {
480    return getMetaRegionState(zkw, RegionInfo.DEFAULT_REPLICA_ID);
481  }
482
483  /**
484   * Load the meta region state from the meta server ZNode.
485   *
486   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
487   * @param replicaId the ID of the replica
488   * @return regionstate
489   * @throws KeeperException if a ZooKeeper operation fails
490   */
491  public static RegionState getMetaRegionState(ZKWatcher zkw, int replicaId)
492          throws KeeperException {
493    RegionState.State state = RegionState.State.OPEN;
494    ServerName serverName = null;
495    try {
496      byte[] data = ZKUtil.getData(zkw, zkw.getZNodePaths().getZNodeForReplica(replicaId));
497      if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
498        try {
499          int prefixLen = ProtobufUtil.lengthOfPBMagic();
500          ZooKeeperProtos.MetaRegionServer rl =
501            ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom(data, prefixLen,
502                    data.length - prefixLen);
503          if (rl.hasState()) {
504            state = RegionState.State.convert(rl.getState());
505          }
506          HBaseProtos.ServerName sn = rl.getServer();
507          serverName = ServerName.valueOf(
508            sn.getHostName(), sn.getPort(), sn.getStartCode());
509        } catch (InvalidProtocolBufferException e) {
510          throw new DeserializationException("Unable to parse meta region location");
511        }
512      } else {
513        // old style of meta region location?
514        serverName = ProtobufUtil.parseServerNameFrom(data);
515      }
516    } catch (DeserializationException e) {
517      throw ZKUtil.convert(e);
518    } catch (InterruptedException e) {
519      Thread.currentThread().interrupt();
520    }
521    if (serverName == null) {
522      state = RegionState.State.OFFLINE;
523    }
524    return new RegionState(
525        RegionReplicaUtil.getRegionInfoForReplica(
526            RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId),
527        state, serverName);
528  }
529
530  /**
531   * Deletes the location of <code>hbase:meta</code> in ZooKeeper.
532   * @param zookeeper zookeeper reference
533   * @throws KeeperException unexpected zookeeper exception
534   */
535  public void deleteMetaLocation(ZKWatcher zookeeper)
536    throws KeeperException {
537    deleteMetaLocation(zookeeper, RegionInfo.DEFAULT_REPLICA_ID);
538  }
539
540  public void deleteMetaLocation(ZKWatcher zookeeper, int replicaId)
541    throws KeeperException {
542    if (replicaId == RegionInfo.DEFAULT_REPLICA_ID) {
543      LOG.info("Deleting hbase:meta region location in ZooKeeper");
544    } else {
545      LOG.info("Deleting hbase:meta for {} region location in ZooKeeper", replicaId);
546    }
547    try {
548      // Just delete the node.  Don't need any watches.
549      ZKUtil.deleteNode(zookeeper, zookeeper.getZNodePaths().getZNodeForReplica(replicaId));
550    } catch(KeeperException.NoNodeException nne) {
551      // Has already been deleted
552    }
553  }
554  /**
555   * Wait until the primary meta region is available. Get the secondary locations as well but don't
556   * block for those.
557   *
558   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation
559   * @param timeout maximum time to wait in millis
560   * @param conf the {@link Configuration} to use
561   * @return ServerName or null if we timed out.
562   * @throws InterruptedException if waiting for the socket operation fails
563   */
564  public List<ServerName> blockUntilAvailable(final ZKWatcher zkw, final long timeout,
565      Configuration conf) throws InterruptedException {
566    int numReplicasConfigured = 1;
567
568    List<ServerName> servers = new ArrayList<>();
569    // Make the blocking call first so that we do the wait to know
570    // the znodes are all in place or timeout.
571    ServerName server = blockUntilAvailable(zkw, timeout);
572
573    if (server == null) {
574      return null;
575    }
576
577    servers.add(server);
578
579    try {
580      List<String> metaReplicaNodes = zkw.getMetaReplicaNodes();
581      numReplicasConfigured = metaReplicaNodes.size();
582    } catch (KeeperException e) {
583      LOG.warn("Got ZK exception {}", e);
584    }
585    for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) {
586      // return all replica locations for the meta
587      servers.add(getMetaRegionLocation(zkw, replicaId));
588    }
589    return servers;
590  }
591
592  /**
593   * Wait until the meta region is available and is not in transition.
594   * @param zkw zookeeper connection to use
595   * @param timeout maximum time to wait, in millis
596   * @return ServerName or null if we timed out.
597   * @throws InterruptedException if waiting for the socket operation fails
598   */
599  public ServerName blockUntilAvailable(final ZKWatcher zkw, final long timeout)
600          throws InterruptedException {
601    return blockUntilAvailable(zkw, RegionInfo.DEFAULT_REPLICA_ID, timeout);
602  }
603
604  /**
605   * Wait until the meta region is available and is not in transition.
606   *
607   * @param zkw reference to the {@link ZKWatcher} which also contains configuration and constants
608   * @param replicaId the ID of the replica
609   * @param timeout maximum time to wait in millis
610   * @return ServerName or null if we timed out.
611   * @throws InterruptedException if waiting for the socket operation fails
612   */
613  public ServerName blockUntilAvailable(final ZKWatcher zkw, int replicaId, final long timeout)
614          throws InterruptedException {
615    if (timeout < 0) {
616      throw new IllegalArgumentException();
617    }
618
619    if (zkw == null) {
620      throw new IllegalArgumentException();
621    }
622
623    long startTime = System.currentTimeMillis();
624    ServerName sn = null;
625    while (true) {
626      sn = getMetaRegionLocation(zkw, replicaId);
627      if (sn != null || (System.currentTimeMillis() - startTime)
628          > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
629        break;
630      }
631      Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
632    }
633    return sn;
634  }
635
636  /**
637   * Stop working.
638   * Interrupts any ongoing waits.
639   */
640  public void stop() {
641    if (!stopped) {
642      LOG.debug("Stopping MetaTableLocator");
643      stopped = true;
644    }
645  }
646}