View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.zookeeper;
19  
20  import com.google.common.base.Stopwatch;
21  import com.google.protobuf.InvalidProtocolBufferException;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  import org.apache.hadoop.conf.Configuration;
27  import org.apache.hadoop.hbase.HConstants;
28  import org.apache.hadoop.hbase.HRegionInfo;
29  import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
30  import org.apache.hadoop.hbase.ServerName;
31  import org.apache.hadoop.hbase.classification.InterfaceAudience;
32  import org.apache.hadoop.hbase.client.ClusterConnection;
33  import org.apache.hadoop.hbase.client.Connection;
34  import org.apache.hadoop.hbase.client.HConnection;
35  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
36  import org.apache.hadoop.hbase.client.RetriesExhaustedException;
37  import org.apache.hadoop.hbase.exceptions.DeserializationException;
38  import org.apache.hadoop.hbase.ServerName;
39  import org.apache.hadoop.hbase.ipc.FailedServerException;
40  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
41  import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
42  import org.apache.hadoop.hbase.master.RegionState;
43  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
44  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
45  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
46  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
47  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
48  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
49  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.Pair;
52  import org.apache.hadoop.ipc.RemoteException;
53  import org.apache.zookeeper.KeeperException;
54  
55  import java.io.EOFException;
56  import java.io.IOException;
57  import java.net.ConnectException;
58  import java.net.NoRouteToHostException;
59  import java.net.SocketException;
60  import java.net.SocketTimeoutException;
61  import java.net.UnknownHostException;
62  
63  import java.util.List;
64  import java.util.ArrayList;
65  
66  /**
67   * Utility class to perform operation (get/wait for/verify/set/delete) on znode in ZooKeeper
68   * which keeps hbase:meta region server location.
69   *
70   * Stateless class with a bunch of static methods. Doesn't manage resources passed in
71   * (e.g. HConnection, ZooKeeperWatcher etc).
72   *
73   * Meta region location is set by <code>RegionServerServices</code>.
74   * This class doesn't use ZK watchers, rather accesses ZK directly.
75   *
76   * This class is stateless. The only reason it's not made a non-instantiable util class
77   * with a collection of static methods is that it'd be rather hard to mock properly in tests.
78   *
79   * TODO: rewrite using RPC calls to master to find out about hbase:meta.
80   */
81  @InterfaceAudience.Private
82  public class MetaTableLocator {
83    private static final Log LOG = LogFactory.getLog(MetaTableLocator.class);
84  
85    // only needed to allow non-timeout infinite waits to stop when cluster shuts down
86    private volatile boolean stopped = false;
87  
88    /**
89     * Checks if the meta region location is available.
90     * @return true if meta region location is available, false if not
91     */
92    public boolean isLocationAvailable(ZooKeeperWatcher zkw) {
93      return getMetaRegionLocation(zkw) != null;
94    }
95  
96    /**
97     * @param zkw ZooKeeper watcher to be used
98     * @return meta table regions and their locations.
99     */
100   public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw) {
101     return getMetaRegionsAndLocations(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
102   }
103 
104   /**
105    * 
106    * @param zkw
107    * @param replicaId
108    * @return meta table regions and their locations.
109    */
110   public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw,
111       int replicaId) {
112     ServerName serverName = getMetaRegionLocation(zkw, replicaId);
113     List<Pair<HRegionInfo, ServerName>> list = new ArrayList<Pair<HRegionInfo, ServerName>>();
114     list.add(new Pair<HRegionInfo, ServerName>(RegionReplicaUtil.getRegionInfoForReplica(
115         HRegionInfo.FIRST_META_REGIONINFO, replicaId), serverName));
116     return list;
117   }
118 
119   /**
120    * @param zkw ZooKeeper watcher to be used
121    * @return List of meta regions
122    */
123   public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw) {
124     return getMetaRegions(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
125   }
126 
127   /**
128    * 
129    * @param zkw
130    * @param replicaId
131    * @return List of meta regions
132    */
133   public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw, int replicaId) {
134     List<Pair<HRegionInfo, ServerName>> result;
135     result = getMetaRegionsAndLocations(zkw, replicaId);
136     return getListOfHRegionInfos(result);
137   }
138 
139   private List<HRegionInfo> getListOfHRegionInfos(
140       final List<Pair<HRegionInfo, ServerName>> pairs) {
141     if (pairs == null || pairs.isEmpty()) return null;
142     List<HRegionInfo> result = new ArrayList<HRegionInfo>(pairs.size());
143     for (Pair<HRegionInfo, ServerName> pair: pairs) {
144       result.add(pair.getFirst());
145     }
146     return result;
147   }
148 
149   /**
150    * Gets the meta region location, if available.  Does not block.
151    * @param zkw zookeeper connection to use
152    * @return server name or null if we failed to get the data.
153    */
154   public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw) {
155     try {
156       RegionState state = getMetaRegionState(zkw);
157       return state.isOpened() ? state.getServerName() : null;
158     } catch (KeeperException ke) {
159       return null;
160     }
161   }
162 
163   /**
164    * Gets the meta region location, if available.  Does not block.
165    * @param zkw
166    * @param replicaId
167    * @return server name
168    */
169   public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw, int replicaId) {
170     try {
171       RegionState state = getMetaRegionState(zkw, replicaId);
172       return state.isOpened() ? state.getServerName() : null;
173     } catch (KeeperException ke) {
174       return null;
175     }
176   }
177 
178   /**
179    * Gets the meta region location, if available, and waits for up to the
180    * specified timeout if not immediately available.
181    * Given the zookeeper notification could be delayed, we will try to
182    * get the latest data.
183    * @param zkw
184    * @param timeout maximum time to wait, in millis
185    * @return server name for server hosting meta region formatted as per
186    * {@link ServerName}, or null if none available
187    * @throws InterruptedException if interrupted while waiting
188    * @throws NotAllMetaRegionsOnlineException
189    */
190   public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, long timeout)
191   throws InterruptedException, NotAllMetaRegionsOnlineException {
192     return waitMetaRegionLocation(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
193   }
194 
195   /**
196    * Gets the meta region location, if available, and waits for up to the
197    * specified timeout if not immediately available.
198    * Given the zookeeper notification could be delayed, we will try to
199    * get the latest data.
200    * @param zkw
201    * @param replicaId
202    * @param timeout maximum time to wait, in millis
203    * @return server name for server hosting meta region formatted as per
204    * {@link ServerName}, or null if none available
205    * @throws InterruptedException
206    * @throws NotAllMetaRegionsOnlineException
207    */
208   public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, int replicaId, long timeout)
209   throws InterruptedException, NotAllMetaRegionsOnlineException {
210     try {
211       if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) {
212         String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
213             + "There could be a mismatch with the one configured in the master.";
214         LOG.error(errorMsg);
215         throw new IllegalArgumentException(errorMsg);
216       }
217     } catch (KeeperException e) {
218       throw new IllegalStateException("KeeperException while trying to check baseZNode:", e);
219     }
220     ServerName sn = blockUntilAvailable(zkw, replicaId, timeout);
221 
222     if (sn == null) {
223       throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
224     }
225 
226     return sn;
227   }
228 
229   /**
230    * Waits indefinitely for availability of <code>hbase:meta</code>.  Used during
231    * cluster startup.  Does not verify meta, just that something has been
232    * set up in zk.
233    * @see #waitMetaRegionLocation(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher, long)
234    * @throws InterruptedException if interrupted while waiting
235    */
236   public void waitMetaRegionLocation(ZooKeeperWatcher zkw) throws InterruptedException {
237     Stopwatch stopwatch = new Stopwatch().start();
238     while (!stopped) {
239       try {
240         if (waitMetaRegionLocation(zkw, 100) != null) break;
241         long sleepTime = stopwatch.elapsedMillis();
242         // +1 in case sleepTime=0
243         if ((sleepTime + 1) % 10000 == 0) {
244           LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
245         }
246       } catch (NotAllMetaRegionsOnlineException e) {
247         if (LOG.isTraceEnabled()) {
248           LOG.trace("hbase:meta still not available, sleeping and retrying." +
249             " Reason: " + e.getMessage());
250         }
251       }
252     }
253   }
254 
255   /**
256    * Verify <code>hbase:meta</code> is deployed and accessible.
257    * @param hConnection
258    * @param zkw
259    * @param timeout How long to wait on zk for meta address (passed through to
260    * the internal call to {@link #getMetaServerConnection}.
261    * @return True if the <code>hbase:meta</code> location is healthy.
262    * @throws java.io.IOException
263    * @throws InterruptedException
264    */
265   public boolean verifyMetaRegionLocation(HConnection hConnection,
266       ZooKeeperWatcher zkw, final long timeout)
267   throws InterruptedException, IOException {
268     return verifyMetaRegionLocation(hConnection, zkw, timeout, HRegionInfo.DEFAULT_REPLICA_ID);
269   }
270 
271   /**
272    * Verify <code>hbase:meta</code> is deployed and accessible.
273    * @param hConnection
274    * @param zkw
275    * @param timeout How long to wait on zk for meta address (passed through to
276    * @param replicaId
277    * @return True if the <code>hbase:meta</code> location is healthy.
278    * @throws InterruptedException
279    * @throws IOException
280    */
281   public boolean verifyMetaRegionLocation(HConnection hConnection,
282       ZooKeeperWatcher zkw, final long timeout, int replicaId)
283   throws InterruptedException, IOException {
284     AdminProtos.AdminService.BlockingInterface service = null;
285     try {
286       service = getMetaServerConnection(hConnection, zkw, timeout, replicaId);
287     } catch (NotAllMetaRegionsOnlineException e) {
288       // Pass
289     } catch (ServerNotRunningYetException e) {
290       // Pass -- remote server is not up so can't be carrying root
291     } catch (UnknownHostException e) {
292       // Pass -- server name doesn't resolve so it can't be assigned anything.
293     } catch (RegionServerStoppedException e) {
294       // Pass -- server name sends us to a server that is dying or already dead.
295     }
296     return (service != null) && verifyRegionLocation(hConnection, service,
297             getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica(
298                 HRegionInfo.FIRST_META_REGIONINFO, replicaId).getRegionName());
299   }
300 
301   /**
302    * Verify we can connect to <code>hostingServer</code> and that its carrying
303    * <code>regionName</code>.
304    * @param hostingServer Interface to the server hosting <code>regionName</code>
305    * @param address The servername that goes with the <code>metaServer</code>
306    * Interface.  Used logging.
307    * @param regionName The regionname we are interested in.
308    * @return True if we were able to verify the region located at other side of
309    * the Interface.
310    * @throws IOException
311    */
312   // TODO: We should be able to get the ServerName from the AdminProtocol
313   // rather than have to pass it in.  Its made awkward by the fact that the
314   // HRI is likely a proxy against remote server so the getServerName needs
315   // to be fixed to go to a local method or to a cache before we can do this.
316   private boolean verifyRegionLocation(final Connection connection,
317       AdminService.BlockingInterface hostingServer, final ServerName address,
318       final byte [] regionName)
319   throws IOException {
320     if (hostingServer == null) {
321       LOG.info("Passed hostingServer is null");
322       return false;
323     }
324     Throwable t;
325     PayloadCarryingRpcController controller = null;
326     if (connection instanceof ClusterConnection) {
327       controller = ((ClusterConnection) connection).getRpcControllerFactory().newController();
328     }
329     try {
330       // Try and get regioninfo from the hosting server.
331       return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
332     } catch (ConnectException e) {
333       t = e;
334     } catch (RetriesExhaustedException e) {
335       t = e;
336     } catch (RemoteException e) {
337       IOException ioe = e.unwrapRemoteException();
338       t = ioe;
339     } catch (IOException e) {
340       Throwable cause = e.getCause();
341       if (cause != null && cause instanceof EOFException) {
342         t = cause;
343       } else if (cause != null && cause.getMessage() != null
344           && cause.getMessage().contains("Connection reset")) {
345         t = cause;
346       } else {
347         t = e;
348       }
349     }
350     LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
351       " at address=" + address + ", exception=" + t.getMessage());
352     return false;
353   }
354 
355   /**
356    * Gets a connection to the server hosting meta, as reported by ZooKeeper,
357    * waiting up to the specified timeout for availability.
358    * <p>WARNING: Does not retry.  Use an {@link org.apache.hadoop.hbase.client.HTable} instead.
359    * @param hConnection
360    * @param zkw
361    * @param timeout How long to wait on meta location
362    * @param replicaId
363    * @return connection to server hosting meta
364    * @throws InterruptedException
365    * @throws NotAllMetaRegionsOnlineException if timed out waiting
366    * @throws IOException
367    */
368   private AdminService.BlockingInterface getMetaServerConnection(HConnection hConnection,
369       ZooKeeperWatcher zkw, long timeout, int replicaId)
370   throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
371     return getCachedConnection(hConnection, waitMetaRegionLocation(zkw, replicaId, timeout));
372   }
373 
374   /**
375    * @param sn ServerName to get a connection against.
376    * @return The AdminProtocol we got when we connected to <code>sn</code>
377    * May have come from cache, may not be good, may have been setup by this
378    * invocation, or may be null.
379    * @throws IOException
380    */
381   @SuppressWarnings("deprecation")
382   private static AdminService.BlockingInterface getCachedConnection(HConnection hConnection,
383     ServerName sn)
384   throws IOException {
385     if (sn == null) {
386       return null;
387     }
388     AdminService.BlockingInterface service = null;
389     try {
390       service = hConnection.getAdmin(sn);
391     } catch (RetriesExhaustedException e) {
392       if (e.getCause() != null && e.getCause() instanceof ConnectException) {
393         // Catch this; presume it means the cached connection has gone bad.
394       } else {
395         throw e;
396       }
397     } catch (SocketTimeoutException e) {
398       LOG.debug("Timed out connecting to " + sn);
399     } catch (NoRouteToHostException e) {
400       LOG.debug("Connecting to " + sn, e);
401     } catch (SocketException e) {
402       LOG.debug("Exception connecting to " + sn);
403     } catch (UnknownHostException e) {
404       LOG.debug("Unknown host exception connecting to  " + sn);
405     } catch (FailedServerException e) {
406       if (LOG.isDebugEnabled()) {
407         LOG.debug("Server " + sn + " is in failed server list.");
408       }
409     } catch (IOException ioe) {
410       Throwable cause = ioe.getCause();
411       if (ioe instanceof ConnectException) {
412         // Catch. Connect refused.
413       } else if (cause != null && cause instanceof EOFException) {
414         // Catch. Other end disconnected us.
415       } else if (cause != null && cause.getMessage() != null &&
416         cause.getMessage().toLowerCase().contains("connection reset")) {
417         // Catch. Connection reset.
418       } else {
419         throw ioe;
420       }
421 
422     }
423     return service;
424   }
425 
426   /**
427    * Sets the location of <code>hbase:meta</code> in ZooKeeper to the
428    * specified server address.
429    * @param zookeeper zookeeper reference
430    * @param serverName The server hosting <code>hbase:meta</code>
431    * @param state The region transition state
432    * @throws KeeperException unexpected zookeeper exception
433    */
434   public static void setMetaLocation(ZooKeeperWatcher zookeeper,
435       ServerName serverName, RegionState.State state) throws KeeperException {
436     setMetaLocation(zookeeper, serverName, HRegionInfo.DEFAULT_REPLICA_ID, state);
437   }
438 
439   /**
440    * Sets the location of <code>hbase:meta</code> in ZooKeeper to the
441    * specified server address.
442    * @param zookeeper
443    * @param serverName
444    * @param replicaId
445    * @param state
446    * @throws KeeperException
447    */
448   public static void setMetaLocation(ZooKeeperWatcher zookeeper,
449       ServerName serverName, int replicaId, RegionState.State state) throws KeeperException {
450     LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName);
451     // Make the MetaRegionServer pb and then get its bytes and save this as
452     // the znode content.
453     MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
454       .setServer(ProtobufUtil.toServerName(serverName))
455       .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
456       .setState(state.convert()).build();
457     byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
458     try {
459       ZKUtil.setData(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
460     } catch(KeeperException.NoNodeException nne) {
461       if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
462         LOG.debug("META region location doesn't exist, create it");
463       } else {
464         LOG.debug("META region location doesn't exist for replicaId " + replicaId +
465             ", create it");
466       }
467       ZKUtil.createAndWatch(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
468     }
469   }
470 
471   /**
472    * Load the meta region state from the meta server ZNode.
473    */
474   public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException {
475     return getMetaRegionState(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
476   }
477 
478   /**
479    * Load the meta region state from the meta server ZNode.
480    * @param zkw
481    * @param replicaId
482    * @return regionstate
483    * @throws KeeperException
484    */
485   public static RegionState getMetaRegionState(ZooKeeperWatcher zkw, int replicaId)
486       throws KeeperException {
487     RegionState.State state = RegionState.State.OPEN;
488     ServerName serverName = null;
489     try {
490       byte[] data = ZKUtil.getData(zkw, zkw.getZNodeForReplica(replicaId));
491       if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
492         try {
493           int prefixLen = ProtobufUtil.lengthOfPBMagic();
494           ZooKeeperProtos.MetaRegionServer rl =
495             ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom
496               (data, prefixLen, data.length - prefixLen);
497           if (rl.hasState()) {
498             state = RegionState.State.convert(rl.getState());
499           }
500           HBaseProtos.ServerName sn = rl.getServer();
501           serverName = ServerName.valueOf(
502             sn.getHostName(), sn.getPort(), sn.getStartCode());
503         } catch (InvalidProtocolBufferException e) {
504           throw new DeserializationException("Unable to parse meta region location");
505         }
506       } else {
507         // old style of meta region location?
508         serverName = ServerName.parseFrom(data);
509       }
510     } catch (DeserializationException e) {
511       throw ZKUtil.convert(e);
512     } catch (InterruptedException e) {
513       Thread.currentThread().interrupt();
514     }
515     if (serverName == null) {
516       state = RegionState.State.OFFLINE;
517     }
518     return new RegionState(
519         RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId),
520       state, serverName);
521   }
522 
523   /**
524    * Deletes the location of <code>hbase:meta</code> in ZooKeeper.
525    * @param zookeeper zookeeper reference
526    * @throws KeeperException unexpected zookeeper exception
527    */
528   public void deleteMetaLocation(ZooKeeperWatcher zookeeper)
529   throws KeeperException {
530     deleteMetaLocation(zookeeper, HRegionInfo.DEFAULT_REPLICA_ID);
531   }
532 
533   public void deleteMetaLocation(ZooKeeperWatcher zookeeper, int replicaId)
534   throws KeeperException {
535     if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
536       LOG.info("Deleting hbase:meta region location in ZooKeeper");
537     } else {
538       LOG.info("Deleting hbase:meta for " + replicaId + " region location in ZooKeeper");
539     }
540     try {
541       // Just delete the node.  Don't need any watches.
542       ZKUtil.deleteNode(zookeeper, zookeeper.getZNodeForReplica(replicaId));
543     } catch(KeeperException.NoNodeException nne) {
544       // Has already been deleted
545     }
546   }
547   /**
548    * Wait until the primary meta region is available. Get the secondary
549    * locations as well but don't block for those.
550    * @param zkw
551    * @param timeout
552    * @param conf
553    * @return ServerName or null if we timed out.
554    * @throws InterruptedException
555    */
556   public List<ServerName> blockUntilAvailable(final ZooKeeperWatcher zkw,
557       final long timeout, Configuration conf)
558           throws InterruptedException {
559     int numReplicasConfigured = 1;
560     try {
561       List<String> metaReplicaNodes = zkw.getMetaReplicaNodes();
562       numReplicasConfigured = metaReplicaNodes.size();
563     } catch (KeeperException e) {
564       LOG.warn("Got ZK exception " + e);
565     }
566     List<ServerName> servers = new ArrayList<ServerName>(numReplicasConfigured);
567     ServerName server = blockUntilAvailable(zkw, timeout);
568     if (server == null) return null;
569     servers.add(server);
570 
571     for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) {
572       // return all replica locations for the meta
573       servers.add(getMetaRegionLocation(zkw, replicaId));
574     }
575     return servers;
576   }
577 
578   /**
579    * Wait until the meta region is available and is not in transition.
580    * @param zkw zookeeper connection to use
581    * @param timeout maximum time to wait, in millis
582    * @return ServerName or null if we timed out.
583    * @throws InterruptedException
584    */
585   public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw,
586       final long timeout)
587   throws InterruptedException {
588     return blockUntilAvailable(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
589   }
590 
591   /**
592    * Wait until the meta region is available and is not in transition.
593    * @param zkw
594    * @param replicaId
595    * @param timeout
596    * @return ServerName or null if we timed out.
597    * @throws InterruptedException
598    */
599   public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw, int replicaId,
600       final long timeout)
601   throws InterruptedException {
602     if (timeout < 0) throw new IllegalArgumentException();
603     if (zkw == null) throw new IllegalArgumentException();
604     Stopwatch sw = new Stopwatch().start();
605     ServerName sn = null;
606     try {
607       while (true) {
608         sn = getMetaRegionLocation(zkw, replicaId);
609         if (sn != null || sw.elapsedMillis()
610             > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
611           break;
612         }
613         Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
614       }
615     } finally {
616       sw.stop();
617     }
618     return sn;
619   }
620 
621   /**
622    * Stop working.
623    * Interrupts any ongoing waits.
624    */
625   public void stop() {
626     if (!stopped) {
627       LOG.debug("Stopping MetaTableLocator");
628       stopped = true;
629     }
630   }
631 }