001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.zookeeper; 019 020import java.io.EOFException; 021import java.io.IOException; 022import java.net.ConnectException; 023import java.net.NoRouteToHostException; 024import java.net.SocketException; 025import java.net.SocketTimeoutException; 026import java.net.UnknownHostException; 027import java.util.ArrayList; 028import java.util.Collections; 029import java.util.List; 030import java.util.Locale; 031 032import org.apache.hadoop.conf.Configuration; 033import org.apache.hadoop.hbase.HConstants; 034import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; 035import org.apache.hadoop.hbase.ServerName; 036import org.apache.hadoop.hbase.client.ClusterConnection; 037import org.apache.hadoop.hbase.client.RegionInfo; 038import org.apache.hadoop.hbase.client.RegionInfoBuilder; 039import org.apache.hadoop.hbase.client.RegionReplicaUtil; 040import org.apache.hadoop.hbase.client.RetriesExhaustedException; 041import org.apache.hadoop.hbase.exceptions.DeserializationException; 042import org.apache.hadoop.hbase.ipc.FailedServerException; 043import org.apache.hadoop.hbase.ipc.HBaseRpcController; 044import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; 045import org.apache.hadoop.hbase.master.RegionState; 046import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException; 047import org.apache.hadoop.hbase.util.Bytes; 048import org.apache.hadoop.hbase.util.Pair; 049import org.apache.hadoop.ipc.RemoteException; 050import org.apache.yetus.audience.InterfaceAudience; 051import org.apache.zookeeper.KeeperException; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; 055import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 056import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos; 057import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService; 058import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; 059import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos; 060import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.MetaRegionServer; 061 062/** 063 * Utility class to perform operation (get/wait for/verify/set/delete) on znode in ZooKeeper 064 * which keeps hbase:meta region server location. 065 * 066 * Stateless class with a bunch of static methods. Doesn't manage resources passed in 067 * (e.g. Connection, ZKWatcher etc). 068 * 069 * Meta region location is set by <code>RegionServerServices</code>. 070 * This class doesn't use ZK watchers, rather accesses ZK directly. 071 * 072 * This class it stateless. The only reason it's not made a non-instantiable util class 073 * with a collection of static methods is that it'd be rather hard to mock properly in tests. 074 * 075 * TODO: rewrite using RPC calls to master to find out about hbase:meta. 076 */ 077@InterfaceAudience.Private 078public class MetaTableLocator { 079 private static final Logger LOG = LoggerFactory.getLogger(MetaTableLocator.class); 080 081 // only needed to allow non-timeout infinite waits to stop when cluster shuts down 082 private volatile boolean stopped = false; 083 084 /** 085 * Checks if the meta region location is available. 086 * @return true if meta region location is available, false if not 087 */ 088 public boolean isLocationAvailable(ZKWatcher zkw) { 089 return getMetaRegionLocation(zkw) != null; 090 } 091 092 /** 093 * @param zkw ZooKeeper watcher to be used 094 * @return meta table regions and their locations. 095 */ 096 public List<Pair<RegionInfo, ServerName>> getMetaRegionsAndLocations(ZKWatcher zkw) { 097 return getMetaRegionsAndLocations(zkw, RegionInfo.DEFAULT_REPLICA_ID); 098 } 099 100 /** 101 * Gets the meta regions and their locations for the given path and replica ID. 102 * 103 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation 104 * @param replicaId the ID of the replica 105 * @return meta table regions and their locations. 106 */ 107 public List<Pair<RegionInfo, ServerName>> getMetaRegionsAndLocations(ZKWatcher zkw, 108 int replicaId) { 109 ServerName serverName = getMetaRegionLocation(zkw, replicaId); 110 List<Pair<RegionInfo, ServerName>> list = new ArrayList<>(1); 111 list.add(new Pair<>(RegionReplicaUtil.getRegionInfoForReplica( 112 RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId), serverName)); 113 return list; 114 } 115 116 /** 117 * Gets the meta regions for the given path with the default replica ID. 118 * 119 * @param zkw ZooKeeper watcher to be used 120 * @return List of meta regions 121 */ 122 public List<RegionInfo> getMetaRegions(ZKWatcher zkw) { 123 return getMetaRegions(zkw, RegionInfo.DEFAULT_REPLICA_ID); 124 } 125 126 /** 127 * Gets the meta regions for the given path and replica ID. 128 * 129 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation 130 * @param replicaId the ID of the replica 131 * @return List of meta regions 132 */ 133 public List<RegionInfo> getMetaRegions(ZKWatcher zkw, int replicaId) { 134 List<Pair<RegionInfo, ServerName>> result; 135 result = getMetaRegionsAndLocations(zkw, replicaId); 136 return getListOfRegionInfos(result); 137 } 138 139 private List<RegionInfo> getListOfRegionInfos(final List<Pair<RegionInfo, ServerName>> pairs) { 140 if (pairs == null || pairs.isEmpty()) { 141 return Collections.EMPTY_LIST; 142 } 143 144 List<RegionInfo> result = new ArrayList<>(pairs.size()); 145 for (Pair<RegionInfo, ServerName> pair: pairs) { 146 result.add(pair.getFirst()); 147 } 148 return result; 149 } 150 151 /** 152 * Gets the meta region location, if available. Does not block. 153 * @param zkw zookeeper connection to use 154 * @return server name or null if we failed to get the data. 155 */ 156 public ServerName getMetaRegionLocation(final ZKWatcher zkw) { 157 try { 158 RegionState state = getMetaRegionState(zkw); 159 return state.isOpened() ? state.getServerName() : null; 160 } catch (KeeperException ke) { 161 return null; 162 } 163 } 164 165 /** 166 * Gets the meta region location, if available. Does not block. 167 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation 168 * @param replicaId the ID of the replica 169 * @return server name 170 */ 171 public ServerName getMetaRegionLocation(final ZKWatcher zkw, int replicaId) { 172 try { 173 RegionState state = getMetaRegionState(zkw, replicaId); 174 return state.isOpened() ? state.getServerName() : null; 175 } catch (KeeperException ke) { 176 return null; 177 } 178 } 179 180 /** 181 * Gets the meta region location, if available, and waits for up to the 182 * specified timeout if not immediately available. 183 * Given the zookeeper notification could be delayed, we will try to 184 * get the latest data. 185 * 186 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation 187 * @param timeout maximum time to wait, in millis 188 * @return server name for server hosting meta region formatted as per 189 * {@link ServerName}, or null if none available 190 * @throws InterruptedException if interrupted while waiting 191 * @throws NotAllMetaRegionsOnlineException if a meta or root region is not online 192 */ 193 public ServerName waitMetaRegionLocation(ZKWatcher zkw, long timeout) 194 throws InterruptedException, NotAllMetaRegionsOnlineException { 195 return waitMetaRegionLocation(zkw, RegionInfo.DEFAULT_REPLICA_ID, timeout); 196 } 197 198 /** 199 * Gets the meta region location, if available, and waits for up to the specified timeout if not 200 * immediately available. Given the zookeeper notification could be delayed, we will try to 201 * get the latest data. 202 * 203 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation 204 * @param replicaId the ID of the replica 205 * @param timeout maximum time to wait, in millis 206 * @return server name for server hosting meta region formatted as per 207 * {@link ServerName}, or null if none available 208 * @throws InterruptedException if waiting for the socket operation fails 209 * @throws NotAllMetaRegionsOnlineException if a meta or root region is not online 210 */ 211 public ServerName waitMetaRegionLocation(ZKWatcher zkw, int replicaId, long timeout) 212 throws InterruptedException, NotAllMetaRegionsOnlineException { 213 try { 214 if (ZKUtil.checkExists(zkw, zkw.znodePaths.baseZNode) == -1) { 215 String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. " 216 + "There could be a mismatch with the one configured in the master."; 217 LOG.error(errorMsg); 218 throw new IllegalArgumentException(errorMsg); 219 } 220 } catch (KeeperException e) { 221 throw new IllegalStateException("KeeperException while trying to check baseZNode:", e); 222 } 223 ServerName sn = blockUntilAvailable(zkw, replicaId, timeout); 224 225 if (sn == null) { 226 throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms"); 227 } 228 229 return sn; 230 } 231 232 /** 233 * Waits indefinitely for availability of <code>hbase:meta</code>. Used during 234 * cluster startup. Does not verify meta, just that something has been 235 * set up in zk. 236 * @see #waitMetaRegionLocation(ZKWatcher, long) 237 * @throws InterruptedException if interrupted while waiting 238 */ 239 public void waitMetaRegionLocation(ZKWatcher zkw) throws InterruptedException { 240 long startTime = System.currentTimeMillis(); 241 while (!stopped) { 242 try { 243 if (waitMetaRegionLocation(zkw, 100) != null) { 244 break; 245 } 246 247 long sleepTime = System.currentTimeMillis() - startTime; 248 // +1 in case sleepTime=0 249 if ((sleepTime + 1) % 10000 == 0) { 250 LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms"); 251 } 252 } catch (NotAllMetaRegionsOnlineException e) { 253 if (LOG.isTraceEnabled()) { 254 LOG.trace("hbase:meta still not available, sleeping and retrying." + 255 " Reason: " + e.getMessage()); 256 } 257 } 258 } 259 } 260 261 /** 262 * Verify <code>hbase:meta</code> is deployed and accessible. 263 * 264 * @param hConnection the connection to use 265 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation 266 * @param timeout How long to wait on zk for meta address (passed through to 267 * the internal call to {@link #getMetaServerConnection}. 268 * @return True if the <code>hbase:meta</code> location is healthy. 269 * @throws IOException if the number of retries for getting the connection is exceeded 270 * @throws InterruptedException if waiting for the socket operation fails 271 */ 272 public boolean verifyMetaRegionLocation(ClusterConnection hConnection, ZKWatcher zkw, 273 final long timeout) throws InterruptedException, IOException { 274 return verifyMetaRegionLocation(hConnection, zkw, timeout, RegionInfo.DEFAULT_REPLICA_ID); 275 } 276 277 /** 278 * Verify <code>hbase:meta</code> is deployed and accessible. 279 * 280 * @param connection the connection to use 281 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation 282 * @param timeout How long to wait on zk for meta address (passed through to 283 * @param replicaId the ID of the replica 284 * @return True if the <code>hbase:meta</code> location is healthy. 285 * @throws InterruptedException if waiting for the socket operation fails 286 * @throws IOException if the number of retries for getting the connection is exceeded 287 */ 288 public boolean verifyMetaRegionLocation(ClusterConnection connection, ZKWatcher zkw, 289 final long timeout, int replicaId) throws InterruptedException, IOException { 290 AdminProtos.AdminService.BlockingInterface service = null; 291 try { 292 service = getMetaServerConnection(connection, zkw, timeout, replicaId); 293 } catch (NotAllMetaRegionsOnlineException e) { 294 // Pass 295 } catch (ServerNotRunningYetException e) { 296 // Pass -- remote server is not up so can't be carrying root 297 } catch (UnknownHostException e) { 298 // Pass -- server name doesn't resolve so it can't be assigned anything. 299 } catch (RegionServerStoppedException e) { 300 // Pass -- server name sends us to a server that is dying or already dead. 301 } 302 return (service != null) && verifyRegionLocation(connection, service, 303 getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica( 304 RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId).getRegionName()); 305 } 306 307 /** 308 * Verify we can connect to <code>hostingServer</code> and that its carrying 309 * <code>regionName</code>. 310 * @param hostingServer Interface to the server hosting <code>regionName</code> 311 * @param address The servername that goes with the <code>metaServer</code> interface. 312 * Used logging. 313 * @param regionName The regionname we are interested in. 314 * @return True if we were able to verify the region located at other side of the interface. 315 */ 316 // TODO: We should be able to get the ServerName from the AdminProtocol 317 // rather than have to pass it in. Its made awkward by the fact that the 318 // HRI is likely a proxy against remote server so the getServerName needs 319 // to be fixed to go to a local method or to a cache before we can do this. 320 private boolean verifyRegionLocation(final ClusterConnection connection, 321 AdminService.BlockingInterface hostingServer, final ServerName address, 322 final byte [] regionName) { 323 if (hostingServer == null) { 324 LOG.info("Passed hostingServer is null"); 325 return false; 326 } 327 Throwable t; 328 HBaseRpcController controller = connection.getRpcControllerFactory().newController(); 329 try { 330 // Try and get regioninfo from the hosting server. 331 return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null; 332 } catch (ConnectException e) { 333 t = e; 334 } catch (RetriesExhaustedException e) { 335 t = e; 336 } catch (RemoteException e) { 337 IOException ioe = e.unwrapRemoteException(); 338 t = ioe; 339 } catch (IOException e) { 340 Throwable cause = e.getCause(); 341 if (cause != null && cause instanceof EOFException) { 342 t = cause; 343 } else if (cause != null && cause.getMessage() != null 344 && cause.getMessage().contains("Connection reset")) { 345 t = cause; 346 } else { 347 t = e; 348 } 349 } 350 LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) + 351 " at address=" + address + ", exception=" + t.getMessage()); 352 return false; 353 } 354 355 /** 356 * Gets a connection to the server hosting meta, as reported by ZooKeeper, waiting up to the 357 * specified timeout for availability. 358 * 359 * <p>WARNING: Does not retry. Use an {@link org.apache.hadoop.hbase.client.HTable} instead. 360 * 361 * @param connection the connection to use 362 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation 363 * @param timeout How long to wait on meta location 364 * @param replicaId the ID of the replica 365 * @return connection to server hosting meta 366 * @throws InterruptedException if waiting for the socket operation fails 367 * @throws IOException if the number of retries for getting the connection is exceeded 368 */ 369 private AdminService.BlockingInterface getMetaServerConnection(ClusterConnection connection, 370 ZKWatcher zkw, long timeout, int replicaId) throws InterruptedException, IOException { 371 return getCachedConnection(connection, waitMetaRegionLocation(zkw, replicaId, timeout)); 372 } 373 374 /** 375 * @param sn ServerName to get a connection against. 376 * @return The AdminProtocol we got when we connected to <code>sn</code> 377 * May have come from cache, may not be good, may have been setup by this invocation, or 378 * may be null. 379 * @throws IOException if the number of retries for getting the connection is exceeded 380 */ 381 private static AdminService.BlockingInterface getCachedConnection(ClusterConnection connection, 382 ServerName sn) throws IOException { 383 if (sn == null) { 384 return null; 385 } 386 AdminService.BlockingInterface service = null; 387 try { 388 service = connection.getAdmin(sn); 389 } catch (RetriesExhaustedException e) { 390 if (e.getCause() != null && e.getCause() instanceof ConnectException) { 391 // Catch this; presume it means the cached connection has gone bad. 392 } else { 393 throw e; 394 } 395 } catch (SocketTimeoutException e) { 396 LOG.debug("Timed out connecting to " + sn); 397 } catch (NoRouteToHostException e) { 398 LOG.debug("Connecting to " + sn, e); 399 } catch (SocketException e) { 400 LOG.debug("Exception connecting to " + sn); 401 } catch (UnknownHostException e) { 402 LOG.debug("Unknown host exception connecting to " + sn); 403 } catch (FailedServerException e) { 404 if (LOG.isDebugEnabled()) { 405 LOG.debug("Server " + sn + " is in failed server list."); 406 } 407 } catch (IOException ioe) { 408 Throwable cause = ioe.getCause(); 409 if (ioe instanceof ConnectException) { 410 // Catch. Connect refused. 411 } else if (cause != null && cause instanceof EOFException) { 412 // Catch. Other end disconnected us. 413 } else if (cause != null && cause.getMessage() != null && 414 cause.getMessage().toLowerCase(Locale.ROOT).contains("connection reset")) { 415 // Catch. Connection reset. 416 } else { 417 throw ioe; 418 } 419 420 } 421 return service; 422 } 423 424 /** 425 * Sets the location of <code>hbase:meta</code> in ZooKeeper to the 426 * specified server address. 427 * @param zookeeper zookeeper reference 428 * @param serverName The server hosting <code>hbase:meta</code> 429 * @param state The region transition state 430 * @throws KeeperException unexpected zookeeper exception 431 */ 432 public static void setMetaLocation(ZKWatcher zookeeper, 433 ServerName serverName, RegionState.State state) throws KeeperException { 434 setMetaLocation(zookeeper, serverName, RegionInfo.DEFAULT_REPLICA_ID, state); 435 } 436 437 /** 438 * Sets the location of <code>hbase:meta</code> in ZooKeeper to the specified server address. 439 * @param zookeeper reference to the {@link ZKWatcher} which also contains configuration and 440 * operation 441 * @param serverName the name of the server 442 * @param replicaId the ID of the replica 443 * @param state the state of the region 444 * @throws KeeperException if a ZooKeeper operation fails 445 */ 446 public static void setMetaLocation(ZKWatcher zookeeper, ServerName serverName, int replicaId, 447 RegionState.State state) throws KeeperException { 448 if (serverName == null) { 449 LOG.warn("Tried to set null ServerName in hbase:meta; skipping -- ServerName required"); 450 return; 451 } 452 LOG.info("Setting hbase:meta (replicaId=" + replicaId + ") location in ZooKeeper as " + 453 serverName); 454 // Make the MetaRegionServer pb and then get its bytes and save this as 455 // the znode content. 456 MetaRegionServer pbrsr = MetaRegionServer.newBuilder() 457 .setServer(ProtobufUtil.toServerName(serverName)) 458 .setRpcVersion(HConstants.RPC_CURRENT_VERSION) 459 .setState(state.convert()).build(); 460 byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray()); 461 try { 462 ZKUtil.setData(zookeeper, 463 zookeeper.znodePaths.getZNodeForReplica(replicaId), data); 464 } catch(KeeperException.NoNodeException nne) { 465 if (replicaId == RegionInfo.DEFAULT_REPLICA_ID) { 466 LOG.debug("META region location doesn't exist, create it"); 467 } else { 468 LOG.debug("META region location doesn't exist for replicaId=" + replicaId + 469 ", create it"); 470 } 471 ZKUtil.createAndWatch(zookeeper, zookeeper.znodePaths.getZNodeForReplica(replicaId), data); 472 } 473 } 474 475 /** 476 * Load the meta region state from the meta server ZNode. 477 */ 478 public static RegionState getMetaRegionState(ZKWatcher zkw) throws KeeperException { 479 return getMetaRegionState(zkw, RegionInfo.DEFAULT_REPLICA_ID); 480 } 481 482 /** 483 * Load the meta region state from the meta server ZNode. 484 * 485 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation 486 * @param replicaId the ID of the replica 487 * @return regionstate 488 * @throws KeeperException if a ZooKeeper operation fails 489 */ 490 public static RegionState getMetaRegionState(ZKWatcher zkw, int replicaId) 491 throws KeeperException { 492 RegionState.State state = RegionState.State.OPEN; 493 ServerName serverName = null; 494 try { 495 byte[] data = ZKUtil.getData(zkw, zkw.znodePaths.getZNodeForReplica(replicaId)); 496 if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) { 497 try { 498 int prefixLen = ProtobufUtil.lengthOfPBMagic(); 499 ZooKeeperProtos.MetaRegionServer rl = 500 ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom(data, prefixLen, 501 data.length - prefixLen); 502 if (rl.hasState()) { 503 state = RegionState.State.convert(rl.getState()); 504 } 505 HBaseProtos.ServerName sn = rl.getServer(); 506 serverName = ServerName.valueOf( 507 sn.getHostName(), sn.getPort(), sn.getStartCode()); 508 } catch (InvalidProtocolBufferException e) { 509 throw new DeserializationException("Unable to parse meta region location"); 510 } 511 } else { 512 // old style of meta region location? 513 serverName = ProtobufUtil.parseServerNameFrom(data); 514 } 515 } catch (DeserializationException e) { 516 throw ZKUtil.convert(e); 517 } catch (InterruptedException e) { 518 Thread.currentThread().interrupt(); 519 } 520 if (serverName == null) { 521 state = RegionState.State.OFFLINE; 522 } 523 return new RegionState( 524 RegionReplicaUtil.getRegionInfoForReplica( 525 RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId), 526 state, serverName); 527 } 528 529 /** 530 * Deletes the location of <code>hbase:meta</code> in ZooKeeper. 531 * @param zookeeper zookeeper reference 532 * @throws KeeperException unexpected zookeeper exception 533 */ 534 public void deleteMetaLocation(ZKWatcher zookeeper) 535 throws KeeperException { 536 deleteMetaLocation(zookeeper, RegionInfo.DEFAULT_REPLICA_ID); 537 } 538 539 public void deleteMetaLocation(ZKWatcher zookeeper, int replicaId) 540 throws KeeperException { 541 if (replicaId == RegionInfo.DEFAULT_REPLICA_ID) { 542 LOG.info("Deleting hbase:meta region location in ZooKeeper"); 543 } else { 544 LOG.info("Deleting hbase:meta for " + replicaId + " region location in ZooKeeper"); 545 } 546 try { 547 // Just delete the node. Don't need any watches. 548 ZKUtil.deleteNode(zookeeper, zookeeper.znodePaths.getZNodeForReplica(replicaId)); 549 } catch(KeeperException.NoNodeException nne) { 550 // Has already been deleted 551 } 552 } 553 /** 554 * Wait until the primary meta region is available. Get the secondary locations as well but don't 555 * block for those. 556 * 557 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and operation 558 * @param timeout maximum time to wait in millis 559 * @param conf the {@link Configuration} to use 560 * @return ServerName or null if we timed out. 561 * @throws InterruptedException if waiting for the socket operation fails 562 */ 563 public List<ServerName> blockUntilAvailable(final ZKWatcher zkw, final long timeout, 564 Configuration conf) throws InterruptedException { 565 int numReplicasConfigured = 1; 566 567 List<ServerName> servers = new ArrayList<>(); 568 // Make the blocking call first so that we do the wait to know 569 // the znodes are all in place or timeout. 570 ServerName server = blockUntilAvailable(zkw, timeout); 571 572 if (server == null) { 573 return null; 574 } 575 576 servers.add(server); 577 578 try { 579 List<String> metaReplicaNodes = zkw.getMetaReplicaNodes(); 580 numReplicasConfigured = metaReplicaNodes.size(); 581 } catch (KeeperException e) { 582 LOG.warn("Got ZK exception " + e); 583 } 584 for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) { 585 // return all replica locations for the meta 586 servers.add(getMetaRegionLocation(zkw, replicaId)); 587 } 588 return servers; 589 } 590 591 /** 592 * Wait until the meta region is available and is not in transition. 593 * @param zkw zookeeper connection to use 594 * @param timeout maximum time to wait, in millis 595 * @return ServerName or null if we timed out. 596 * @throws InterruptedException if waiting for the socket operation fails 597 */ 598 public ServerName blockUntilAvailable(final ZKWatcher zkw, final long timeout) 599 throws InterruptedException { 600 return blockUntilAvailable(zkw, RegionInfo.DEFAULT_REPLICA_ID, timeout); 601 } 602 603 /** 604 * Wait until the meta region is available and is not in transition. 605 * 606 * @param zkw reference to the {@link ZKWatcher} which also contains configuration and constants 607 * @param replicaId the ID of the replica 608 * @param timeout maximum time to wait in millis 609 * @return ServerName or null if we timed out. 610 * @throws InterruptedException if waiting for the socket operation fails 611 */ 612 public ServerName blockUntilAvailable(final ZKWatcher zkw, int replicaId, final long timeout) 613 throws InterruptedException { 614 if (timeout < 0) { 615 throw new IllegalArgumentException(); 616 } 617 618 if (zkw == null) { 619 throw new IllegalArgumentException(); 620 } 621 622 long startTime = System.currentTimeMillis(); 623 ServerName sn = null; 624 while (true) { 625 sn = getMetaRegionLocation(zkw, replicaId); 626 if (sn != null || (System.currentTimeMillis() - startTime) 627 > timeout - HConstants.SOCKET_RETRY_WAIT_MS) { 628 break; 629 } 630 Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS); 631 } 632 return sn; 633 } 634 635 /** 636 * Stop working. 637 * Interrupts any ongoing waits. 638 */ 639 public void stop() { 640 if (!stopped) { 641 LOG.debug("Stopping MetaTableLocator"); 642 stopped = true; 643 } 644 } 645}