1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.zookeeper;
19
20 import com.google.common.base.Stopwatch;
21 import com.google.protobuf.InvalidProtocolBufferException;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.hbase.classification.InterfaceAudience;
26 import org.apache.hadoop.conf.Configuration;
27 import org.apache.hadoop.hbase.HConstants;
28 import org.apache.hadoop.hbase.HRegionInfo;
29 import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
30 import org.apache.hadoop.hbase.ServerName;
31 import org.apache.hadoop.hbase.classification.InterfaceAudience;
32 import org.apache.hadoop.hbase.client.ClusterConnection;
33 import org.apache.hadoop.hbase.client.Connection;
34 import org.apache.hadoop.hbase.client.HConnection;
35 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
36 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
37 import org.apache.hadoop.hbase.exceptions.DeserializationException;
38 import org.apache.hadoop.hbase.ServerName;
39 import org.apache.hadoop.hbase.ipc.FailedServerException;
40 import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
41 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
42 import org.apache.hadoop.hbase.master.RegionState;
43 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
44 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
45 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
46 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
47 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
48 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
49 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.apache.hadoop.hbase.util.Pair;
52 import org.apache.hadoop.ipc.RemoteException;
53 import org.apache.zookeeper.KeeperException;
54
55 import java.io.EOFException;
56 import java.io.IOException;
57 import java.net.ConnectException;
58 import java.net.NoRouteToHostException;
59 import java.net.SocketException;
60 import java.net.SocketTimeoutException;
61 import java.net.UnknownHostException;
62
63 import java.util.List;
64 import java.util.ArrayList;
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81 @InterfaceAudience.Private
82 public class MetaTableLocator {
83 private static final Log LOG = LogFactory.getLog(MetaTableLocator.class);
84
85
86 private volatile boolean stopped = false;
87
88
89
90
91
92 public boolean isLocationAvailable(ZooKeeperWatcher zkw) {
93 return getMetaRegionLocation(zkw) != null;
94 }
95
96
97
98
99
100 public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw) {
101 return getMetaRegionsAndLocations(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
102 }
103
104
105
106
107
108
109
110 public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw,
111 int replicaId) {
112 ServerName serverName = getMetaRegionLocation(zkw, replicaId);
113 List<Pair<HRegionInfo, ServerName>> list = new ArrayList<Pair<HRegionInfo, ServerName>>();
114 list.add(new Pair<HRegionInfo, ServerName>(RegionReplicaUtil.getRegionInfoForReplica(
115 HRegionInfo.FIRST_META_REGIONINFO, replicaId), serverName));
116 return list;
117 }
118
119
120
121
122
123 public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw) {
124 return getMetaRegions(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
125 }
126
127
128
129
130
131
132
133 public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw, int replicaId) {
134 List<Pair<HRegionInfo, ServerName>> result;
135 result = getMetaRegionsAndLocations(zkw, replicaId);
136 return getListOfHRegionInfos(result);
137 }
138
139 private List<HRegionInfo> getListOfHRegionInfos(
140 final List<Pair<HRegionInfo, ServerName>> pairs) {
141 if (pairs == null || pairs.isEmpty()) return null;
142 List<HRegionInfo> result = new ArrayList<HRegionInfo>(pairs.size());
143 for (Pair<HRegionInfo, ServerName> pair: pairs) {
144 result.add(pair.getFirst());
145 }
146 return result;
147 }
148
149
150
151
152
153
154 public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw) {
155 try {
156 RegionState state = getMetaRegionState(zkw);
157 return state.isOpened() ? state.getServerName() : null;
158 } catch (KeeperException ke) {
159 return null;
160 }
161 }
162
163
164
165
166
167
168
169 public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw, int replicaId) {
170 try {
171 RegionState state = getMetaRegionState(zkw, replicaId);
172 return state.isOpened() ? state.getServerName() : null;
173 } catch (KeeperException ke) {
174 return null;
175 }
176 }
177
178
179
180
181
182
183
184
185
186
187
188
189
190 public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, long timeout)
191 throws InterruptedException, NotAllMetaRegionsOnlineException {
192 return waitMetaRegionLocation(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
193 }
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208 public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, int replicaId, long timeout)
209 throws InterruptedException, NotAllMetaRegionsOnlineException {
210 try {
211 if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) {
212 String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
213 + "There could be a mismatch with the one configured in the master.";
214 LOG.error(errorMsg);
215 throw new IllegalArgumentException(errorMsg);
216 }
217 } catch (KeeperException e) {
218 throw new IllegalStateException("KeeperException while trying to check baseZNode:", e);
219 }
220 ServerName sn = blockUntilAvailable(zkw, replicaId, timeout);
221
222 if (sn == null) {
223 throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
224 }
225
226 return sn;
227 }
228
229
230
231
232
233
234
235
236 public void waitMetaRegionLocation(ZooKeeperWatcher zkw) throws InterruptedException {
237 Stopwatch stopwatch = new Stopwatch().start();
238 while (!stopped) {
239 try {
240 if (waitMetaRegionLocation(zkw, 100) != null) break;
241 long sleepTime = stopwatch.elapsedMillis();
242
243 if ((sleepTime + 1) % 10000 == 0) {
244 LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
245 }
246 } catch (NotAllMetaRegionsOnlineException e) {
247 if (LOG.isTraceEnabled()) {
248 LOG.trace("hbase:meta still not available, sleeping and retrying." +
249 " Reason: " + e.getMessage());
250 }
251 }
252 }
253 }
254
255
256
257
258
259
260
261
262
263
264
265 public boolean verifyMetaRegionLocation(HConnection hConnection,
266 ZooKeeperWatcher zkw, final long timeout)
267 throws InterruptedException, IOException {
268 return verifyMetaRegionLocation(hConnection, zkw, timeout, HRegionInfo.DEFAULT_REPLICA_ID);
269 }
270
271
272
273
274
275
276
277
278
279
280
281 public boolean verifyMetaRegionLocation(HConnection hConnection,
282 ZooKeeperWatcher zkw, final long timeout, int replicaId)
283 throws InterruptedException, IOException {
284 AdminProtos.AdminService.BlockingInterface service = null;
285 try {
286 service = getMetaServerConnection(hConnection, zkw, timeout, replicaId);
287 } catch (NotAllMetaRegionsOnlineException e) {
288
289 } catch (ServerNotRunningYetException e) {
290
291 } catch (UnknownHostException e) {
292
293 } catch (RegionServerStoppedException e) {
294
295 }
296 return (service != null) && verifyRegionLocation(hConnection, service,
297 getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica(
298 HRegionInfo.FIRST_META_REGIONINFO, replicaId).getRegionName());
299 }
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316 private boolean verifyRegionLocation(final Connection connection,
317 AdminService.BlockingInterface hostingServer, final ServerName address,
318 final byte [] regionName)
319 throws IOException {
320 if (hostingServer == null) {
321 LOG.info("Passed hostingServer is null");
322 return false;
323 }
324 Throwable t;
325 PayloadCarryingRpcController controller = null;
326 if (connection instanceof ClusterConnection) {
327 controller = ((ClusterConnection) connection).getRpcControllerFactory().newController();
328 }
329 try {
330
331 return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
332 } catch (ConnectException e) {
333 t = e;
334 } catch (RetriesExhaustedException e) {
335 t = e;
336 } catch (RemoteException e) {
337 IOException ioe = e.unwrapRemoteException();
338 t = ioe;
339 } catch (IOException e) {
340 Throwable cause = e.getCause();
341 if (cause != null && cause instanceof EOFException) {
342 t = cause;
343 } else if (cause != null && cause.getMessage() != null
344 && cause.getMessage().contains("Connection reset")) {
345 t = cause;
346 } else {
347 t = e;
348 }
349 }
350 LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
351 " at address=" + address + ", exception=" + t.getMessage());
352 return false;
353 }
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368 private AdminService.BlockingInterface getMetaServerConnection(HConnection hConnection,
369 ZooKeeperWatcher zkw, long timeout, int replicaId)
370 throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
371 return getCachedConnection(hConnection, waitMetaRegionLocation(zkw, replicaId, timeout));
372 }
373
374
375
376
377
378
379
380
381 @SuppressWarnings("deprecation")
382 private static AdminService.BlockingInterface getCachedConnection(HConnection hConnection,
383 ServerName sn)
384 throws IOException {
385 if (sn == null) {
386 return null;
387 }
388 AdminService.BlockingInterface service = null;
389 try {
390 service = hConnection.getAdmin(sn);
391 } catch (RetriesExhaustedException e) {
392 if (e.getCause() != null && e.getCause() instanceof ConnectException) {
393
394 } else {
395 throw e;
396 }
397 } catch (SocketTimeoutException e) {
398 LOG.debug("Timed out connecting to " + sn);
399 } catch (NoRouteToHostException e) {
400 LOG.debug("Connecting to " + sn, e);
401 } catch (SocketException e) {
402 LOG.debug("Exception connecting to " + sn);
403 } catch (UnknownHostException e) {
404 LOG.debug("Unknown host exception connecting to " + sn);
405 } catch (FailedServerException e) {
406 if (LOG.isDebugEnabled()) {
407 LOG.debug("Server " + sn + " is in failed server list.");
408 }
409 } catch (IOException ioe) {
410 Throwable cause = ioe.getCause();
411 if (ioe instanceof ConnectException) {
412
413 } else if (cause != null && cause instanceof EOFException) {
414
415 } else if (cause != null && cause.getMessage() != null &&
416 cause.getMessage().toLowerCase().contains("connection reset")) {
417
418 } else {
419 throw ioe;
420 }
421
422 }
423 return service;
424 }
425
426
427
428
429
430
431
432
433
434 public static void setMetaLocation(ZooKeeperWatcher zookeeper,
435 ServerName serverName, RegionState.State state) throws KeeperException {
436 setMetaLocation(zookeeper, serverName, HRegionInfo.DEFAULT_REPLICA_ID, state);
437 }
438
439
440
441
442
443
444
445
446
447
448 public static void setMetaLocation(ZooKeeperWatcher zookeeper,
449 ServerName serverName, int replicaId, RegionState.State state) throws KeeperException {
450 LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName);
451
452
453 MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
454 .setServer(ProtobufUtil.toServerName(serverName))
455 .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
456 .setState(state.convert()).build();
457 byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
458 try {
459 ZKUtil.setData(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
460 } catch(KeeperException.NoNodeException nne) {
461 if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
462 LOG.debug("META region location doesn't exist, create it");
463 } else {
464 LOG.debug("META region location doesn't exist for replicaId " + replicaId +
465 ", create it");
466 }
467 ZKUtil.createAndWatch(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
468 }
469 }
470
471
472
473
474 public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException {
475 return getMetaRegionState(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
476 }
477
478
479
480
481
482
483
484
485 public static RegionState getMetaRegionState(ZooKeeperWatcher zkw, int replicaId)
486 throws KeeperException {
487 RegionState.State state = RegionState.State.OPEN;
488 ServerName serverName = null;
489 try {
490 byte[] data = ZKUtil.getData(zkw, zkw.getZNodeForReplica(replicaId));
491 if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
492 try {
493 int prefixLen = ProtobufUtil.lengthOfPBMagic();
494 ZooKeeperProtos.MetaRegionServer rl =
495 ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom
496 (data, prefixLen, data.length - prefixLen);
497 if (rl.hasState()) {
498 state = RegionState.State.convert(rl.getState());
499 }
500 HBaseProtos.ServerName sn = rl.getServer();
501 serverName = ServerName.valueOf(
502 sn.getHostName(), sn.getPort(), sn.getStartCode());
503 } catch (InvalidProtocolBufferException e) {
504 throw new DeserializationException("Unable to parse meta region location");
505 }
506 } else {
507
508 serverName = ServerName.parseFrom(data);
509 }
510 } catch (DeserializationException e) {
511 throw ZKUtil.convert(e);
512 } catch (InterruptedException e) {
513 Thread.currentThread().interrupt();
514 }
515 if (serverName == null) {
516 state = RegionState.State.OFFLINE;
517 }
518 return new RegionState(
519 RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId),
520 state, serverName);
521 }
522
523
524
525
526
527
528 public void deleteMetaLocation(ZooKeeperWatcher zookeeper)
529 throws KeeperException {
530 deleteMetaLocation(zookeeper, HRegionInfo.DEFAULT_REPLICA_ID);
531 }
532
533 public void deleteMetaLocation(ZooKeeperWatcher zookeeper, int replicaId)
534 throws KeeperException {
535 if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
536 LOG.info("Deleting hbase:meta region location in ZooKeeper");
537 } else {
538 LOG.info("Deleting hbase:meta for " + replicaId + " region location in ZooKeeper");
539 }
540 try {
541
542 ZKUtil.deleteNode(zookeeper, zookeeper.getZNodeForReplica(replicaId));
543 } catch(KeeperException.NoNodeException nne) {
544
545 }
546 }
547
548
549
550
551
552
553
554
555
556 public List<ServerName> blockUntilAvailable(final ZooKeeperWatcher zkw,
557 final long timeout, Configuration conf)
558 throws InterruptedException {
559 int numReplicasConfigured = 1;
560 try {
561 List<String> metaReplicaNodes = zkw.getMetaReplicaNodes();
562 numReplicasConfigured = metaReplicaNodes.size();
563 } catch (KeeperException e) {
564 LOG.warn("Got ZK exception " + e);
565 }
566 List<ServerName> servers = new ArrayList<ServerName>(numReplicasConfigured);
567 ServerName server = blockUntilAvailable(zkw, timeout);
568 if (server == null) return null;
569 servers.add(server);
570
571 for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) {
572
573 servers.add(getMetaRegionLocation(zkw, replicaId));
574 }
575 return servers;
576 }
577
578
579
580
581
582
583
584
585 public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw,
586 final long timeout)
587 throws InterruptedException {
588 return blockUntilAvailable(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
589 }
590
591
592
593
594
595
596
597
598
599 public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw, int replicaId,
600 final long timeout)
601 throws InterruptedException {
602 if (timeout < 0) throw new IllegalArgumentException();
603 if (zkw == null) throw new IllegalArgumentException();
604 Stopwatch sw = new Stopwatch().start();
605 ServerName sn = null;
606 try {
607 while (true) {
608 sn = getMetaRegionLocation(zkw, replicaId);
609 if (sn != null || sw.elapsedMillis()
610 > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
611 break;
612 }
613 Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
614 }
615 } finally {
616 sw.stop();
617 }
618 return sn;
619 }
620
621
622
623
624
625 public void stop() {
626 if (!stopped) {
627 LOG.debug("Stopping MetaTableLocator");
628 stopped = true;
629 }
630 }
631 }