1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.zookeeper;
19
20 import java.io.EOFException;
21 import java.io.IOException;
22 import java.net.ConnectException;
23 import java.net.NoRouteToHostException;
24 import java.net.SocketException;
25 import java.net.SocketTimeoutException;
26 import java.net.UnknownHostException;
27 import java.util.ArrayList;
28 import java.util.List;
29
30 import org.apache.commons.logging.Log;
31 import org.apache.commons.logging.LogFactory;
32 import org.apache.hadoop.conf.Configuration;
33 import org.apache.hadoop.hbase.HConstants;
34 import org.apache.hadoop.hbase.HRegionInfo;
35 import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
36 import org.apache.hadoop.hbase.ServerName;
37 import org.apache.hadoop.hbase.classification.InterfaceAudience;
38 import org.apache.hadoop.hbase.client.ClusterConnection;
39 import org.apache.hadoop.hbase.client.Connection;
40 import org.apache.hadoop.hbase.client.HConnection;
41 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
42 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
43 import org.apache.hadoop.hbase.exceptions.DeserializationException;
44 import org.apache.hadoop.hbase.ipc.FailedServerException;
45 import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
46 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
47 import org.apache.hadoop.hbase.master.RegionState;
48 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
49 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
50 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
51 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
52 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
53 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
54 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
55 import org.apache.hadoop.hbase.util.Bytes;
56 import org.apache.hadoop.hbase.util.Pair;
57 import org.apache.hadoop.ipc.RemoteException;
58 import org.apache.zookeeper.KeeperException;
59
60 import com.google.common.base.Stopwatch;
61 import com.google.protobuf.InvalidProtocolBufferException;
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78 @InterfaceAudience.Private
79 public class MetaTableLocator {
80 private static final Log LOG = LogFactory.getLog(MetaTableLocator.class);
81
82
83 private volatile boolean stopped = false;
84
85
86
87
88
89 public boolean isLocationAvailable(ZooKeeperWatcher zkw) {
90 return getMetaRegionLocation(zkw) != null;
91 }
92
93
94
95
96
97 public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw) {
98 return getMetaRegionsAndLocations(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
99 }
100
101
102
103
104
105
106
107 public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw,
108 int replicaId) {
109 ServerName serverName = getMetaRegionLocation(zkw, replicaId);
110 List<Pair<HRegionInfo, ServerName>> list = new ArrayList<Pair<HRegionInfo, ServerName>>();
111 list.add(new Pair<HRegionInfo, ServerName>(RegionReplicaUtil.getRegionInfoForReplica(
112 HRegionInfo.FIRST_META_REGIONINFO, replicaId), serverName));
113 return list;
114 }
115
116
117
118
119
120 public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw) {
121 return getMetaRegions(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
122 }
123
124
125
126
127
128
129
130 public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw, int replicaId) {
131 List<Pair<HRegionInfo, ServerName>> result;
132 result = getMetaRegionsAndLocations(zkw, replicaId);
133 return getListOfHRegionInfos(result);
134 }
135
136 private List<HRegionInfo> getListOfHRegionInfos(
137 final List<Pair<HRegionInfo, ServerName>> pairs) {
138 if (pairs == null || pairs.isEmpty()) return null;
139 List<HRegionInfo> result = new ArrayList<HRegionInfo>(pairs.size());
140 for (Pair<HRegionInfo, ServerName> pair: pairs) {
141 result.add(pair.getFirst());
142 }
143 return result;
144 }
145
146
147
148
149
150
151 public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw) {
152 try {
153 RegionState state = getMetaRegionState(zkw);
154 return state.isOpened() ? state.getServerName() : null;
155 } catch (KeeperException ke) {
156 return null;
157 }
158 }
159
160
161
162
163
164
165
166 public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw, int replicaId) {
167 try {
168 RegionState state = getMetaRegionState(zkw, replicaId);
169 return state.isOpened() ? state.getServerName() : null;
170 } catch (KeeperException ke) {
171 return null;
172 }
173 }
174
175
176
177
178
179
180
181
182
183
184
185
186
187 public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, long timeout)
188 throws InterruptedException, NotAllMetaRegionsOnlineException {
189 return waitMetaRegionLocation(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
190 }
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205 public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, int replicaId, long timeout)
206 throws InterruptedException, NotAllMetaRegionsOnlineException {
207 try {
208 if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) {
209 String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
210 + "There could be a mismatch with the one configured in the master.";
211 LOG.error(errorMsg);
212 throw new IllegalArgumentException(errorMsg);
213 }
214 } catch (KeeperException e) {
215 throw new IllegalStateException("KeeperException while trying to check baseZNode:", e);
216 }
217 ServerName sn = blockUntilAvailable(zkw, replicaId, timeout);
218
219 if (sn == null) {
220 throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
221 }
222
223 return sn;
224 }
225
226
227
228
229
230
231
232
233 public void waitMetaRegionLocation(ZooKeeperWatcher zkw) throws InterruptedException {
234 Stopwatch stopwatch = new Stopwatch().start();
235 while (!stopped) {
236 try {
237 if (waitMetaRegionLocation(zkw, 100) != null) break;
238 long sleepTime = stopwatch.elapsedMillis();
239
240 if ((sleepTime + 1) % 10000 == 0) {
241 LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
242 }
243 } catch (NotAllMetaRegionsOnlineException e) {
244 if (LOG.isTraceEnabled()) {
245 LOG.trace("hbase:meta still not available, sleeping and retrying." +
246 " Reason: " + e.getMessage());
247 }
248 }
249 }
250 }
251
252
253
254
255
256
257
258
259
260
261
262 public boolean verifyMetaRegionLocation(HConnection hConnection,
263 ZooKeeperWatcher zkw, final long timeout)
264 throws InterruptedException, IOException {
265 return verifyMetaRegionLocation(hConnection, zkw, timeout, HRegionInfo.DEFAULT_REPLICA_ID);
266 }
267
268
269
270
271
272
273
274
275
276
277
278 public boolean verifyMetaRegionLocation(HConnection hConnection,
279 ZooKeeperWatcher zkw, final long timeout, int replicaId)
280 throws InterruptedException, IOException {
281 AdminProtos.AdminService.BlockingInterface service = null;
282 try {
283 service = getMetaServerConnection(hConnection, zkw, timeout, replicaId);
284 } catch (NotAllMetaRegionsOnlineException e) {
285
286 } catch (ServerNotRunningYetException e) {
287
288 } catch (UnknownHostException e) {
289
290 } catch (RegionServerStoppedException e) {
291
292 }
293 return (service != null) && verifyRegionLocation(hConnection, service,
294 getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica(
295 HRegionInfo.FIRST_META_REGIONINFO, replicaId).getRegionName());
296 }
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313 private boolean verifyRegionLocation(final Connection connection,
314 AdminService.BlockingInterface hostingServer, final ServerName address,
315 final byte [] regionName)
316 throws IOException {
317 if (hostingServer == null) {
318 LOG.info("Passed hostingServer is null");
319 return false;
320 }
321 Throwable t;
322 PayloadCarryingRpcController controller = null;
323 if (connection instanceof ClusterConnection) {
324 controller = ((ClusterConnection) connection).getRpcControllerFactory().newController();
325 }
326 try {
327
328 return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
329 } catch (ConnectException e) {
330 t = e;
331 } catch (RetriesExhaustedException e) {
332 t = e;
333 } catch (RemoteException e) {
334 IOException ioe = e.unwrapRemoteException();
335 t = ioe;
336 } catch (IOException e) {
337 Throwable cause = e.getCause();
338 if (cause != null && cause instanceof EOFException) {
339 t = cause;
340 } else if (cause != null && cause.getMessage() != null
341 && cause.getMessage().contains("Connection reset")) {
342 t = cause;
343 } else {
344 t = e;
345 }
346 }
347 LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
348 " at address=" + address + ", exception=" + t.getMessage());
349 return false;
350 }
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365 private AdminService.BlockingInterface getMetaServerConnection(HConnection hConnection,
366 ZooKeeperWatcher zkw, long timeout, int replicaId)
367 throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
368 return getCachedConnection(hConnection, waitMetaRegionLocation(zkw, replicaId, timeout));
369 }
370
371
372
373
374
375
376
377
378 @SuppressWarnings("deprecation")
379 private static AdminService.BlockingInterface getCachedConnection(HConnection hConnection,
380 ServerName sn)
381 throws IOException {
382 if (sn == null) {
383 return null;
384 }
385 AdminService.BlockingInterface service = null;
386 try {
387 service = hConnection.getAdmin(sn);
388 } catch (RetriesExhaustedException e) {
389 if (e.getCause() != null && e.getCause() instanceof ConnectException) {
390
391 } else {
392 throw e;
393 }
394 } catch (SocketTimeoutException e) {
395 LOG.debug("Timed out connecting to " + sn);
396 } catch (NoRouteToHostException e) {
397 LOG.debug("Connecting to " + sn, e);
398 } catch (SocketException e) {
399 LOG.debug("Exception connecting to " + sn);
400 } catch (UnknownHostException e) {
401 LOG.debug("Unknown host exception connecting to " + sn);
402 } catch (FailedServerException e) {
403 if (LOG.isDebugEnabled()) {
404 LOG.debug("Server " + sn + " is in failed server list.");
405 }
406 } catch (IOException ioe) {
407 Throwable cause = ioe.getCause();
408 if (ioe instanceof ConnectException) {
409
410 } else if (cause != null && cause instanceof EOFException) {
411
412 } else if (cause != null && cause.getMessage() != null &&
413 cause.getMessage().toLowerCase().contains("connection reset")) {
414
415 } else {
416 throw ioe;
417 }
418
419 }
420 return service;
421 }
422
423
424
425
426
427
428
429
430
431 public static void setMetaLocation(ZooKeeperWatcher zookeeper,
432 ServerName serverName, RegionState.State state) throws KeeperException {
433 setMetaLocation(zookeeper, serverName, HRegionInfo.DEFAULT_REPLICA_ID, state);
434 }
435
436
437
438
439
440
441
442
443
444
445 public static void setMetaLocation(ZooKeeperWatcher zookeeper,
446 ServerName serverName, int replicaId, RegionState.State state) throws KeeperException {
447 LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName);
448
449
450 MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
451 .setServer(ProtobufUtil.toServerName(serverName))
452 .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
453 .setState(state.convert()).build();
454 byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
455 try {
456 ZKUtil.setData(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
457 } catch(KeeperException.NoNodeException nne) {
458 if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
459 LOG.debug("META region location doesn't exist, create it");
460 } else {
461 LOG.debug("META region location doesn't exist for replicaId " + replicaId +
462 ", create it");
463 }
464 ZKUtil.createAndWatch(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
465 }
466 }
467
468
469
470
471 public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException {
472 return getMetaRegionState(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
473 }
474
475
476
477
478
479
480
481
482 public static RegionState getMetaRegionState(ZooKeeperWatcher zkw, int replicaId)
483 throws KeeperException {
484 RegionState.State state = RegionState.State.OPEN;
485 ServerName serverName = null;
486 try {
487 byte[] data = ZKUtil.getData(zkw, zkw.getZNodeForReplica(replicaId));
488 if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
489 try {
490 int prefixLen = ProtobufUtil.lengthOfPBMagic();
491 ZooKeeperProtos.MetaRegionServer rl =
492 ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom
493 (data, prefixLen, data.length - prefixLen);
494 if (rl.hasState()) {
495 state = RegionState.State.convert(rl.getState());
496 }
497 HBaseProtos.ServerName sn = rl.getServer();
498 serverName = ServerName.valueOf(
499 sn.getHostName(), sn.getPort(), sn.getStartCode());
500 } catch (InvalidProtocolBufferException e) {
501 throw new DeserializationException("Unable to parse meta region location");
502 }
503 } else {
504
505 serverName = ServerName.parseFrom(data);
506 }
507 } catch (DeserializationException e) {
508 throw ZKUtil.convert(e);
509 } catch (InterruptedException e) {
510 Thread.currentThread().interrupt();
511 }
512 if (serverName == null) {
513 state = RegionState.State.OFFLINE;
514 }
515 return new RegionState(
516 RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId),
517 state, serverName);
518 }
519
520
521
522
523
524
525 public void deleteMetaLocation(ZooKeeperWatcher zookeeper)
526 throws KeeperException {
527 deleteMetaLocation(zookeeper, HRegionInfo.DEFAULT_REPLICA_ID);
528 }
529
530 public void deleteMetaLocation(ZooKeeperWatcher zookeeper, int replicaId)
531 throws KeeperException {
532 if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
533 LOG.info("Deleting hbase:meta region location in ZooKeeper");
534 } else {
535 LOG.info("Deleting hbase:meta for " + replicaId + " region location in ZooKeeper");
536 }
537 try {
538
539 ZKUtil.deleteNode(zookeeper, zookeeper.getZNodeForReplica(replicaId));
540 } catch(KeeperException.NoNodeException nne) {
541
542 }
543 }
544
545
546
547
548
549
550
551
552
553 public List<ServerName> blockUntilAvailable(final ZooKeeperWatcher zkw,
554 final long timeout, Configuration conf)
555 throws InterruptedException {
556 int numReplicasConfigured = 1;
557
558 List<ServerName> servers = new ArrayList<ServerName>();
559
560
561 ServerName server = blockUntilAvailable(zkw, timeout);
562 if (server == null) return null;
563 servers.add(server);
564
565 try {
566 List<String> metaReplicaNodes = zkw.getMetaReplicaNodes();
567 numReplicasConfigured = metaReplicaNodes.size();
568 } catch (KeeperException e) {
569 LOG.warn("Got ZK exception " + e);
570 }
571 for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) {
572
573 servers.add(getMetaRegionLocation(zkw, replicaId));
574 }
575 return servers;
576 }
577
578
579
580
581
582
583
584
585 public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw,
586 final long timeout)
587 throws InterruptedException {
588 return blockUntilAvailable(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
589 }
590
591
592
593
594
595
596
597
598
599 public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw, int replicaId,
600 final long timeout)
601 throws InterruptedException {
602 if (timeout < 0) throw new IllegalArgumentException();
603 if (zkw == null) throw new IllegalArgumentException();
604 Stopwatch sw = new Stopwatch().start();
605 ServerName sn = null;
606 try {
607 while (true) {
608 sn = getMetaRegionLocation(zkw, replicaId);
609 if (sn != null || sw.elapsedMillis()
610 > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
611 break;
612 }
613 Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
614 }
615 } finally {
616 sw.stop();
617 }
618 return sn;
619 }
620
621
622
623
624
625 public void stop() {
626 if (!stopped) {
627 LOG.debug("Stopping MetaTableLocator");
628 stopped = true;
629 }
630 }
631 }