View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.zookeeper;
20  
21  import java.io.Closeable;
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.List;
26  import java.util.concurrent.CopyOnWriteArrayList;
27  import java.util.concurrent.CountDownLatch;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.hbase.Abortable;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
35  import org.apache.hadoop.hbase.classification.InterfaceAudience;
36  import org.apache.zookeeper.KeeperException;
37  import org.apache.zookeeper.WatchedEvent;
38  import org.apache.zookeeper.Watcher;
39  import org.apache.zookeeper.ZooDefs;
40  import org.apache.zookeeper.data.ACL;
41  
42  /**
43   * Acts as the single ZooKeeper Watcher.  One instance of this is instantiated
44   * for each Master, RegionServer, and client process.
45   *
46   * <p>This is the only class that implements {@link Watcher}.  Other internal
47   * classes which need to be notified of ZooKeeper events must register with
48   * the local instance of this watcher via {@link #registerListener}.
49   *
50   * <p>This class also holds and manages the connection to ZooKeeper.  Code to
51   * deal with connection related events and exceptions are handled here.
52   */
53  @InterfaceAudience.Private
54  public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
55    private static final Log LOG = LogFactory.getLog(ZooKeeperWatcher.class);
56  
57    // Identifier for this watcher (for logging only).  It is made of the prefix
58    // passed on construction and the zookeeper sessionid.
59    private String identifier;
60  
61    // zookeeper quorum
62    private String quorum;
63  
64    // zookeeper connection
65    private RecoverableZooKeeper recoverableZooKeeper;
66  
67    // abortable in case of zk failure
68    protected Abortable abortable;
69    // Used if abortable is null
70    private boolean aborted = false;
71  
72    // listeners to be notified
73    private final List<ZooKeeperListener> listeners =
74      new CopyOnWriteArrayList<ZooKeeperListener>();
75  
76    // Used by ZKUtil:waitForZKConnectionIfAuthenticating to wait for SASL
77    // negotiation to complete
78    public CountDownLatch saslLatch = new CountDownLatch(1);
79  
80    // node names
81  
82    // base znode for this cluster
83    public String baseZNode;
84    // znode containing location of server hosting meta region
85    public String metaServerZNode;
86    // znode containing ephemeral nodes of the regionservers
87    public String rsZNode;
88    // znode containing ephemeral nodes of the draining regionservers
89    public String drainingZNode;
90    // znode of currently active master
91    private String masterAddressZNode;
92    // znode of this master in backup master directory, if not the active master
93    public String backupMasterAddressesZNode;
94    // znode containing the current cluster state
95    public String clusterStateZNode;
96    // znode used for table disabling/enabling
97    @Deprecated
98    public String tableZNode;
99    // znode containing the unique cluster ID
100   public String clusterIdZNode;
101   // znode used for log splitting work assignment
102   public String splitLogZNode;
103   // znode containing the state of the load balancer
104   public String balancerZNode;
105   // znode containing the lock for the tables
106   public String tableLockZNode;
107   // znode containing the state of recovering regions
108   public String recoveringRegionsZNode;
109   // znode containing namespace descriptors
110   public static String namespaceZNode = "namespace";
111 
112 
113   // Certain ZooKeeper nodes need to be world-readable
114   public static final List<ACL> CREATOR_ALL_AND_WORLD_READABLE =
115     Arrays.asList(new ACL(ZooDefs.Perms.READ,ZooDefs.Ids.ANYONE_ID_UNSAFE),
116       new ACL(ZooDefs.Perms.ALL,ZooDefs.Ids.AUTH_IDS));
117 
118   private final Configuration conf;
119 
120   private final Exception constructorCaller;
121 
122   /**
123    * Instantiate a ZooKeeper connection and watcher.
124    * @param identifier string that is passed to RecoverableZookeeper to be used as
125    * identifier for this instance. Use null for default.
126    * @throws IOException
127    * @throws ZooKeeperConnectionException
128    */
129   public ZooKeeperWatcher(Configuration conf, String identifier,
130       Abortable abortable) throws ZooKeeperConnectionException, IOException {
131     this(conf, identifier, abortable, false);
132   }
133 
134   /**
135    * Instantiate a ZooKeeper connection and watcher.
136    * @param conf
137    * @param identifier string that is passed to RecoverableZookeeper to be used as identifier for
138    *          this instance. Use null for default.
139    * @param abortable Can be null if there is on error there is no host to abort: e.g. client
140    *          context.
141    * @param canCreateBaseZNode
142    * @throws IOException
143    * @throws ZooKeeperConnectionException
144    */
145   public ZooKeeperWatcher(Configuration conf, String identifier,
146       Abortable abortable, boolean canCreateBaseZNode)
147   throws IOException, ZooKeeperConnectionException {
148     this.conf = conf;
149     // Capture a stack trace now.  Will print it out later if problem so we can
150     // distingush amongst the myriad ZKWs.
151     try {
152       throw new Exception("ZKW CONSTRUCTOR STACK TRACE FOR DEBUGGING");
153     } catch (Exception e) {
154       this.constructorCaller = e;
155     }
156     this.quorum = ZKConfig.getZKQuorumServersString(conf);
157     // Identifier will get the sessionid appended later below down when we
158     // handle the syncconnect event.
159     this.identifier = identifier;
160     this.abortable = abortable;
161     setNodeNames(conf);
162     this.recoverableZooKeeper = ZKUtil.connect(conf, quorum, this, identifier);
163     if (canCreateBaseZNode) {
164       createBaseZNodes();
165     }
166   }
167 
168   private void createBaseZNodes() throws ZooKeeperConnectionException {
169     try {
170       // Create all the necessary "directories" of znodes
171       ZKUtil.createWithParents(this, baseZNode);
172       ZKUtil.createAndFailSilent(this, rsZNode);
173       ZKUtil.createAndFailSilent(this, drainingZNode);
174       ZKUtil.createAndFailSilent(this, tableZNode);
175       ZKUtil.createAndFailSilent(this, splitLogZNode);
176       ZKUtil.createAndFailSilent(this, backupMasterAddressesZNode);
177       ZKUtil.createAndFailSilent(this, tableLockZNode);
178       ZKUtil.createAndFailSilent(this, recoveringRegionsZNode);
179     } catch (KeeperException e) {
180       throw new ZooKeeperConnectionException(
181           prefix("Unexpected KeeperException creating base node"), e);
182     }
183   }
184 
185   @Override
186   public String toString() {
187     return this.identifier + ", quorum=" + quorum + ", baseZNode=" + baseZNode;
188   }
189 
190   /**
191    * Adds this instance's identifier as a prefix to the passed <code>str</code>
192    * @param str String to amend.
193    * @return A new string with this instance's identifier as prefix: e.g.
194    * if passed 'hello world', the returned string could be
195    */
196   public String prefix(final String str) {
197     return this.toString() + " " + str;
198   }
199 
200   /**
201    * Set the local variable node names using the specified configuration.
202    */
203   private void setNodeNames(Configuration conf) {
204     baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
205         HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
206     metaServerZNode = ZKUtil.joinZNode(baseZNode,
207         conf.get("zookeeper.znode.metaserver", "meta-region-server"));
208     rsZNode = ZKUtil.joinZNode(baseZNode,
209         conf.get("zookeeper.znode.rs", "rs"));
210     drainingZNode = ZKUtil.joinZNode(baseZNode,
211         conf.get("zookeeper.znode.draining.rs", "draining"));
212     masterAddressZNode = ZKUtil.joinZNode(baseZNode,
213         conf.get("zookeeper.znode.master", "master"));
214     backupMasterAddressesZNode = ZKUtil.joinZNode(baseZNode,
215         conf.get("zookeeper.znode.backup.masters", "backup-masters"));
216     clusterStateZNode = ZKUtil.joinZNode(baseZNode,
217         conf.get("zookeeper.znode.state", "running"));
218     tableZNode = ZKUtil.joinZNode(baseZNode,
219         conf.get("zookeeper.znode.tableEnableDisable", "table"));
220     clusterIdZNode = ZKUtil.joinZNode(baseZNode,
221         conf.get("zookeeper.znode.clusterId", "hbaseid"));
222     splitLogZNode = ZKUtil.joinZNode(baseZNode,
223         conf.get("zookeeper.znode.splitlog", HConstants.SPLIT_LOGDIR_NAME));
224     balancerZNode = ZKUtil.joinZNode(baseZNode,
225         conf.get("zookeeper.znode.balancer", "balancer"));
226     tableLockZNode = ZKUtil.joinZNode(baseZNode,
227         conf.get("zookeeper.znode.tableLock", "table-lock"));
228     recoveringRegionsZNode = ZKUtil.joinZNode(baseZNode,
229         conf.get("zookeeper.znode.recovering.regions", "recovering-regions"));
230     namespaceZNode = ZKUtil.joinZNode(baseZNode,
231         conf.get("zookeeper.znode.namespace", "namespace"));
232   }
233 
234   /**
235    * Register the specified listener to receive ZooKeeper events.
236    * @param listener
237    */
238   public void registerListener(ZooKeeperListener listener) {
239     listeners.add(listener);
240   }
241 
242   /**
243    * Register the specified listener to receive ZooKeeper events and add it as
244    * the first in the list of current listeners.
245    * @param listener
246    */
247   public void registerListenerFirst(ZooKeeperListener listener) {
248     listeners.add(0, listener);
249   }
250 
251   public void unregisterListener(ZooKeeperListener listener) {
252     listeners.remove(listener);
253   }
254 
255   /**
256    * Clean all existing listeners
257    */
258   public void unregisterAllListeners() {
259     listeners.clear();
260   }
261 
262   /**
263    * Get a copy of current registered listeners
264    */
265   public List<ZooKeeperListener> getListeners() {
266     return new ArrayList<ZooKeeperListener>(listeners);
267   }
268 
269   /**
270    * @return The number of currently registered listeners
271    */
272   public int getNumberOfListeners() {
273     return listeners.size();
274   }
275 
276   /**
277    * Get the connection to ZooKeeper.
278    * @return connection reference to zookeeper
279    */
280   public RecoverableZooKeeper getRecoverableZooKeeper() {
281     return recoverableZooKeeper;
282   }
283 
284   public void reconnectAfterExpiration() throws IOException, KeeperException, InterruptedException {
285     recoverableZooKeeper.reconnectAfterExpiration();
286   }
287 
288   /**
289    * Get the quorum address of this instance.
290    * @return quorum string of this zookeeper connection instance
291    */
292   public String getQuorum() {
293     return quorum;
294   }
295 
296   /**
297    * Method called from ZooKeeper for events and connection status.
298    * <p>
299    * Valid events are passed along to listeners.  Connection status changes
300    * are dealt with locally.
301    */
302   @Override
303   public void process(WatchedEvent event) {
304     LOG.debug(prefix("Received ZooKeeper Event, " +
305         "type=" + event.getType() + ", " +
306         "state=" + event.getState() + ", " +
307         "path=" + event.getPath()));
308 
309     switch(event.getType()) {
310 
311       // If event type is NONE, this is a connection status change
312       case None: {
313         connectionEvent(event);
314         break;
315       }
316 
317       // Otherwise pass along to the listeners
318 
319       case NodeCreated: {
320         for(ZooKeeperListener listener : listeners) {
321           listener.nodeCreated(event.getPath());
322         }
323         break;
324       }
325 
326       case NodeDeleted: {
327         for(ZooKeeperListener listener : listeners) {
328           listener.nodeDeleted(event.getPath());
329         }
330         break;
331       }
332 
333       case NodeDataChanged: {
334         for(ZooKeeperListener listener : listeners) {
335           listener.nodeDataChanged(event.getPath());
336         }
337         break;
338       }
339 
340       case NodeChildrenChanged: {
341         for(ZooKeeperListener listener : listeners) {
342           listener.nodeChildrenChanged(event.getPath());
343         }
344         break;
345       }
346     }
347   }
348 
349   // Connection management
350 
351   /**
352    * Called when there is a connection-related event via the Watcher callback.
353    * <p>
354    * If Disconnected or Expired, this should shutdown the cluster. But, since
355    * we send a KeeperException.SessionExpiredException along with the abort
356    * call, it's possible for the Abortable to catch it and try to create a new
357    * session with ZooKeeper. This is what the client does in HCM.
358    * <p>
359    * @param event
360    */
361   private void connectionEvent(WatchedEvent event) {
362     switch(event.getState()) {
363       case SyncConnected:
364         // Now, this callback can be invoked before the this.zookeeper is set.
365         // Wait a little while.
366         long finished = System.currentTimeMillis() +
367           this.conf.getLong("hbase.zookeeper.watcher.sync.connected.wait", 2000);
368         while (System.currentTimeMillis() < finished) {
369           try {
370             Thread.sleep(1);
371           } catch (InterruptedException e) {
372             LOG.warn("Interrupted while sleeping");
373             throw new RuntimeException("Interrupted while waiting for" +
374                 " recoverableZooKeeper is set");
375           }
376           if (this.recoverableZooKeeper != null) break;
377         }
378 
379         if (this.recoverableZooKeeper == null) {
380           LOG.error("ZK is null on connection event -- see stack trace " +
381             "for the stack trace when constructor was called on this zkw",
382             this.constructorCaller);
383           throw new NullPointerException("ZK is null");
384         }
385         this.identifier = this.identifier + "-0x" +
386           Long.toHexString(this.recoverableZooKeeper.getSessionId());
387         // Update our identifier.  Otherwise ignore.
388         LOG.debug(this.identifier + " connected");
389         break;
390 
391       // Abort the server if Disconnected or Expired
392       case Disconnected:
393         LOG.debug(prefix("Received Disconnected from ZooKeeper, ignoring"));
394         break;
395 
396       case Expired:
397         String msg = prefix(this.identifier + " received expired from " +
398           "ZooKeeper, aborting");
399         // TODO: One thought is to add call to ZooKeeperListener so say,
400         // ZooKeeperNodeTracker can zero out its data values.
401         if (this.abortable != null) {
402           this.abortable.abort(msg, new KeeperException.SessionExpiredException());
403         }
404         break;
405 
406       case ConnectedReadOnly:
407       case SaslAuthenticated:
408       case AuthFailed:
409         break;
410 
411       default:
412         throw new IllegalStateException("Received event is not valid: " + event.getState());
413     }
414   }
415 
416   /**
417    * Forces a synchronization of this ZooKeeper client connection.
418    * <p>
419    * Executing this method before running other methods will ensure that the
420    * subsequent operations are up-to-date and consistent as of the time that
421    * the sync is complete.
422    * <p>
423    * This is used for compareAndSwap type operations where we need to read the
424    * data of an existing node and delete or transition that node, utilizing the
425    * previously read version and data.  We want to ensure that the version read
426    * is up-to-date from when we begin the operation.
427    */
428   public void sync(String path) throws KeeperException {
429     this.recoverableZooKeeper.sync(path, null, null);
430   }
431 
432   /**
433    * Handles KeeperExceptions in client calls.
434    * <p>
435    * This may be temporary but for now this gives one place to deal with these.
436    * <p>
437    * TODO: Currently this method rethrows the exception to let the caller handle
438    * <p>
439    * @param ke
440    * @throws KeeperException
441    */
442   public void keeperException(KeeperException ke)
443   throws KeeperException {
444     LOG.error(prefix("Received unexpected KeeperException, re-throwing exception"), ke);
445     throw ke;
446   }
447 
448   /**
449    * Handles InterruptedExceptions in client calls.
450    * <p>
451    * This may be temporary but for now this gives one place to deal with these.
452    * <p>
453    * TODO: Currently, this method does nothing.
454    *       Is this ever expected to happen?  Do we abort or can we let it run?
455    *       Maybe this should be logged as WARN?  It shouldn't happen?
456    * <p>
457    * @param ie
458    */
459   public void interruptedException(InterruptedException ie) {
460     LOG.debug(prefix("Received InterruptedException, doing nothing here"), ie);
461     // At least preserver interrupt.
462     Thread.currentThread().interrupt();
463     // no-op
464   }
465 
466   /**
467    * Close the connection to ZooKeeper.
468    *
469    * @throws InterruptedException
470    */
471   public void close() {
472     try {
473       if (recoverableZooKeeper != null) {
474         recoverableZooKeeper.close();
475       }
476     } catch (InterruptedException e) {
477       Thread.currentThread().interrupt();
478     }
479   }
480 
481   public Configuration getConfiguration() {
482     return conf;
483   }
484 
485   @Override
486   public void abort(String why, Throwable e) {
487     if (this.abortable != null) this.abortable.abort(why, e);
488     else this.aborted = true;
489   }
490 
491   @Override
492   public boolean isAborted() {
493     return this.abortable == null? this.aborted: this.abortable.isAborted();
494   }
495 
496   /**
497    * @return Path to the currently active master.
498    */
499   public String getMasterAddressZNode() {
500     return this.masterAddressZNode;
501   }
502 
503 }