View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.zookeeper;
20  
21  import java.io.Closeable;
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.List;
26  import java.util.concurrent.CopyOnWriteArrayList;
27  import java.util.concurrent.CountDownLatch;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.hbase.Abortable;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
35  import org.apache.hadoop.hbase.classification.InterfaceAudience;
36  import org.apache.zookeeper.KeeperException;
37  import org.apache.zookeeper.WatchedEvent;
38  import org.apache.zookeeper.Watcher;
39  import org.apache.zookeeper.ZooDefs;
40  import org.apache.zookeeper.data.ACL;
41  
42  /**
43   * Acts as the single ZooKeeper Watcher.  One instance of this is instantiated
44   * for each Master, RegionServer, and client process.
45   *
46   * <p>This is the only class that implements {@link Watcher}.  Other internal
47   * classes which need to be notified of ZooKeeper events must register with
48   * the local instance of this watcher via {@link #registerListener}.
49   *
50   * <p>This class also holds and manages the connection to ZooKeeper.  Code to
51   * deal with connection related events and exceptions are handled here.
52   */
53  @InterfaceAudience.Private
54  public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
55    private static final Log LOG = LogFactory.getLog(ZooKeeperWatcher.class);
56  
57    // Identifier for this watcher (for logging only).  It is made of the prefix
58    // passed on construction and the zookeeper sessionid.
59    private String prefix;
60    private String identifier;
61  
62    // zookeeper quorum
63    private String quorum;
64  
65    // zookeeper connection
66    private RecoverableZooKeeper recoverableZooKeeper;
67  
68    // abortable in case of zk failure
69    protected Abortable abortable;
70    // Used if abortable is null
71    private boolean aborted = false;
72  
73    // listeners to be notified
74    private final List<ZooKeeperListener> listeners =
75      new CopyOnWriteArrayList<ZooKeeperListener>();
76  
77    // Used by ZKUtil:waitForZKConnectionIfAuthenticating to wait for SASL
78    // negotiation to complete
79    public CountDownLatch saslLatch = new CountDownLatch(1);
80  
81    // node names
82  
83    // base znode for this cluster
84    public String baseZNode;
85    // znode containing location of server hosting meta region
86    public String metaServerZNode;
87    // znode containing ephemeral nodes of the regionservers
88    public String rsZNode;
89    // znode containing ephemeral nodes of the draining regionservers
90    public String drainingZNode;
91    // znode of currently active master
92    private String masterAddressZNode;
93    // znode of this master in backup master directory, if not the active master
94    public String backupMasterAddressesZNode;
95    // znode containing the current cluster state
96    public String clusterStateZNode;
97    // znode used for table disabling/enabling
98    @Deprecated
99    public String tableZNode;
100   // znode containing the unique cluster ID
101   public String clusterIdZNode;
102   // znode used for log splitting work assignment
103   public String splitLogZNode;
104   // znode containing the state of the load balancer
105   public String balancerZNode;
106   // znode containing the lock for the tables
107   public String tableLockZNode;
108   // znode containing the state of recovering regions
109   public String recoveringRegionsZNode;
110   // znode containing namespace descriptors
111   public static String namespaceZNode = "namespace";
112 
113 
114   private final Configuration conf;
115 
116   private final Exception constructorCaller;
117 
118   /**
119    * Instantiate a ZooKeeper connection and watcher.
120    * @param identifier string that is passed to RecoverableZookeeper to be used as
121    * identifier for this instance. Use null for default.
122    * @throws IOException
123    * @throws ZooKeeperConnectionException
124    */
125   public ZooKeeperWatcher(Configuration conf, String identifier,
126       Abortable abortable) throws ZooKeeperConnectionException, IOException {
127     this(conf, identifier, abortable, false);
128   }
129 
130   /**
131    * Instantiate a ZooKeeper connection and watcher.
132    * @param conf
133    * @param identifier string that is passed to RecoverableZookeeper to be used as identifier for
134    *          this instance. Use null for default.
135    * @param abortable Can be null if there is on error there is no host to abort: e.g. client
136    *          context.
137    * @param canCreateBaseZNode
138    * @throws IOException
139    * @throws ZooKeeperConnectionException
140    */
141   public ZooKeeperWatcher(Configuration conf, String identifier,
142       Abortable abortable, boolean canCreateBaseZNode)
143   throws IOException, ZooKeeperConnectionException {
144     this.conf = conf;
145     // Capture a stack trace now.  Will print it out later if problem so we can
146     // distingush amongst the myriad ZKWs.
147     try {
148       throw new Exception("ZKW CONSTRUCTOR STACK TRACE FOR DEBUGGING");
149     } catch (Exception e) {
150       this.constructorCaller = e;
151     }
152     this.quorum = ZKConfig.getZKQuorumServersString(conf);
153     this.prefix = identifier;
154     // Identifier will get the sessionid appended later below down when we
155     // handle the syncconnect event.
156     this.identifier = identifier + "0x0";
157     this.abortable = abortable;
158     setNodeNames(conf);
159     this.recoverableZooKeeper = ZKUtil.connect(conf, quorum, this, identifier);
160     if (canCreateBaseZNode) {
161       createBaseZNodes();
162     }
163   }
164 
165   private void createBaseZNodes() throws ZooKeeperConnectionException {
166     try {
167       // Create all the necessary "directories" of znodes
168       ZKUtil.createWithParents(this, baseZNode);
169       ZKUtil.createAndFailSilent(this, rsZNode);
170       ZKUtil.createAndFailSilent(this, drainingZNode);
171       ZKUtil.createAndFailSilent(this, tableZNode);
172       ZKUtil.createAndFailSilent(this, splitLogZNode);
173       ZKUtil.createAndFailSilent(this, backupMasterAddressesZNode);
174       ZKUtil.createAndFailSilent(this, tableLockZNode);
175       ZKUtil.createAndFailSilent(this, recoveringRegionsZNode);
176     } catch (KeeperException e) {
177       throw new ZooKeeperConnectionException(
178           prefix("Unexpected KeeperException creating base node"), e);
179     }
180   }
181 
182   @Override
183   public String toString() {
184     return this.identifier + ", quorum=" + quorum + ", baseZNode=" + baseZNode;
185   }
186 
187   /**
188    * Adds this instance's identifier as a prefix to the passed <code>str</code>
189    * @param str String to amend.
190    * @return A new string with this instance's identifier as prefix: e.g.
191    * if passed 'hello world', the returned string could be
192    */
193   public String prefix(final String str) {
194     return this.toString() + " " + str;
195   }
196 
197   /**
198    * Set the local variable node names using the specified configuration.
199    */
200   private void setNodeNames(Configuration conf) {
201     baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
202         HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
203     metaServerZNode = ZKUtil.joinZNode(baseZNode,
204         conf.get("zookeeper.znode.metaserver", "meta-region-server"));
205     rsZNode = ZKUtil.joinZNode(baseZNode,
206         conf.get("zookeeper.znode.rs", "rs"));
207     drainingZNode = ZKUtil.joinZNode(baseZNode,
208         conf.get("zookeeper.znode.draining.rs", "draining"));
209     masterAddressZNode = ZKUtil.joinZNode(baseZNode,
210         conf.get("zookeeper.znode.master", "master"));
211     backupMasterAddressesZNode = ZKUtil.joinZNode(baseZNode,
212         conf.get("zookeeper.znode.backup.masters", "backup-masters"));
213     clusterStateZNode = ZKUtil.joinZNode(baseZNode,
214         conf.get("zookeeper.znode.state", "running"));
215     tableZNode = ZKUtil.joinZNode(baseZNode,
216         conf.get("zookeeper.znode.tableEnableDisable", "table"));
217     clusterIdZNode = ZKUtil.joinZNode(baseZNode,
218         conf.get("zookeeper.znode.clusterId", "hbaseid"));
219     splitLogZNode = ZKUtil.joinZNode(baseZNode,
220         conf.get("zookeeper.znode.splitlog", HConstants.SPLIT_LOGDIR_NAME));
221     balancerZNode = ZKUtil.joinZNode(baseZNode,
222         conf.get("zookeeper.znode.balancer", "balancer"));
223     tableLockZNode = ZKUtil.joinZNode(baseZNode,
224         conf.get("zookeeper.znode.tableLock", "table-lock"));
225     recoveringRegionsZNode = ZKUtil.joinZNode(baseZNode,
226         conf.get("zookeeper.znode.recovering.regions", "recovering-regions"));
227     namespaceZNode = ZKUtil.joinZNode(baseZNode,
228         conf.get("zookeeper.znode.namespace", "namespace"));
229   }
230 
231   /**
232    * Register the specified listener to receive ZooKeeper events.
233    * @param listener
234    */
235   public void registerListener(ZooKeeperListener listener) {
236     listeners.add(listener);
237   }
238 
239   /**
240    * Register the specified listener to receive ZooKeeper events and add it as
241    * the first in the list of current listeners.
242    * @param listener
243    */
244   public void registerListenerFirst(ZooKeeperListener listener) {
245     listeners.add(0, listener);
246   }
247 
248   public void unregisterListener(ZooKeeperListener listener) {
249     listeners.remove(listener);
250   }
251 
252   /**
253    * Clean all existing listeners
254    */
255   public void unregisterAllListeners() {
256     listeners.clear();
257   }
258 
259   /**
260    * Get a copy of current registered listeners
261    */
262   public List<ZooKeeperListener> getListeners() {
263     return new ArrayList<ZooKeeperListener>(listeners);
264   }
265 
266   /**
267    * @return The number of currently registered listeners
268    */
269   public int getNumberOfListeners() {
270     return listeners.size();
271   }
272 
273   /**
274    * Get the connection to ZooKeeper.
275    * @return connection reference to zookeeper
276    */
277   public RecoverableZooKeeper getRecoverableZooKeeper() {
278     return recoverableZooKeeper;
279   }
280 
281   public void reconnectAfterExpiration() throws IOException, KeeperException, InterruptedException {
282     recoverableZooKeeper.reconnectAfterExpiration();
283   }
284 
285   /**
286    * Get the quorum address of this instance.
287    * @return quorum string of this zookeeper connection instance
288    */
289   public String getQuorum() {
290     return quorum;
291   }
292 
293   /**
294    * Method called from ZooKeeper for events and connection status.
295    * <p>
296    * Valid events are passed along to listeners.  Connection status changes
297    * are dealt with locally.
298    */
299   @Override
300   public void process(WatchedEvent event) {
301     LOG.debug(prefix("Received ZooKeeper Event, " +
302         "type=" + event.getType() + ", " +
303         "state=" + event.getState() + ", " +
304         "path=" + event.getPath()));
305 
306     switch(event.getType()) {
307 
308       // If event type is NONE, this is a connection status change
309       case None: {
310         connectionEvent(event);
311         break;
312       }
313 
314       // Otherwise pass along to the listeners
315 
316       case NodeCreated: {
317         for(ZooKeeperListener listener : listeners) {
318           listener.nodeCreated(event.getPath());
319         }
320         break;
321       }
322 
323       case NodeDeleted: {
324         for(ZooKeeperListener listener : listeners) {
325           listener.nodeDeleted(event.getPath());
326         }
327         break;
328       }
329 
330       case NodeDataChanged: {
331         for(ZooKeeperListener listener : listeners) {
332           listener.nodeDataChanged(event.getPath());
333         }
334         break;
335       }
336 
337       case NodeChildrenChanged: {
338         for(ZooKeeperListener listener : listeners) {
339           listener.nodeChildrenChanged(event.getPath());
340         }
341         break;
342       }
343     }
344   }
345 
346   // Connection management
347 
348   /**
349    * Called when there is a connection-related event via the Watcher callback.
350    * <p>
351    * If Disconnected or Expired, this should shutdown the cluster. But, since
352    * we send a KeeperException.SessionExpiredException along with the abort
353    * call, it's possible for the Abortable to catch it and try to create a new
354    * session with ZooKeeper. This is what the client does in HCM.
355    * <p>
356    * @param event
357    */
358   private void connectionEvent(WatchedEvent event) {
359     switch(event.getState()) {
360       case SyncConnected:
361         // Now, this callback can be invoked before the this.zookeeper is set.
362         // Wait a little while.
363         long finished = System.currentTimeMillis() +
364           this.conf.getLong("hbase.zookeeper.watcher.sync.connected.wait", 2000);
365         while (System.currentTimeMillis() < finished) {
366           try {
367             Thread.sleep(1);
368           } catch (InterruptedException e) {
369             LOG.warn("Interrupted while sleeping");
370             throw new RuntimeException("Interrupted while waiting for" +
371                 " recoverableZooKeeper is set");
372           }
373           if (this.recoverableZooKeeper != null) break;
374         }
375 
376         if (this.recoverableZooKeeper == null) {
377           LOG.error("ZK is null on connection event -- see stack trace " +
378             "for the stack trace when constructor was called on this zkw",
379             this.constructorCaller);
380           throw new NullPointerException("ZK is null");
381         }
382         this.identifier = this.prefix + "-0x" +
383           Long.toHexString(this.recoverableZooKeeper.getSessionId());
384         // Update our identifier.  Otherwise ignore.
385         LOG.debug(this.identifier + " connected");
386         break;
387 
388       // Abort the server if Disconnected or Expired
389       case Disconnected:
390         LOG.debug(prefix("Received Disconnected from ZooKeeper, ignoring"));
391         break;
392 
393       case Expired:
394         String msg = prefix(this.identifier + " received expired from " +
395           "ZooKeeper, aborting");
396         // TODO: One thought is to add call to ZooKeeperListener so say,
397         // ZooKeeperNodeTracker can zero out its data values.
398         if (this.abortable != null) {
399           this.abortable.abort(msg, new KeeperException.SessionExpiredException());
400         }
401         break;
402 
403       case ConnectedReadOnly:
404       case SaslAuthenticated:
405       case AuthFailed:
406         break;
407 
408       default:
409         throw new IllegalStateException("Received event is not valid: " + event.getState());
410     }
411   }
412 
413   /**
414    * Forces a synchronization of this ZooKeeper client connection.
415    * <p>
416    * Executing this method before running other methods will ensure that the
417    * subsequent operations are up-to-date and consistent as of the time that
418    * the sync is complete.
419    * <p>
420    * This is used for compareAndSwap type operations where we need to read the
421    * data of an existing node and delete or transition that node, utilizing the
422    * previously read version and data.  We want to ensure that the version read
423    * is up-to-date from when we begin the operation.
424    */
425   public void sync(String path) throws KeeperException {
426     this.recoverableZooKeeper.sync(path, null, null);
427   }
428 
429   /**
430    * Handles KeeperExceptions in client calls.
431    * <p>
432    * This may be temporary but for now this gives one place to deal with these.
433    * <p>
434    * TODO: Currently this method rethrows the exception to let the caller handle
435    * <p>
436    * @param ke
437    * @throws KeeperException
438    */
439   public void keeperException(KeeperException ke)
440   throws KeeperException {
441     LOG.error(prefix("Received unexpected KeeperException, re-throwing exception"), ke);
442     throw ke;
443   }
444 
445   /**
446    * Handles InterruptedExceptions in client calls.
447    * <p>
448    * This may be temporary but for now this gives one place to deal with these.
449    * <p>
450    * TODO: Currently, this method does nothing.
451    *       Is this ever expected to happen?  Do we abort or can we let it run?
452    *       Maybe this should be logged as WARN?  It shouldn't happen?
453    * <p>
454    * @param ie
455    */
456   public void interruptedException(InterruptedException ie) {
457     LOG.debug(prefix("Received InterruptedException, doing nothing here"), ie);
458     // At least preserver interrupt.
459     Thread.currentThread().interrupt();
460     // no-op
461   }
462 
463   /**
464    * Close the connection to ZooKeeper.
465    *
466    * @throws InterruptedException
467    */
468   public void close() {
469     try {
470       if (recoverableZooKeeper != null) {
471         recoverableZooKeeper.close();
472       }
473     } catch (InterruptedException e) {
474       Thread.currentThread().interrupt();
475     }
476   }
477 
478   public Configuration getConfiguration() {
479     return conf;
480   }
481 
482   @Override
483   public void abort(String why, Throwable e) {
484     if (this.abortable != null) this.abortable.abort(why, e);
485     else this.aborted = true;
486   }
487 
488   @Override
489   public boolean isAborted() {
490     return this.abortable == null? this.aborted: this.abortable.isAborted();
491   }
492 
493   /**
494    * @return Path to the currently active master.
495    */
496   public String getMasterAddressZNode() {
497     return this.masterAddressZNode;
498   }
499 
500 }