View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.zookeeper;
20  
21  import java.io.BufferedReader;
22  import java.io.IOException;
23  import java.io.InputStreamReader;
24  import java.io.PrintWriter;
25  import java.net.InetSocketAddress;
26  import java.net.Socket;
27  import java.util.ArrayList;
28  import java.util.Arrays;
29  import java.util.Deque;
30  import java.util.HashMap;
31  import java.util.LinkedList;
32  import java.util.List;
33  import java.util.Map;
34  
35  import javax.security.auth.login.AppConfigurationEntry;
36  import javax.security.auth.login.AppConfigurationEntry.LoginModuleControlFlag;
37  
38  import org.apache.commons.lang.StringUtils;
39  import org.apache.commons.logging.Log;
40  import org.apache.commons.logging.LogFactory;
41  import org.apache.hadoop.conf.Configuration;
42  import org.apache.hadoop.hbase.HBaseConfiguration;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.ServerName;
45  import org.apache.hadoop.hbase.classification.InterfaceAudience;
46  import org.apache.hadoop.hbase.exceptions.DeserializationException;
47  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
48  import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos;
49  import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds;
50  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
51  import org.apache.hadoop.hbase.util.ByteStringer;
52  import org.apache.hadoop.hbase.util.Bytes;
53  import org.apache.hadoop.hbase.util.Threads;
54  import org.apache.hadoop.hbase.zookeeper.ZKUtil.ZKUtilOp.CreateAndFailSilent;
55  import org.apache.hadoop.hbase.zookeeper.ZKUtil.ZKUtilOp.DeleteNodeFailSilent;
56  import org.apache.hadoop.hbase.zookeeper.ZKUtil.ZKUtilOp.SetData;
57  import org.apache.hadoop.security.SecurityUtil;
58  import org.apache.hadoop.security.authentication.util.KerberosUtil;
59  import org.apache.zookeeper.AsyncCallback;
60  import org.apache.zookeeper.CreateMode;
61  import org.apache.zookeeper.KeeperException;
62  import org.apache.zookeeper.KeeperException.NoNodeException;
63  import org.apache.zookeeper.Op;
64  import org.apache.zookeeper.Watcher;
65  import org.apache.zookeeper.ZooDefs.Ids;
66  import org.apache.zookeeper.ZooDefs.Perms;
67  import org.apache.zookeeper.ZooKeeper;
68  import org.apache.zookeeper.client.ZooKeeperSaslClient;
69  import org.apache.zookeeper.data.ACL;
70  import org.apache.zookeeper.data.Id;
71  import org.apache.zookeeper.data.Stat;
72  import org.apache.zookeeper.proto.CreateRequest;
73  import org.apache.zookeeper.proto.DeleteRequest;
74  import org.apache.zookeeper.proto.SetDataRequest;
75  import org.apache.zookeeper.server.ZooKeeperSaslServer;
76  
77  import com.google.common.annotations.VisibleForTesting;
78  import com.google.protobuf.InvalidProtocolBufferException;
79  
80  /**
81   * Internal HBase utility class for ZooKeeper.
82   *
83   * <p>Contains only static methods and constants.
84   *
85   * <p>Methods all throw {@link KeeperException} if there is an unexpected
86   * zookeeper exception, so callers of these methods must handle appropriately.
87   * If ZK is required for the operation, the server will need to be aborted.
88   */
89  @InterfaceAudience.Private
90  public class ZKUtil {
91    private static final Log LOG = LogFactory.getLog(ZKUtil.class);
92  
93    // TODO: Replace this with ZooKeeper constant when ZOOKEEPER-277 is resolved.
94    public static final char ZNODE_PATH_SEPARATOR = '/';
95    private static int zkDumpConnectionTimeOut;
96  
97    // The Quorum for the ZK cluster can have one the following format (see examples below):
98    // (1). s1,s2,s3 (no client port in the list, the client port could be obtained from clientPort)
99    // (2). s1:p1,s2:p2,s3:p3 (with client port, which could be same or different for each server,
100   //      in this case, the clientPort would be ignored)
101   // (3). s1:p1,s2,s3:p3 (mix of (1) and (2) - if port is not specified in a server, it would use
102   //      the clientPort; otherwise, it would use the specified port)
103   @VisibleForTesting
104   public static class ZKClusterKey {
105     public String quorumString;
106     public int clientPort;
107     public String znodeParent;
108 
109     ZKClusterKey(String quorumString, int clientPort, String znodeParent) {
110       this.quorumString = quorumString;
111       this.clientPort = clientPort;
112       this.znodeParent = znodeParent;
113     }
114   }
115 
116   /**
117    * Creates a new connection to ZooKeeper, pulling settings and ensemble config
118    * from the specified configuration object using methods from {@link ZKConfig}.
119    *
120    * Sets the connection status monitoring watcher to the specified watcher.
121    *
122    * @param conf configuration to pull ensemble and other settings from
123    * @param watcher watcher to monitor connection changes
124    * @return connection to zookeeper
125    * @throws IOException if unable to connect to zk or config problem
126    */
127   public static RecoverableZooKeeper connect(Configuration conf, Watcher watcher)
128   throws IOException {
129     String ensemble = ZKConfig.getZKQuorumServersString(conf);
130     return connect(conf, ensemble, watcher);
131   }
132 
133   public static RecoverableZooKeeper connect(Configuration conf, String ensemble,
134       Watcher watcher)
135   throws IOException {
136     return connect(conf, ensemble, watcher, null);
137   }
138 
139   public static RecoverableZooKeeper connect(Configuration conf, String ensemble,
140       Watcher watcher, final String identifier)
141   throws IOException {
142     if(ensemble == null) {
143       throw new IOException("Unable to determine ZooKeeper ensemble");
144     }
145     int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT,
146         HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
147     if (LOG.isTraceEnabled()) {
148       LOG.trace(identifier + " opening connection to ZooKeeper ensemble=" + ensemble);
149     }
150     int retry = conf.getInt("zookeeper.recovery.retry", 3);
151     int retryIntervalMillis =
152       conf.getInt("zookeeper.recovery.retry.intervalmill", 1000);
153     zkDumpConnectionTimeOut = conf.getInt("zookeeper.dump.connection.timeout",
154         1000);
155     return new RecoverableZooKeeper(ensemble, timeout, watcher,
156         retry, retryIntervalMillis, identifier);
157   }
158 
159   /**
160    * Log in the current zookeeper server process using the given configuration
161    * keys for the credential file and login principal.
162    *
163    * <p><strong>This is only applicable when running on secure hbase</strong>
164    * On regular HBase (without security features), this will safely be ignored.
165    * </p>
166    *
167    * @param conf The configuration data to use
168    * @param keytabFileKey Property key used to configure the path to the credential file
169    * @param userNameKey Property key used to configure the login principal
170    * @param hostname Current hostname to use in any credentials
171    * @throws IOException underlying exception from SecurityUtil.login() call
172    */
173   public static void loginServer(Configuration conf, String keytabFileKey,
174       String userNameKey, String hostname) throws IOException {
175     login(conf, keytabFileKey, userNameKey, hostname,
176           ZooKeeperSaslServer.LOGIN_CONTEXT_NAME_KEY,
177           JaasConfiguration.SERVER_KEYTAB_KERBEROS_CONFIG_NAME);
178   }
179 
180   /**
181    * Log in the current zookeeper client using the given configuration
182    * keys for the credential file and login principal.
183    *
184    * <p><strong>This is only applicable when running on secure hbase</strong>
185    * On regular HBase (without security features), this will safely be ignored.
186    * </p>
187    *
188    * @param conf The configuration data to use
189    * @param keytabFileKey Property key used to configure the path to the credential file
190    * @param userNameKey Property key used to configure the login principal
191    * @param hostname Current hostname to use in any credentials
192    * @throws IOException underlying exception from SecurityUtil.login() call
193    */
194   public static void loginClient(Configuration conf, String keytabFileKey,
195       String userNameKey, String hostname) throws IOException {
196     login(conf, keytabFileKey, userNameKey, hostname,
197           ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY,
198           JaasConfiguration.CLIENT_KEYTAB_KERBEROS_CONFIG_NAME);
199   }
200 
201   /**
202    * Log in the current process using the given configuration keys for the
203    * credential file and login principal.
204    *
205    * <p><strong>This is only applicable when running on secure hbase</strong>
206    * On regular HBase (without security features), this will safely be ignored.
207    * </p>
208    *
209    * @param conf The configuration data to use
210    * @param keytabFileKey Property key used to configure the path to the credential file
211    * @param userNameKey Property key used to configure the login principal
212    * @param hostname Current hostname to use in any credentials
213    * @param loginContextProperty property name to expose the entry name
214    * @param loginContextName jaas entry name
215    * @throws IOException underlying exception from SecurityUtil.login() call
216    */
217   private static void login(Configuration conf, String keytabFileKey,
218       String userNameKey, String hostname,
219       String loginContextProperty, String loginContextName)
220       throws IOException {
221     if (!isSecureZooKeeper(conf))
222       return;
223 
224     // User has specified a jaas.conf, keep this one as the good one.
225     // HBASE_OPTS="-Djava.security.auth.login.config=jaas.conf"
226     if (System.getProperty("java.security.auth.login.config") != null)
227       return;
228 
229     // No keytab specified, no auth
230     String keytabFilename = conf.get(keytabFileKey);
231     if (keytabFilename == null) {
232       LOG.warn("no keytab specified for: " + keytabFileKey);
233       return;
234     }
235 
236     String principalConfig = conf.get(userNameKey, System.getProperty("user.name"));
237     String principalName = SecurityUtil.getServerPrincipal(principalConfig, hostname);
238 
239     // Initialize the "jaas.conf" for keyTab/principal,
240     // If keyTab is not specified use the Ticket Cache.
241     // and set the zookeeper login context name.
242     JaasConfiguration jaasConf = new JaasConfiguration(loginContextName,
243         principalName, keytabFilename);
244     javax.security.auth.login.Configuration.setConfiguration(jaasConf);
245     System.setProperty(loginContextProperty, loginContextName);
246   }
247 
248   /**
249    * A JAAS configuration that defines the login modules that we want to use for login.
250    */
251   private static class JaasConfiguration extends javax.security.auth.login.Configuration {
252     private static final String SERVER_KEYTAB_KERBEROS_CONFIG_NAME =
253       "zookeeper-server-keytab-kerberos";
254     private static final String CLIENT_KEYTAB_KERBEROS_CONFIG_NAME =
255       "zookeeper-client-keytab-kerberos";
256 
257     private static final Map<String, String> BASIC_JAAS_OPTIONS =
258       new HashMap<String,String>();
259     static {
260       String jaasEnvVar = System.getenv("HBASE_JAAS_DEBUG");
261       if (jaasEnvVar != null && "true".equalsIgnoreCase(jaasEnvVar)) {
262         BASIC_JAAS_OPTIONS.put("debug", "true");
263       }
264     }
265 
266     private static final Map<String,String> KEYTAB_KERBEROS_OPTIONS =
267       new HashMap<String,String>();
268     static {
269       KEYTAB_KERBEROS_OPTIONS.put("doNotPrompt", "true");
270       KEYTAB_KERBEROS_OPTIONS.put("storeKey", "true");
271       KEYTAB_KERBEROS_OPTIONS.put("refreshKrb5Config", "true");
272       KEYTAB_KERBEROS_OPTIONS.putAll(BASIC_JAAS_OPTIONS);
273     }
274 
275     private static final AppConfigurationEntry KEYTAB_KERBEROS_LOGIN =
276       new AppConfigurationEntry(KerberosUtil.getKrb5LoginModuleName(),
277                                 LoginModuleControlFlag.REQUIRED,
278                                 KEYTAB_KERBEROS_OPTIONS);
279 
280     private static final AppConfigurationEntry[] KEYTAB_KERBEROS_CONF =
281       new AppConfigurationEntry[]{KEYTAB_KERBEROS_LOGIN};
282 
283     private javax.security.auth.login.Configuration baseConfig;
284     private final String loginContextName;
285     private final boolean useTicketCache;
286     private final String keytabFile;
287     private final String principal;
288 
289     public JaasConfiguration(String loginContextName, String principal) {
290       this(loginContextName, principal, null, true);
291     }
292 
293     public JaasConfiguration(String loginContextName, String principal, String keytabFile) {
294       this(loginContextName, principal, keytabFile, keytabFile == null || keytabFile.length() == 0);
295     }
296 
297     private JaasConfiguration(String loginContextName, String principal,
298                              String keytabFile, boolean useTicketCache) {
299       try {
300         this.baseConfig = javax.security.auth.login.Configuration.getConfiguration();
301       } catch (SecurityException e) {
302         this.baseConfig = null;
303       }
304       this.loginContextName = loginContextName;
305       this.useTicketCache = useTicketCache;
306       this.keytabFile = keytabFile;
307       this.principal = principal;
308       LOG.info("JaasConfiguration loginContextName=" + loginContextName +
309                " principal=" + principal + " useTicketCache=" + useTicketCache +
310                " keytabFile=" + keytabFile);
311     }
312 
313     @Override
314     public AppConfigurationEntry[] getAppConfigurationEntry(String appName) {
315       if (loginContextName.equals(appName)) {
316         if (!useTicketCache) {
317           KEYTAB_KERBEROS_OPTIONS.put("keyTab", keytabFile);
318           KEYTAB_KERBEROS_OPTIONS.put("useKeyTab", "true");
319         }
320         KEYTAB_KERBEROS_OPTIONS.put("principal", principal);
321         KEYTAB_KERBEROS_OPTIONS.put("useTicketCache", useTicketCache ? "true" : "false");
322         return KEYTAB_KERBEROS_CONF;
323       }
324       if (baseConfig != null) return baseConfig.getAppConfigurationEntry(appName);
325       return(null);
326     }
327   }
328 
329   //
330   // Helper methods
331   //
332 
333   /**
334    * Join the prefix znode name with the suffix znode name to generate a proper
335    * full znode name.
336    *
337    * Assumes prefix does not end with slash and suffix does not begin with it.
338    *
339    * @param prefix beginning of znode name
340    * @param suffix ending of znode name
341    * @return result of properly joining prefix with suffix
342    */
343   public static String joinZNode(String prefix, String suffix) {
344     return prefix + ZNODE_PATH_SEPARATOR + suffix;
345   }
346 
347   /**
348    * Returns the full path of the immediate parent of the specified node.
349    * @param node path to get parent of
350    * @return parent of path, null if passed the root node or an invalid node
351    */
352   public static String getParent(String node) {
353     int idx = node.lastIndexOf(ZNODE_PATH_SEPARATOR);
354     return idx <= 0 ? null : node.substring(0, idx);
355   }
356 
357   /**
358    * Get the name of the current node from the specified fully-qualified path.
359    * @param path fully-qualified path
360    * @return name of the current node
361    */
362   public static String getNodeName(String path) {
363     return path.substring(path.lastIndexOf("/")+1);
364   }
365 
366   /**
367    * Get the key to the ZK ensemble for this configuration without
368    * adding a name at the end
369    * @param conf Configuration to use to build the key
370    * @return ensemble key without a name
371    */
372   public static String getZooKeeperClusterKey(Configuration conf) {
373     return getZooKeeperClusterKey(conf, null);
374   }
375 
376   /**
377    * Get the key to the ZK ensemble for this configuration and append
378    * a name at the end
379    * @param conf Configuration to use to build the key
380    * @param name Name that should be appended at the end if not empty or null
381    * @return ensemble key with a name (if any)
382    */
383   public static String getZooKeeperClusterKey(Configuration conf, String name) {
384     String ensemble = conf.get(HConstants.ZOOKEEPER_QUORUM.replaceAll(
385         "[\\t\\n\\x0B\\f\\r]", ""));
386     StringBuilder builder = new StringBuilder(ensemble);
387     builder.append(":");
388     builder.append(conf.get(HConstants.ZOOKEEPER_CLIENT_PORT));
389     builder.append(":");
390     builder.append(conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT));
391     if (name != null && !name.isEmpty()) {
392       builder.append(",");
393       builder.append(name);
394     }
395     return builder.toString();
396   }
397 
398   /**
399    * Apply the settings in the given key to the given configuration, this is
400    * used to communicate with distant clusters
401    * @param conf configuration object to configure
402    * @param key string that contains the 3 required configuratins
403    * @throws IOException
404    */
405   public static void applyClusterKeyToConf(Configuration conf, String key)
406       throws IOException{
407     ZKClusterKey zkClusterKey = transformClusterKey(key);
408     conf.set(HConstants.ZOOKEEPER_QUORUM, zkClusterKey.quorumString);
409     conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClusterKey.clientPort);
410     conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, zkClusterKey.znodeParent);
411   }
412 
413   /**
414    * Separate the given key into the three configurations it should contain:
415    * hbase.zookeeper.quorum, hbase.zookeeper.client.port
416    * and zookeeper.znode.parent
417    * @param key
418    * @return the three configuration in the described order
419    * @throws IOException
420    */
421   public static ZKClusterKey transformClusterKey(String key) throws IOException {
422     String[] parts = key.split(":");
423 
424     if (parts.length == 3) {
425       return new ZKClusterKey(parts [0], Integer.parseInt(parts [1]), parts [2]);
426     }
427 
428     if (parts.length > 3) {
429       // The quorum could contain client port in server:clientport format, try to transform more.
430       String zNodeParent = parts [parts.length - 1];
431       String clientPort = parts [parts.length - 2];
432 
433       // The first part length is the total length minus the lengths of other parts and minus 2 ":"
434       int endQuorumIndex = key.length() - zNodeParent.length() - clientPort.length() - 2;
435       String quorumStringInput = key.substring(0, endQuorumIndex);
436       String[] serverHosts = quorumStringInput.split(",");
437 
438       // The common case is that every server has its own client port specified - this means
439       // that (total parts - the ZNodeParent part - the ClientPort part) is equal to
440       // (the number of "," + 1) - "+ 1" because the last server has no ",".
441       if ((parts.length - 2) == (serverHosts.length + 1)) {
442         return new ZKClusterKey(quorumStringInput, Integer.parseInt(clientPort), zNodeParent);
443       }
444 
445       // For the uncommon case that some servers has no port specified, we need to build the
446       // server:clientport list using default client port for servers without specified port.
447       return new ZKClusterKey(
448         ZKConfig.buildQuorumServerString(serverHosts, clientPort),
449         Integer.parseInt(clientPort),
450         zNodeParent);
451     }
452 
453     throw new IOException("Cluster key passed " + key + " is invalid, the format should be:" +
454           HConstants.ZOOKEEPER_QUORUM + ":" + HConstants.ZOOKEEPER_CLIENT_PORT + ":"
455           + HConstants.ZOOKEEPER_ZNODE_PARENT);
456   }
457 
458   /**
459    * Standardize the ZK quorum string: make it a "server:clientport" list, separated by ','
460    * @param quorumStringInput a string contains a list of servers for ZK quorum
461    * @param clientPort the default client port
462    * @return the string for a list of "server:port" separated by ","
463    */
464   @VisibleForTesting
465   public static String standardizeQuorumServerString(String quorumStringInput, String clientPort) {
466     String[] serverHosts = quorumStringInput.split(",");
467     return ZKConfig.buildQuorumServerString(serverHosts, clientPort);
468   }
469 
470   //
471   // Existence checks and watches
472   //
473 
474   /**
475    * Watch the specified znode for delete/create/change events.  The watcher is
476    * set whether or not the node exists.  If the node already exists, the method
477    * returns true.  If the node does not exist, the method returns false.
478    *
479    * @param zkw zk reference
480    * @param znode path of node to watch
481    * @return true if znode exists, false if does not exist or error
482    * @throws KeeperException if unexpected zookeeper exception
483    */
484   public static boolean watchAndCheckExists(ZooKeeperWatcher zkw, String znode)
485   throws KeeperException {
486     try {
487       Stat s = zkw.getRecoverableZooKeeper().exists(znode, zkw);
488       boolean exists = s != null ? true : false;
489       if (exists) {
490         LOG.debug(zkw.prefix("Set watcher on existing znode=" + znode));
491       } else {
492         LOG.debug(zkw.prefix("Set watcher on znode that does not yet exist, " + znode));
493       }
494       return exists;
495     } catch (KeeperException e) {
496       LOG.warn(zkw.prefix("Unable to set watcher on znode " + znode), e);
497       zkw.keeperException(e);
498       return false;
499     } catch (InterruptedException e) {
500       LOG.warn(zkw.prefix("Unable to set watcher on znode " + znode), e);
501       zkw.interruptedException(e);
502       return false;
503     }
504   }
505 
506   /**
507    * Watch the specified znode, but only if exists. Useful when watching
508    * for deletions. Uses .getData() (and handles NoNodeException) instead
509    * of .exists() to accomplish this, as .getData() will only set a watch if
510    * the znode exists.
511    * @param zkw zk reference
512    * @param znode path of node to watch
513    * @return true if the watch is set, false if node does not exists
514    * @throws KeeperException if unexpected zookeeper exception
515    */
516   public static boolean setWatchIfNodeExists(ZooKeeperWatcher zkw, String znode)
517       throws KeeperException {
518     try {
519       zkw.getRecoverableZooKeeper().getData(znode, true, null);
520       return true;
521     } catch (NoNodeException e) {
522       return false;
523     } catch (InterruptedException e) {
524       LOG.warn(zkw.prefix("Unable to set watcher on znode " + znode), e);
525       zkw.interruptedException(e);
526       return false;
527     }
528   }
529 
530   /**
531    * Check if the specified node exists.  Sets no watches.
532    *
533    * @param zkw zk reference
534    * @param znode path of node to watch
535    * @return version of the node if it exists, -1 if does not exist
536    * @throws KeeperException if unexpected zookeeper exception
537    */
538   public static int checkExists(ZooKeeperWatcher zkw, String znode)
539   throws KeeperException {
540     try {
541       Stat s = zkw.getRecoverableZooKeeper().exists(znode, null);
542       return s != null ? s.getVersion() : -1;
543     } catch (KeeperException e) {
544       LOG.warn(zkw.prefix("Unable to set watcher on znode (" + znode + ")"), e);
545       zkw.keeperException(e);
546       return -1;
547     } catch (InterruptedException e) {
548       LOG.warn(zkw.prefix("Unable to set watcher on znode (" + znode + ")"), e);
549       zkw.interruptedException(e);
550       return -1;
551     }
552   }
553 
554   //
555   // Znode listings
556   //
557 
558   /**
559    * Lists the children znodes of the specified znode.  Also sets a watch on
560    * the specified znode which will capture a NodeDeleted event on the specified
561    * znode as well as NodeChildrenChanged if any children of the specified znode
562    * are created or deleted.
563    *
564    * Returns null if the specified node does not exist.  Otherwise returns a
565    * list of children of the specified node.  If the node exists but it has no
566    * children, an empty list will be returned.
567    *
568    * @param zkw zk reference
569    * @param znode path of node to list and watch children of
570    * @return list of children of the specified node, an empty list if the node
571    *          exists but has no children, and null if the node does not exist
572    * @throws KeeperException if unexpected zookeeper exception
573    */
574   public static List<String> listChildrenAndWatchForNewChildren(
575       ZooKeeperWatcher zkw, String znode)
576   throws KeeperException {
577     try {
578       List<String> children = zkw.getRecoverableZooKeeper().getChildren(znode, zkw);
579       return children;
580     } catch(KeeperException.NoNodeException ke) {
581       LOG.debug(zkw.prefix("Unable to list children of znode " + znode + " " +
582           "because node does not exist (not an error)"));
583       return null;
584     } catch (KeeperException e) {
585       LOG.warn(zkw.prefix("Unable to list children of znode " + znode + " "), e);
586       zkw.keeperException(e);
587       return null;
588     } catch (InterruptedException e) {
589       LOG.warn(zkw.prefix("Unable to list children of znode " + znode + " "), e);
590       zkw.interruptedException(e);
591       return null;
592     }
593   }
594 
595   /**
596    * List all the children of the specified znode, setting a watch for children
597    * changes and also setting a watch on every individual child in order to get
598    * the NodeCreated and NodeDeleted events.
599    * @param zkw zookeeper reference
600    * @param znode node to get children of and watch
601    * @return list of znode names, null if the node doesn't exist
602    * @throws KeeperException
603    */
604   public static List<String> listChildrenAndWatchThem(ZooKeeperWatcher zkw,
605       String znode) throws KeeperException {
606     List<String> children = listChildrenAndWatchForNewChildren(zkw, znode);
607     if (children == null) {
608       return null;
609     }
610     for (String child : children) {
611       watchAndCheckExists(zkw, joinZNode(znode, child));
612     }
613     return children;
614   }
615 
616   /**
617    * Lists the children of the specified znode without setting any watches.
618    *
619    * Sets no watches at all, this method is best effort.
620    *
621    * Returns an empty list if the node has no children.  Returns null if the
622    * parent node itself does not exist.
623    *
624    * @param zkw zookeeper reference
625    * @param znode node to get children
626    * @return list of data of children of specified znode, empty if no children,
627    *         null if parent does not exist
628    * @throws KeeperException if unexpected zookeeper exception
629    */
630   public static List<String> listChildrenNoWatch(ZooKeeperWatcher zkw, String znode)
631   throws KeeperException {
632     List<String> children = null;
633     try {
634       // List the children without watching
635       children = zkw.getRecoverableZooKeeper().getChildren(znode, null);
636     } catch(KeeperException.NoNodeException nne) {
637       return null;
638     } catch(InterruptedException ie) {
639       zkw.interruptedException(ie);
640     }
641     return children;
642   }
643 
644   /**
645    * Simple class to hold a node path and node data.
646    * @deprecated Unused
647    */
648   @Deprecated
649   public static class NodeAndData {
650     private String node;
651     private byte [] data;
652     public NodeAndData(String node, byte [] data) {
653       this.node = node;
654       this.data = data;
655     }
656     public String getNode() {
657       return node;
658     }
659     public byte [] getData() {
660       return data;
661     }
662     @Override
663     public String toString() {
664       return node;
665     }
666     public boolean isEmpty() {
667       return (data == null || data.length == 0);
668     }
669   }
670 
671   /**
672    * Checks if the specified znode has any children.  Sets no watches.
673    *
674    * Returns true if the node exists and has children.  Returns false if the
675    * node does not exist or if the node does not have any children.
676    *
677    * Used during master initialization to determine if the master is a
678    * failed-over-to master or the first master during initial cluster startup.
679    * If the directory for regionserver ephemeral nodes is empty then this is
680    * a cluster startup, if not then it is not cluster startup.
681    *
682    * @param zkw zk reference
683    * @param znode path of node to check for children of
684    * @return true if node has children, false if not or node does not exist
685    * @throws KeeperException if unexpected zookeeper exception
686    */
687   public static boolean nodeHasChildren(ZooKeeperWatcher zkw, String znode)
688   throws KeeperException {
689     try {
690       return !zkw.getRecoverableZooKeeper().getChildren(znode, null).isEmpty();
691     } catch(KeeperException.NoNodeException ke) {
692       LOG.debug(zkw.prefix("Unable to list children of znode " + znode + " " +
693       "because node does not exist (not an error)"));
694       return false;
695     } catch (KeeperException e) {
696       LOG.warn(zkw.prefix("Unable to list children of znode " + znode), e);
697       zkw.keeperException(e);
698       return false;
699     } catch (InterruptedException e) {
700       LOG.warn(zkw.prefix("Unable to list children of znode " + znode), e);
701       zkw.interruptedException(e);
702       return false;
703     }
704   }
705 
706   /**
707    * Get the number of children of the specified node.
708    *
709    * If the node does not exist or has no children, returns 0.
710    *
711    * Sets no watches at all.
712    *
713    * @param zkw zk reference
714    * @param znode path of node to count children of
715    * @return number of children of specified node, 0 if none or parent does not
716    *         exist
717    * @throws KeeperException if unexpected zookeeper exception
718    */
719   public static int getNumberOfChildren(ZooKeeperWatcher zkw, String znode)
720   throws KeeperException {
721     try {
722       Stat stat = zkw.getRecoverableZooKeeper().exists(znode, null);
723       return stat == null ? 0 : stat.getNumChildren();
724     } catch(KeeperException e) {
725       LOG.warn(zkw.prefix("Unable to get children of node " + znode));
726       zkw.keeperException(e);
727     } catch(InterruptedException e) {
728       zkw.interruptedException(e);
729     }
730     return 0;
731   }
732 
733   //
734   // Data retrieval
735   //
736 
737   /**
738    * Get znode data. Does not set a watcher.
739    * @return ZNode data, null if the node does not exist or if there is an
740    *  error.
741    */
742   public static byte [] getData(ZooKeeperWatcher zkw, String znode)
743       throws KeeperException, InterruptedException {
744     try {
745       byte [] data = zkw.getRecoverableZooKeeper().getData(znode, null, null);
746       logRetrievedMsg(zkw, znode, data, false);
747       return data;
748     } catch (KeeperException.NoNodeException e) {
749       LOG.debug(zkw.prefix("Unable to get data of znode " + znode + " " +
750           "because node does not exist (not an error)"));
751       return null;
752     } catch (KeeperException e) {
753       LOG.warn(zkw.prefix("Unable to get data of znode " + znode), e);
754       zkw.keeperException(e);
755       return null;
756     }
757   }
758 
759   /**
760    * Get the data at the specified znode and set a watch.
761    *
762    * Returns the data and sets a watch if the node exists.  Returns null and no
763    * watch is set if the node does not exist or there is an exception.
764    *
765    * @param zkw zk reference
766    * @param znode path of node
767    * @return data of the specified znode, or null
768    * @throws KeeperException if unexpected zookeeper exception
769    */
770   public static byte [] getDataAndWatch(ZooKeeperWatcher zkw, String znode)
771   throws KeeperException {
772     return getDataInternal(zkw, znode, null, true);
773   }
774 
775   /**
776    * Get the data at the specified znode and set a watch.
777    *
778    * Returns the data and sets a watch if the node exists.  Returns null and no
779    * watch is set if the node does not exist or there is an exception.
780    *
781    * @param zkw zk reference
782    * @param znode path of node
783    * @param stat object to populate the version of the znode
784    * @return data of the specified znode, or null
785    * @throws KeeperException if unexpected zookeeper exception
786    */
787   public static byte[] getDataAndWatch(ZooKeeperWatcher zkw, String znode,
788       Stat stat) throws KeeperException {
789     return getDataInternal(zkw, znode, stat, true);
790   }
791 
792   private static byte[] getDataInternal(ZooKeeperWatcher zkw, String znode, Stat stat,
793       boolean watcherSet)
794       throws KeeperException {
795     try {
796       byte [] data = zkw.getRecoverableZooKeeper().getData(znode, zkw, stat);
797       logRetrievedMsg(zkw, znode, data, watcherSet);
798       return data;
799     } catch (KeeperException.NoNodeException e) {
800       // This log can get pretty annoying when we cycle on 100ms waits.
801       // Enable trace if you really want to see it.
802       LOG.trace(zkw.prefix("Unable to get data of znode " + znode + " " +
803         "because node does not exist (not an error)"));
804       return null;
805     } catch (KeeperException e) {
806       LOG.warn(zkw.prefix("Unable to get data of znode " + znode), e);
807       zkw.keeperException(e);
808       return null;
809     } catch (InterruptedException e) {
810       LOG.warn(zkw.prefix("Unable to get data of znode " + znode), e);
811       zkw.interruptedException(e);
812       return null;
813     }
814   }
815 
816   /**
817    * Get the data at the specified znode without setting a watch.
818    *
819    * Returns the data if the node exists.  Returns null if the node does not
820    * exist.
821    *
822    * Sets the stats of the node in the passed Stat object.  Pass a null stat if
823    * not interested.
824    *
825    * @param zkw zk reference
826    * @param znode path of node
827    * @param stat node status to get if node exists
828    * @return data of the specified znode, or null if node does not exist
829    * @throws KeeperException if unexpected zookeeper exception
830    */
831   public static byte [] getDataNoWatch(ZooKeeperWatcher zkw, String znode,
832       Stat stat)
833   throws KeeperException {
834     try {
835       byte [] data = zkw.getRecoverableZooKeeper().getData(znode, null, stat);
836       logRetrievedMsg(zkw, znode, data, false);
837       return data;
838     } catch (KeeperException.NoNodeException e) {
839       LOG.debug(zkw.prefix("Unable to get data of znode " + znode + " " +
840           "because node does not exist (not necessarily an error)"));
841       return null;
842     } catch (KeeperException e) {
843       LOG.warn(zkw.prefix("Unable to get data of znode " + znode), e);
844       zkw.keeperException(e);
845       return null;
846     } catch (InterruptedException e) {
847       LOG.warn(zkw.prefix("Unable to get data of znode " + znode), e);
848       zkw.interruptedException(e);
849       return null;
850     }
851   }
852 
853   /**
854    * Returns the data of child znodes of the specified znode.  Also sets a watch on
855    * the specified znode which will capture a NodeDeleted event on the specified
856    * znode as well as NodeChildrenChanged if any children of the specified znode
857    * are created or deleted.
858    *
859    * Returns null if the specified node does not exist.  Otherwise returns a
860    * list of children of the specified node.  If the node exists but it has no
861    * children, an empty list will be returned.
862    *
863    * @param zkw zk reference
864    * @param baseNode path of node to list and watch children of
865    * @return list of data of children of the specified node, an empty list if the node
866    *          exists but has no children, and null if the node does not exist
867    * @throws KeeperException if unexpected zookeeper exception
868    * @deprecated Unused
869    */
870   public static List<NodeAndData> getChildDataAndWatchForNewChildren(
871       ZooKeeperWatcher zkw, String baseNode) throws KeeperException {
872     List<String> nodes =
873       ZKUtil.listChildrenAndWatchForNewChildren(zkw, baseNode);
874     if (nodes != null) {
875       List<NodeAndData> newNodes = new ArrayList<NodeAndData>();
876       for (String node : nodes) {
877         String nodePath = ZKUtil.joinZNode(baseNode, node);
878         byte[] data = ZKUtil.getDataAndWatch(zkw, nodePath);
879         newNodes.add(new NodeAndData(nodePath, data));
880       }
881       return newNodes;
882     }
883     return null;
884   }
885 
886   /**
887    * Update the data of an existing node with the expected version to have the
888    * specified data.
889    *
890    * Throws an exception if there is a version mismatch or some other problem.
891    *
892    * Sets no watches under any conditions.
893    *
894    * @param zkw zk reference
895    * @param znode
896    * @param data
897    * @param expectedVersion
898    * @throws KeeperException if unexpected zookeeper exception
899    * @throws KeeperException.BadVersionException if version mismatch
900    * @deprecated Unused
901    */
902   public static void updateExistingNodeData(ZooKeeperWatcher zkw, String znode,
903       byte [] data, int expectedVersion)
904   throws KeeperException {
905     try {
906       zkw.getRecoverableZooKeeper().setData(znode, data, expectedVersion);
907     } catch(InterruptedException ie) {
908       zkw.interruptedException(ie);
909     }
910   }
911 
912   //
913   // Data setting
914   //
915 
916   /**
917    * Sets the data of the existing znode to be the specified data.  Ensures that
918    * the current data has the specified expected version.
919    *
920    * <p>If the node does not exist, a {@link NoNodeException} will be thrown.
921    *
922    * <p>If there is a version mismatch, method returns false.
923    *
924    * <p>No watches are set but setting data will trigger other watchers of this
925    * node.
926    *
927    * <p>If there is another problem, a KeeperException will be thrown.
928    *
929    * @param zkw zk reference
930    * @param znode path of node
931    * @param data data to set for node
932    * @param expectedVersion version expected when setting data
933    * @return true if data set, false if version mismatch
934    * @throws KeeperException if unexpected zookeeper exception
935    */
936   public static boolean setData(ZooKeeperWatcher zkw, String znode,
937       byte [] data, int expectedVersion)
938   throws KeeperException, KeeperException.NoNodeException {
939     try {
940       return zkw.getRecoverableZooKeeper().setData(znode, data, expectedVersion) != null;
941     } catch (InterruptedException e) {
942       zkw.interruptedException(e);
943       return false;
944     }
945   }
946 
947   /**
948    * Set data into node creating node if it doesn't yet exist.
949    * Does not set watch.
950    *
951    * @param zkw zk reference
952    * @param znode path of node
953    * @param data data to set for node
954    * @throws KeeperException
955    */
956   public static void createSetData(final ZooKeeperWatcher zkw, final String znode,
957       final byte [] data)
958   throws KeeperException {
959     if (checkExists(zkw, znode) == -1) {
960       ZKUtil.createWithParents(zkw, znode, data);
961     } else {
962       ZKUtil.setData(zkw, znode, data);
963     }
964   }
965 
966   /**
967    * Sets the data of the existing znode to be the specified data.  The node
968    * must exist but no checks are done on the existing data or version.
969    *
970    * <p>If the node does not exist, a {@link NoNodeException} will be thrown.
971    *
972    * <p>No watches are set but setting data will trigger other watchers of this
973    * node.
974    *
975    * <p>If there is another problem, a KeeperException will be thrown.
976    *
977    * @param zkw zk reference
978    * @param znode path of node
979    * @param data data to set for node
980    * @throws KeeperException if unexpected zookeeper exception
981    */
982   public static void setData(ZooKeeperWatcher zkw, String znode, byte [] data)
983   throws KeeperException, KeeperException.NoNodeException {
984     setData(zkw, (SetData)ZKUtilOp.setData(znode, data));
985   }
986 
987   private static void setData(ZooKeeperWatcher zkw, SetData setData)
988   throws KeeperException, KeeperException.NoNodeException {
989     SetDataRequest sd = (SetDataRequest)toZooKeeperOp(zkw, setData).toRequestRecord();
990     setData(zkw, sd.getPath(), sd.getData(), sd.getVersion());
991   }
992 
993   /**
994    * Returns whether or not secure authentication is enabled
995    * (whether <code>hbase.security.authentication</code> is set to
996    * <code>kerberos</code>).
997    */
998   public static boolean isSecureZooKeeper(Configuration conf) {
999     // Detection for embedded HBase client with jaas configuration
1000     // defined for third party programs.
1001     try {
1002       javax.security.auth.login.Configuration testConfig =
1003           javax.security.auth.login.Configuration.getConfiguration();
1004       if (testConfig.getAppConfigurationEntry("Client") == null
1005           && testConfig.getAppConfigurationEntry(
1006             JaasConfiguration.CLIENT_KEYTAB_KERBEROS_CONFIG_NAME) == null
1007           && testConfig.getAppConfigurationEntry(
1008               JaasConfiguration.SERVER_KEYTAB_KERBEROS_CONFIG_NAME) == null
1009           && conf.get(HConstants.ZK_CLIENT_KERBEROS_PRINCIPAL) == null
1010           && conf.get(HConstants.ZK_SERVER_KERBEROS_PRINCIPAL) == null) {
1011               
1012         return false;
1013       }
1014     } catch(Exception e) {
1015       // No Jaas configuration defined.
1016       return false;
1017     }
1018 
1019     // Master & RSs uses hbase.zookeeper.client.*
1020     return "kerberos".equalsIgnoreCase(conf.get("hbase.security.authentication"));
1021   }
1022 
1023   private static ArrayList<ACL> createACL(ZooKeeperWatcher zkw, String node) {
1024     return createACL(zkw, node, isSecureZooKeeper(zkw.getConfiguration()));
1025   }
1026 
1027   public static ArrayList<ACL> createACL(ZooKeeperWatcher zkw, String node,
1028     boolean isSecureZooKeeper) {
1029     if (!node.startsWith(zkw.baseZNode)) {
1030       return Ids.OPEN_ACL_UNSAFE;
1031     }
1032     if (isSecureZooKeeper) {
1033       String superUser = zkw.getConfiguration().get("hbase.superuser");
1034       ArrayList<ACL> acls = new ArrayList<ACL>();
1035       // add permission to hbase supper user
1036       if (superUser != null) {
1037         acls.add(new ACL(Perms.ALL, new Id("auth", superUser)));
1038       }
1039       // Certain znodes are accessed directly by the client,
1040       // so they must be readable by non-authenticated clients
1041       if (zkw.isClientReadable(node)) {
1042         acls.addAll(Ids.CREATOR_ALL_ACL);
1043         acls.addAll(Ids.READ_ACL_UNSAFE);
1044       } else {
1045         acls.addAll(Ids.CREATOR_ALL_ACL);
1046       }
1047       return acls;
1048     } else {
1049       return Ids.OPEN_ACL_UNSAFE;
1050     }
1051   }
1052 
1053   //
1054   // Node creation
1055   //
1056 
1057   /**
1058    *
1059    * Set the specified znode to be an ephemeral node carrying the specified
1060    * data.
1061    *
1062    * If the node is created successfully, a watcher is also set on the node.
1063    *
1064    * If the node is not created successfully because it already exists, this
1065    * method will also set a watcher on the node.
1066    *
1067    * If there is another problem, a KeeperException will be thrown.
1068    *
1069    * @param zkw zk reference
1070    * @param znode path of node
1071    * @param data data of node
1072    * @return true if node created, false if not, watch set in both cases
1073    * @throws KeeperException if unexpected zookeeper exception
1074    */
1075   public static boolean createEphemeralNodeAndWatch(ZooKeeperWatcher zkw,
1076       String znode, byte [] data)
1077   throws KeeperException {
1078     boolean ret = true;
1079     try {
1080       zkw.getRecoverableZooKeeper().create(znode, data, createACL(zkw, znode),
1081           CreateMode.EPHEMERAL);
1082     } catch (KeeperException.NodeExistsException nee) {
1083       ret = false;
1084     } catch (InterruptedException e) {
1085       LOG.info("Interrupted", e);
1086       Thread.currentThread().interrupt();
1087     }
1088     if(!watchAndCheckExists(zkw, znode)) {
1089       // It did exist but now it doesn't, try again
1090       return createEphemeralNodeAndWatch(zkw, znode, data);
1091     }
1092     return ret;
1093   }
1094 
1095   /**
1096    * Creates the specified znode to be a persistent node carrying the specified
1097    * data.
1098    *
1099    * Returns true if the node was successfully created, false if the node
1100    * already existed.
1101    *
1102    * If the node is created successfully, a watcher is also set on the node.
1103    *
1104    * If the node is not created successfully because it already exists, this
1105    * method will also set a watcher on the node but return false.
1106    *
1107    * If there is another problem, a KeeperException will be thrown.
1108    *
1109    * @param zkw zk reference
1110    * @param znode path of node
1111    * @param data data of node
1112    * @return true if node created, false if not, watch set in both cases
1113    * @throws KeeperException if unexpected zookeeper exception
1114    */
1115   public static boolean createNodeIfNotExistsAndWatch(
1116       ZooKeeperWatcher zkw, String znode, byte [] data)
1117   throws KeeperException {
1118     boolean ret = true;
1119     try {
1120       zkw.getRecoverableZooKeeper().create(znode, data, createACL(zkw, znode),
1121           CreateMode.PERSISTENT);
1122     } catch (KeeperException.NodeExistsException nee) {
1123       ret = false;
1124     } catch (InterruptedException e) {
1125       zkw.interruptedException(e);
1126       return false;
1127     }
1128     try {
1129       zkw.getRecoverableZooKeeper().exists(znode, zkw);
1130     } catch (InterruptedException e) {
1131       zkw.interruptedException(e);
1132       return false;
1133     }
1134     return ret;
1135   }
1136 
1137   /**
1138    * Creates the specified znode with the specified data but does not watch it.
1139    *
1140    * Returns the znode of the newly created node
1141    *
1142    * If there is another problem, a KeeperException will be thrown.
1143    *
1144    * @param zkw zk reference
1145    * @param znode path of node
1146    * @param data data of node
1147    * @param createMode specifying whether the node to be created is ephemeral and/or sequential
1148    * @return name of the newly created znode, or null
1149    * @throws KeeperException if unexpected zookeeper exception
1150    */
1151   public static String createNodeIfNotExistsNoWatch(ZooKeeperWatcher zkw, String znode,
1152       byte[] data, CreateMode createMode) throws KeeperException {
1153 
1154     String createdZNode = null;
1155     try {
1156       createdZNode = zkw.getRecoverableZooKeeper().create(znode, data,
1157           createACL(zkw, znode), createMode);
1158     } catch (KeeperException.NodeExistsException nee) {
1159       return znode;
1160     } catch (InterruptedException e) {
1161       zkw.interruptedException(e);
1162       return null;
1163     }
1164     return createdZNode;
1165   }
1166 
1167   /**
1168    * Creates the specified node with the specified data and watches it.
1169    *
1170    * <p>Throws an exception if the node already exists.
1171    *
1172    * <p>The node created is persistent and open access.
1173    *
1174    * <p>Returns the version number of the created node if successful.
1175    *
1176    * @param zkw zk reference
1177    * @param znode path of node to create
1178    * @param data data of node to create
1179    * @return version of node created
1180    * @throws KeeperException if unexpected zookeeper exception
1181    * @throws KeeperException.NodeExistsException if node already exists
1182    */
1183   public static int createAndWatch(ZooKeeperWatcher zkw,
1184       String znode, byte [] data)
1185   throws KeeperException, KeeperException.NodeExistsException {
1186     try {
1187       zkw.getRecoverableZooKeeper().create(znode, data, createACL(zkw, znode),
1188           CreateMode.PERSISTENT);
1189       Stat stat = zkw.getRecoverableZooKeeper().exists(znode, zkw);
1190       if (stat == null){
1191         // Likely a race condition. Someone deleted the znode.
1192         throw KeeperException.create(KeeperException.Code.SYSTEMERROR,
1193             "ZK.exists returned null (i.e.: znode does not exist) for znode=" + znode);
1194       }
1195      return stat.getVersion();
1196     } catch (InterruptedException e) {
1197       zkw.interruptedException(e);
1198       return -1;
1199     }
1200   }
1201 
1202   /**
1203    * Async creates the specified node with the specified data.
1204    *
1205    * <p>Throws an exception if the node already exists.
1206    *
1207    * <p>The node created is persistent and open access.
1208    *
1209    * @param zkw zk reference
1210    * @param znode path of node to create
1211    * @param data data of node to create
1212    * @param cb
1213    * @param ctx
1214    * @throws KeeperException if unexpected zookeeper exception
1215    * @throws KeeperException.NodeExistsException if node already exists
1216    */
1217   public static void asyncCreate(ZooKeeperWatcher zkw,
1218       String znode, byte [] data, final AsyncCallback.StringCallback cb,
1219       final Object ctx) {
1220     zkw.getRecoverableZooKeeper().getZooKeeper().create(znode, data,
1221         createACL(zkw, znode), CreateMode.PERSISTENT, cb, ctx);
1222   }
1223 
1224   /**
1225    * Creates the specified node, iff the node does not exist.  Does not set a
1226    * watch and fails silently if the node already exists.
1227    *
1228    * The node created is persistent and open access.
1229    *
1230    * @param zkw zk reference
1231    * @param znode path of node
1232    * @throws KeeperException if unexpected zookeeper exception
1233    */
1234   public static void createAndFailSilent(ZooKeeperWatcher zkw,
1235       String znode) throws KeeperException {
1236     createAndFailSilent(zkw, znode, new byte[0]);
1237   }
1238 
1239   /**
1240    * Creates the specified node containing specified data, iff the node does not exist.  Does
1241    * not set a watch and fails silently if the node already exists.
1242    *
1243    * The node created is persistent and open access.
1244    *
1245    * @param zkw zk reference
1246    * @param znode path of node
1247    * @param data a byte array data to store in the znode
1248    * @throws KeeperException if unexpected zookeeper exception
1249    */
1250   public static void createAndFailSilent(ZooKeeperWatcher zkw,
1251       String znode, byte[] data)
1252   throws KeeperException {
1253     createAndFailSilent(zkw,
1254         (CreateAndFailSilent)ZKUtilOp.createAndFailSilent(znode, data));
1255   }
1256 
1257   private static void createAndFailSilent(ZooKeeperWatcher zkw, CreateAndFailSilent cafs)
1258   throws KeeperException {
1259     CreateRequest create = (CreateRequest)toZooKeeperOp(zkw, cafs).toRequestRecord();
1260     String znode = create.getPath();
1261     try {
1262       RecoverableZooKeeper zk = zkw.getRecoverableZooKeeper();
1263       if (zk.exists(znode, false) == null) {
1264         zk.create(znode, create.getData(), create.getAcl(), CreateMode.fromFlag(create.getFlags()));
1265       }
1266     } catch(KeeperException.NodeExistsException nee) {
1267     } catch(KeeperException.NoAuthException nee){
1268       try {
1269         if (null == zkw.getRecoverableZooKeeper().exists(znode, false)) {
1270           // If we failed to create the file and it does not already exist.
1271           throw(nee);
1272         }
1273       } catch (InterruptedException ie) {
1274         zkw.interruptedException(ie);
1275       }
1276     } catch(InterruptedException ie) {
1277       zkw.interruptedException(ie);
1278     }
1279   }
1280 
1281   /**
1282    * Creates the specified node and all parent nodes required for it to exist.
1283    *
1284    * No watches are set and no errors are thrown if the node already exists.
1285    *
1286    * The nodes created are persistent and open access.
1287    *
1288    * @param zkw zk reference
1289    * @param znode path of node
1290    * @throws KeeperException if unexpected zookeeper exception
1291    */
1292   public static void createWithParents(ZooKeeperWatcher zkw, String znode)
1293   throws KeeperException {
1294     createWithParents(zkw, znode, new byte[0]);
1295   }
1296 
1297   /**
1298    * Creates the specified node and all parent nodes required for it to exist.  The creation of
1299    * parent znodes is not atomic with the leaf znode creation but the data is written atomically
1300    * when the leaf node is created.
1301    *
1302    * No watches are set and no errors are thrown if the node already exists.
1303    *
1304    * The nodes created are persistent and open access.
1305    *
1306    * @param zkw zk reference
1307    * @param znode path of node
1308    * @throws KeeperException if unexpected zookeeper exception
1309    */
1310   public static void createWithParents(ZooKeeperWatcher zkw, String znode, byte[] data)
1311   throws KeeperException {
1312     try {
1313       if(znode == null) {
1314         return;
1315       }
1316       zkw.getRecoverableZooKeeper().create(znode, data, createACL(zkw, znode),
1317           CreateMode.PERSISTENT);
1318     } catch(KeeperException.NodeExistsException nee) {
1319       return;
1320     } catch(KeeperException.NoNodeException nne) {
1321       createWithParents(zkw, getParent(znode));
1322       createWithParents(zkw, znode, data);
1323     } catch(InterruptedException ie) {
1324       zkw.interruptedException(ie);
1325     }
1326   }
1327 
1328   //
1329   // Deletes
1330   //
1331 
1332   /**
1333    * Delete the specified node.  Sets no watches.  Throws all exceptions.
1334    */
1335   public static void deleteNode(ZooKeeperWatcher zkw, String node)
1336   throws KeeperException {
1337     deleteNode(zkw, node, -1);
1338   }
1339 
1340   /**
1341    * Delete the specified node with the specified version.  Sets no watches.
1342    * Throws all exceptions.
1343    */
1344   public static boolean deleteNode(ZooKeeperWatcher zkw, String node,
1345       int version)
1346   throws KeeperException {
1347     try {
1348       zkw.getRecoverableZooKeeper().delete(node, version);
1349       return true;
1350     } catch(KeeperException.BadVersionException bve) {
1351       return false;
1352     } catch(InterruptedException ie) {
1353       zkw.interruptedException(ie);
1354       return false;
1355     }
1356   }
1357 
1358   /**
1359    * Deletes the specified node.  Fails silent if the node does not exist.
1360    * @param zkw
1361    * @param node
1362    * @throws KeeperException
1363    */
1364   public static void deleteNodeFailSilent(ZooKeeperWatcher zkw, String node)
1365   throws KeeperException {
1366     deleteNodeFailSilent(zkw,
1367       (DeleteNodeFailSilent)ZKUtilOp.deleteNodeFailSilent(node));
1368   }
1369 
1370   private static void deleteNodeFailSilent(ZooKeeperWatcher zkw,
1371       DeleteNodeFailSilent dnfs) throws KeeperException {
1372     DeleteRequest delete = (DeleteRequest)toZooKeeperOp(zkw, dnfs).toRequestRecord();
1373     try {
1374       zkw.getRecoverableZooKeeper().delete(delete.getPath(), delete.getVersion());
1375     } catch(KeeperException.NoNodeException nne) {
1376     } catch(InterruptedException ie) {
1377       zkw.interruptedException(ie);
1378     }
1379   }
1380 
1381 
1382   /**
1383    * Delete the specified node and all of its children.
1384    * <p>
1385    * If the node does not exist, just returns.
1386    * <p>
1387    * Sets no watches. Throws all exceptions besides dealing with deletion of
1388    * children.
1389    */
1390   public static void deleteNodeRecursively(ZooKeeperWatcher zkw, String node)
1391   throws KeeperException {
1392     deleteNodeRecursivelyMultiOrSequential(zkw, true, node);
1393   }
1394 
1395   /**
1396    * Delete all the children of the specified node but not the node itself.
1397    *
1398    * Sets no watches.  Throws all exceptions besides dealing with deletion of
1399    * children.
1400    *
1401    * If hbase.zookeeper.useMulti is true, use ZooKeeper's multi-update functionality.
1402    * Otherwise, run the list of operations sequentially.
1403    *
1404    * @throws KeeperException
1405    */
1406   public static void deleteChildrenRecursively(ZooKeeperWatcher zkw, String node)
1407       throws KeeperException {
1408     deleteChildrenRecursivelyMultiOrSequential(zkw, true, node);
1409   }
1410 
1411   /**
1412    * Delete all the children of the specified node but not the node itself. This
1413    * will first traverse the znode tree for listing the children and then delete
1414    * these znodes using multi-update api or sequential based on the specified
1415    * configurations.
1416    * <p>
1417    * Sets no watches. Throws all exceptions besides dealing with deletion of
1418    * children.
1419    * <p>
1420    * If hbase.zookeeper.useMulti is true, use ZooKeeper's multi-update
1421    * functionality. Otherwise, run the list of operations sequentially.
1422    * <p>
1423    * If all of the following are true:
1424    * <ul>
1425    * <li>runSequentialOnMultiFailure is true
1426    * <li>hbase.zookeeper.useMulti is true
1427    * </ul>
1428    * on calling multi, we get a ZooKeeper exception that can be handled by a
1429    * sequential call(*), we retry the operations one-by-one (sequentially).
1430    *
1431    * @param zkw
1432    *          - zk reference
1433    * @param runSequentialOnMultiFailure
1434    *          - if true when we get a ZooKeeper exception that could retry the
1435    *          operations one-by-one (sequentially)
1436    * @param pathRoots
1437    *          - path of the parent node(s)
1438    * @throws KeeperException.NotEmptyException
1439    *           if node has children while deleting
1440    * @throws KeeperException
1441    *           if unexpected ZooKeeper exception
1442    * @throws IllegalArgumentException
1443    *           if an invalid path is specified
1444    */
1445   public static void deleteChildrenRecursivelyMultiOrSequential(
1446       ZooKeeperWatcher zkw, boolean runSequentialOnMultiFailure,
1447       String... pathRoots) throws KeeperException {
1448     if (pathRoots == null || pathRoots.length <= 0) {
1449       LOG.warn("Given path is not valid!");
1450       return;
1451     }
1452     List<ZKUtilOp> ops = new ArrayList<ZKUtil.ZKUtilOp>();
1453     for (String eachRoot : pathRoots) {
1454       List<String> children = listChildrenBFSNoWatch(zkw, eachRoot);
1455       // Delete the leaves first and eventually get rid of the root
1456       for (int i = children.size() - 1; i >= 0; --i) {
1457         ops.add(ZKUtilOp.deleteNodeFailSilent(children.get(i)));
1458       }
1459     }
1460     // atleast one element should exist
1461     if (ops.size() > 0) {
1462       multiOrSequential(zkw, ops, runSequentialOnMultiFailure);
1463     }
1464   }
1465 
1466   /**
1467    * Delete the specified node and its children. This traverse the
1468    * znode tree for listing the children and then delete
1469    * these znodes including the parent using multi-update api or
1470    * sequential based on the specified configurations.
1471    * <p>
1472    * Sets no watches. Throws all exceptions besides dealing with deletion of
1473    * children.
1474    * <p>
1475    * If hbase.zookeeper.useMulti is true, use ZooKeeper's multi-update
1476    * functionality. Otherwise, run the list of operations sequentially.
1477    * <p>
1478    * If all of the following are true:
1479    * <ul>
1480    * <li>runSequentialOnMultiFailure is true
1481    * <li>hbase.zookeeper.useMulti is true
1482    * </ul>
1483    * on calling multi, we get a ZooKeeper exception that can be handled by a
1484    * sequential call(*), we retry the operations one-by-one (sequentially).
1485    *
1486    * @param zkw
1487    *          - zk reference
1488    * @param runSequentialOnMultiFailure
1489    *          - if true when we get a ZooKeeper exception that could retry the
1490    *          operations one-by-one (sequentially)
1491    * @param pathRoots
1492    *          - path of the parent node(s)
1493    * @throws KeeperException.NotEmptyException
1494    *           if node has children while deleting
1495    * @throws KeeperException
1496    *           if unexpected ZooKeeper exception
1497    * @throws IllegalArgumentException
1498    *           if an invalid path is specified
1499    */
1500   public static void deleteNodeRecursivelyMultiOrSequential(ZooKeeperWatcher zkw,
1501       boolean runSequentialOnMultiFailure, String... pathRoots) throws KeeperException {
1502     if (pathRoots == null || pathRoots.length <= 0) {
1503       LOG.warn("Given path is not valid!");
1504       return;
1505     }
1506     List<ZKUtilOp> ops = new ArrayList<ZKUtil.ZKUtilOp>();
1507     for (String eachRoot : pathRoots) {
1508       // Zookeeper Watches are one time triggers; When children of parent nodes are deleted
1509       // recursively, must set another watch, get notified of delete node
1510       List<String> children = listChildrenBFSAndWatchThem(zkw, eachRoot);
1511       // Delete the leaves first and eventually get rid of the root
1512       for (int i = children.size() - 1; i >= 0; --i) {
1513         ops.add(ZKUtilOp.deleteNodeFailSilent(children.get(i)));
1514       }
1515       try {
1516         if (zkw.getRecoverableZooKeeper().exists(eachRoot, zkw) != null) {
1517           ops.add(ZKUtilOp.deleteNodeFailSilent(eachRoot));
1518         }
1519       } catch (InterruptedException e) {
1520         zkw.interruptedException(e);
1521       }
1522     }
1523     // atleast one element should exist
1524     if (ops.size() > 0) {
1525       multiOrSequential(zkw, ops, runSequentialOnMultiFailure);
1526     }
1527   }
1528 
1529   /**
1530    * BFS Traversal of all the children under path, with the entries in the list,
1531    * in the same order as that of the traversal. Lists all the children without
1532    * setting any watches.
1533    *
1534    * @param zkw
1535    *          - zk reference
1536    * @param znode
1537    *          - path of node
1538    * @return list of children znodes under the path
1539    * @throws KeeperException
1540    *           if unexpected ZooKeeper exception
1541    */
1542   private static List<String> listChildrenBFSNoWatch(ZooKeeperWatcher zkw,
1543       final String znode) throws KeeperException {
1544     Deque<String> queue = new LinkedList<String>();
1545     List<String> tree = new ArrayList<String>();
1546     queue.add(znode);
1547     while (true) {
1548       String node = queue.pollFirst();
1549       if (node == null) {
1550         break;
1551       }
1552       List<String> children = listChildrenNoWatch(zkw, node);
1553       if (children == null) {
1554         continue;
1555       }
1556       for (final String child : children) {
1557         final String childPath = node + "/" + child;
1558         queue.add(childPath);
1559         tree.add(childPath);
1560       }
1561     }
1562     return tree;
1563   }
1564 
1565   /**
1566    * BFS Traversal of all the children under path, with the entries in the list,
1567    * in the same order as that of the traversal.
1568    * Lists all the children and set watches on to them.
1569    *
1570    * @param zkw
1571    *          - zk reference
1572    * @param znode
1573    *          - path of node
1574    * @return list of children znodes under the path
1575    * @throws KeeperException
1576    *           if unexpected ZooKeeper exception
1577    */
1578   private static List<String> listChildrenBFSAndWatchThem(ZooKeeperWatcher zkw, final String znode)
1579       throws KeeperException {
1580     Deque<String> queue = new LinkedList<String>();
1581     List<String> tree = new ArrayList<String>();
1582     queue.add(znode);
1583     while (true) {
1584       String node = queue.pollFirst();
1585       if (node == null) {
1586         break;
1587       }
1588       List<String> children = listChildrenAndWatchThem(zkw, node);
1589       if (children == null) {
1590         continue;
1591       }
1592       for (final String child : children) {
1593         final String childPath = node + "/" + child;
1594         queue.add(childPath);
1595         tree.add(childPath);
1596       }
1597     }
1598     return tree;
1599   }
1600 
1601   /**
1602    * Represents an action taken by ZKUtil, e.g. createAndFailSilent.
1603    * These actions are higher-level than ZKOp actions, which represent
1604    * individual actions in the ZooKeeper API, like create.
1605    */
1606   public abstract static class ZKUtilOp {
1607     private String path;
1608 
1609     private ZKUtilOp(String path) {
1610       this.path = path;
1611     }
1612 
1613     /**
1614      * @return a createAndFailSilent ZKUtilOp
1615      */
1616     public static ZKUtilOp createAndFailSilent(String path, byte[] data) {
1617       return new CreateAndFailSilent(path, data);
1618     }
1619 
1620     /**
1621      * @return a deleteNodeFailSilent ZKUtilOP
1622      */
1623     public static ZKUtilOp deleteNodeFailSilent(String path) {
1624       return new DeleteNodeFailSilent(path);
1625     }
1626 
1627     /**
1628      * @return a setData ZKUtilOp
1629      */
1630     public static ZKUtilOp setData(String path, byte [] data) {
1631       return new SetData(path, data);
1632     }
1633 
1634     /**
1635      * @return path to znode where the ZKOp will occur
1636      */
1637     public String getPath() {
1638       return path;
1639     }
1640 
1641     /**
1642      * ZKUtilOp representing createAndFailSilent in ZooKeeper
1643      * (attempt to create node, ignore error if already exists)
1644      */
1645     public static class CreateAndFailSilent extends ZKUtilOp {
1646       private byte [] data;
1647 
1648       private CreateAndFailSilent(String path, byte [] data) {
1649         super(path);
1650         this.data = data;
1651       }
1652 
1653       public byte[] getData() {
1654         return data;
1655       }
1656 
1657       @Override
1658       public boolean equals(Object o) {
1659         if (this == o) return true;
1660         if (!(o instanceof CreateAndFailSilent)) return false;
1661 
1662         CreateAndFailSilent op = (CreateAndFailSilent) o;
1663         return getPath().equals(op.getPath()) && Arrays.equals(data, op.data);
1664       }
1665 
1666       @Override
1667       public int hashCode() {
1668         int ret = 17 + getPath().hashCode() * 31;
1669         return ret * 31 + Bytes.hashCode(data);
1670       }
1671     }
1672 
1673     /**
1674      * ZKUtilOp representing deleteNodeFailSilent in ZooKeeper
1675      * (attempt to delete node, ignore error if node doesn't exist)
1676      */
1677     public static class DeleteNodeFailSilent extends ZKUtilOp {
1678       private DeleteNodeFailSilent(String path) {
1679         super(path);
1680       }
1681 
1682       @Override
1683       public boolean equals(Object o) {
1684         if (this == o) return true;
1685         if (!(o instanceof DeleteNodeFailSilent)) return false;
1686 
1687         return super.equals(o);
1688       }
1689 
1690       @Override
1691       public int hashCode() {
1692         return getPath().hashCode();
1693       }
1694     }
1695 
1696     /**
1697      * ZKUtilOp representing setData in ZooKeeper
1698      */
1699     public static class SetData extends ZKUtilOp {
1700       private byte [] data;
1701 
1702       private SetData(String path, byte [] data) {
1703         super(path);
1704         this.data = data;
1705       }
1706 
1707       public byte[] getData() {
1708         return data;
1709       }
1710 
1711       @Override
1712       public boolean equals(Object o) {
1713         if (this == o) return true;
1714         if (!(o instanceof SetData)) return false;
1715 
1716         SetData op = (SetData) o;
1717         return getPath().equals(op.getPath()) && Arrays.equals(data, op.data);
1718       }
1719 
1720       @Override
1721       public int hashCode() {
1722         int ret = getPath().hashCode();
1723         return ret * 31 + Bytes.hashCode(data);
1724       }
1725     }
1726   }
1727 
1728   /**
1729    * Convert from ZKUtilOp to ZKOp
1730    */
1731   private static Op toZooKeeperOp(ZooKeeperWatcher zkw, ZKUtilOp op)
1732   throws UnsupportedOperationException {
1733     if(op == null) return null;
1734 
1735     if (op instanceof CreateAndFailSilent) {
1736       CreateAndFailSilent cafs = (CreateAndFailSilent)op;
1737       return Op.create(cafs.getPath(), cafs.getData(), createACL(zkw, cafs.getPath()),
1738         CreateMode.PERSISTENT);
1739     } else if (op instanceof DeleteNodeFailSilent) {
1740       DeleteNodeFailSilent dnfs = (DeleteNodeFailSilent)op;
1741       return Op.delete(dnfs.getPath(), -1);
1742     } else if (op instanceof SetData) {
1743       SetData sd = (SetData)op;
1744       return Op.setData(sd.getPath(), sd.getData(), -1);
1745     } else {
1746       throw new UnsupportedOperationException("Unexpected ZKUtilOp type: "
1747         + op.getClass().getName());
1748     }
1749   }
1750 
1751   /**
1752    * If hbase.zookeeper.useMulti is true, use ZooKeeper's multi-update functionality.
1753    * Otherwise, run the list of operations sequentially.
1754    *
1755    * If all of the following are true:
1756    * - runSequentialOnMultiFailure is true
1757    * - hbase.zookeeper.useMulti is true
1758    * - on calling multi, we get a ZooKeeper exception that can be handled by a sequential call(*)
1759    * Then:
1760    * - we retry the operations one-by-one (sequentially)
1761    *
1762    * Note *: an example is receiving a NodeExistsException from a "create" call.  Without multi,
1763    * a user could call "createAndFailSilent" to ensure that a node exists if they don't care who
1764    * actually created the node (i.e. the NodeExistsException from ZooKeeper is caught).
1765    * This will cause all operations in the multi to fail, however, because
1766    * the NodeExistsException that zk.create throws will fail the multi transaction.
1767    * In this case, if the previous conditions hold, the commands are run sequentially, which should
1768    * result in the correct final state, but means that the operations will not run atomically.
1769    *
1770    * @throws KeeperException
1771    */
1772   public static void multiOrSequential(ZooKeeperWatcher zkw, List<ZKUtilOp> ops,
1773       boolean runSequentialOnMultiFailure) throws KeeperException {
1774     if (ops == null) return;
1775     boolean useMulti = zkw.getConfiguration().getBoolean(HConstants.ZOOKEEPER_USEMULTI, false);
1776 
1777     if (useMulti) {
1778       List<Op> zkOps = new LinkedList<Op>();
1779       for (ZKUtilOp op : ops) {
1780         zkOps.add(toZooKeeperOp(zkw, op));
1781       }
1782       try {
1783         zkw.getRecoverableZooKeeper().multi(zkOps);
1784       } catch (KeeperException ke) {
1785        switch (ke.code()) {
1786          case NODEEXISTS:
1787          case NONODE:
1788          case BADVERSION:
1789          case NOAUTH:
1790            // if we get an exception that could be solved by running sequentially
1791            // (and the client asked us to), then break out and run sequentially
1792            if (runSequentialOnMultiFailure) {
1793              LOG.info("On call to ZK.multi, received exception: " + ke.toString() + "."
1794                + "  Attempting to run operations sequentially because"
1795                + " runSequentialOnMultiFailure is: " + runSequentialOnMultiFailure + ".");
1796              processSequentially(zkw, ops);
1797              break;
1798            }
1799           default:
1800             throw ke;
1801         }
1802       } catch (InterruptedException ie) {
1803         zkw.interruptedException(ie);
1804       }
1805     } else {
1806       // run sequentially
1807       processSequentially(zkw, ops);
1808     }
1809 
1810   }
1811 
1812   private static void processSequentially(ZooKeeperWatcher zkw, List<ZKUtilOp> ops)
1813       throws KeeperException, NoNodeException {
1814     for (ZKUtilOp op : ops) {
1815       if (op instanceof CreateAndFailSilent) {
1816         createAndFailSilent(zkw, (CreateAndFailSilent) op);
1817       } else if (op instanceof DeleteNodeFailSilent) {
1818         deleteNodeFailSilent(zkw, (DeleteNodeFailSilent) op);
1819       } else if (op instanceof SetData) {
1820         setData(zkw, (SetData) op);
1821       } else {
1822         throw new UnsupportedOperationException("Unexpected ZKUtilOp type: "
1823             + op.getClass().getName());
1824       }
1825     }
1826   }
1827 
1828   //
1829   // ZooKeeper cluster information
1830   //
1831 
1832   /** @return String dump of everything in ZooKeeper. */
1833   public static String dump(ZooKeeperWatcher zkw) {
1834     StringBuilder sb = new StringBuilder();
1835     try {
1836       sb.append("HBase is rooted at ").append(zkw.baseZNode);
1837       sb.append("\nActive master address: ");
1838       try {
1839         sb.append(MasterAddressTracker.getMasterAddress(zkw));
1840       } catch (IOException e) {
1841         sb.append("<<FAILED LOOKUP: " + e.getMessage() + ">>");
1842       }
1843       sb.append("\nBackup master addresses:");
1844       for (String child : listChildrenNoWatch(zkw,
1845                                               zkw.backupMasterAddressesZNode)) {
1846         sb.append("\n ").append(child);
1847       }
1848       sb.append("\nRegion server holding hbase:meta: "
1849         + new MetaTableLocator().getMetaRegionLocation(zkw));
1850       Configuration conf = HBaseConfiguration.create();
1851       int numMetaReplicas = conf.getInt(HConstants.META_REPLICAS_NUM,
1852                HConstants.DEFAULT_META_REPLICA_NUM);
1853       for (int i = 1; i < numMetaReplicas; i++) {
1854         sb.append("\nRegion server holding hbase:meta, replicaId " + i + " "
1855                     + new MetaTableLocator().getMetaRegionLocation(zkw, i));
1856       }
1857       sb.append("\nRegion servers:");
1858       for (String child : listChildrenNoWatch(zkw, zkw.rsZNode)) {
1859         sb.append("\n ").append(child);
1860       }
1861       try {
1862         getReplicationZnodesDump(zkw, sb);
1863       } catch (KeeperException ke) {
1864         LOG.warn("Couldn't get the replication znode dump", ke);
1865       }
1866       sb.append("\nQuorum Server Statistics:");
1867       String[] servers = zkw.getQuorum().split(",");
1868       for (String server : servers) {
1869         sb.append("\n ").append(server);
1870         try {
1871           String[] stat = getServerStats(server, ZKUtil.zkDumpConnectionTimeOut);
1872 
1873           if (stat == null) {
1874             sb.append("[Error] invalid quorum server: " + server);
1875             break;
1876           }
1877 
1878           for (String s : stat) {
1879             sb.append("\n  ").append(s);
1880           }
1881         } catch (Exception e) {
1882           sb.append("\n  ERROR: ").append(e.getMessage());
1883         }
1884       }
1885     } catch (KeeperException ke) {
1886       sb.append("\nFATAL ZooKeeper Exception!\n");
1887       sb.append("\n" + ke.getMessage());
1888     }
1889     return sb.toString();
1890   }
1891 
1892   /**
1893    * Appends replication znodes to the passed StringBuilder.
1894    * @param zkw
1895    * @param sb
1896    * @throws KeeperException
1897    */
1898   private static void getReplicationZnodesDump(ZooKeeperWatcher zkw, StringBuilder sb)
1899       throws KeeperException {
1900     String replicationZNodeName = zkw.getConfiguration().get("zookeeper.znode.replication",
1901       "replication");
1902     String replicationZnode = joinZNode(zkw.baseZNode, replicationZNodeName);
1903     if (ZKUtil.checkExists(zkw, replicationZnode) == -1) return;
1904     // do a ls -r on this znode
1905     sb.append("\n").append(replicationZnode).append(": ");
1906     List<String> children = ZKUtil.listChildrenNoWatch(zkw, replicationZnode);
1907     for (String child : children) {
1908       String znode = joinZNode(replicationZnode, child);
1909       if (child.equals(zkw.getConfiguration().get("zookeeper.znode.replication.peers", "peers"))) {
1910         appendPeersZnodes(zkw, znode, sb);
1911       } else if (child.equals(zkw.getConfiguration().
1912           get("zookeeper.znode.replication.rs", "rs"))) {
1913         appendRSZnodes(zkw, znode, sb);
1914       }
1915     }
1916   }
1917 
1918   private static void appendRSZnodes(ZooKeeperWatcher zkw, String znode, StringBuilder sb)
1919       throws KeeperException {
1920     List<String> stack = new LinkedList<String>();
1921     stack.add(znode);
1922     do {
1923       String znodeToProcess = stack.remove(stack.size() - 1);
1924       sb.append("\n").append(znodeToProcess).append(": ");
1925       byte[] data;
1926       try {
1927         data = ZKUtil.getData(zkw, znodeToProcess);
1928       } catch (InterruptedException e) {
1929         zkw.interruptedException(e);
1930         return;
1931       }
1932       if (data != null && data.length > 0) { // log position
1933         long position = 0;
1934         try {
1935           position = ZKUtil.parseWALPositionFrom(ZKUtil.getData(zkw, znodeToProcess));
1936           sb.append(position);
1937         } catch (DeserializationException ignored) {
1938         } catch (InterruptedException e) {
1939           zkw.interruptedException(e);
1940           return;
1941         }
1942       }
1943       for (String zNodeChild : ZKUtil.listChildrenNoWatch(zkw, znodeToProcess)) {
1944         stack.add(ZKUtil.joinZNode(znodeToProcess, zNodeChild));
1945       }
1946     } while (stack.size() > 0);
1947   }
1948 
1949   private static void appendPeersZnodes(ZooKeeperWatcher zkw, String peersZnode,
1950     StringBuilder sb) throws KeeperException {
1951     int pblen = ProtobufUtil.lengthOfPBMagic();
1952     sb.append("\n").append(peersZnode).append(": ");
1953     for (String peerIdZnode : ZKUtil.listChildrenNoWatch(zkw, peersZnode)) {
1954       String znodeToProcess = ZKUtil.joinZNode(peersZnode, peerIdZnode);
1955       byte[] data;
1956       try {
1957         data = ZKUtil.getData(zkw, znodeToProcess);
1958       } catch (InterruptedException e) {
1959         zkw.interruptedException(e);
1960         return;
1961       }
1962       // parse the data of the above peer znode.
1963       try {
1964         ZooKeeperProtos.ReplicationPeer.Builder builder =
1965           ZooKeeperProtos.ReplicationPeer.newBuilder();
1966         ProtobufUtil.mergeFrom(builder, data, pblen, data.length - pblen);
1967         String clusterKey = builder.getClusterkey();
1968         sb.append("\n").append(znodeToProcess).append(": ").append(clusterKey);
1969         // add the peer-state.
1970         appendPeerState(zkw, znodeToProcess, sb);
1971       } catch (IOException ipbe) {
1972         LOG.warn("Got Exception while parsing peer: " + znodeToProcess, ipbe);
1973       }
1974     }
1975   }
1976 
1977   private static void appendPeerState(ZooKeeperWatcher zkw, String znodeToProcess,
1978       StringBuilder sb) throws KeeperException, InvalidProtocolBufferException {
1979     String peerState = zkw.getConfiguration().get("zookeeper.znode.replication.peers.state",
1980       "peer-state");
1981     int pblen = ProtobufUtil.lengthOfPBMagic();
1982     for (String child : ZKUtil.listChildrenNoWatch(zkw, znodeToProcess)) {
1983       if (!child.equals(peerState)) continue;
1984       String peerStateZnode = ZKUtil.joinZNode(znodeToProcess, child);
1985       sb.append("\n").append(peerStateZnode).append(": ");
1986       byte[] peerStateData;
1987       try {
1988         peerStateData = ZKUtil.getData(zkw, peerStateZnode);
1989         ZooKeeperProtos.ReplicationState.Builder builder =
1990             ZooKeeperProtos.ReplicationState.newBuilder();
1991         ProtobufUtil.mergeFrom(builder, peerStateData, pblen, peerStateData.length - pblen);
1992         sb.append(builder.getState().name());
1993       } catch (IOException ipbe) {
1994         LOG.warn("Got Exception while parsing peer: " + znodeToProcess, ipbe);
1995       } catch (InterruptedException e) {
1996         zkw.interruptedException(e);
1997         return;
1998       }
1999     }
2000   }
2001 
2002   /**
2003    * Gets the statistics from the given server.
2004    *
2005    * @param server  The server to get the statistics from.
2006    * @param timeout  The socket timeout to use.
2007    * @return The array of response strings.
2008    * @throws IOException When the socket communication fails.
2009    */
2010   public static String[] getServerStats(String server, int timeout)
2011   throws IOException {
2012     String[] sp = server.split(":");
2013     if (sp == null || sp.length == 0) {
2014       return null;
2015     }
2016 
2017     String host = sp[0];
2018     int port = sp.length > 1 ? Integer.parseInt(sp[1])
2019         : HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT;
2020 
2021     Socket socket = new Socket();
2022     InetSocketAddress sockAddr = new InetSocketAddress(host, port);
2023     socket.connect(sockAddr, timeout);
2024 
2025     socket.setSoTimeout(timeout);
2026     PrintWriter out = new PrintWriter(socket.getOutputStream(), true);
2027     BufferedReader in = new BufferedReader(new InputStreamReader(
2028       socket.getInputStream()));
2029     out.println("stat");
2030     out.flush();
2031     ArrayList<String> res = new ArrayList<String>();
2032     while (true) {
2033       String line = in.readLine();
2034       if (line != null) {
2035         res.add(line);
2036       } else {
2037         break;
2038       }
2039     }
2040     socket.close();
2041     return res.toArray(new String[res.size()]);
2042   }
2043 
2044   private static void logRetrievedMsg(final ZooKeeperWatcher zkw,
2045       final String znode, final byte [] data, final boolean watcherSet) {
2046     if (!LOG.isTraceEnabled()) return;
2047     LOG.trace(zkw.prefix("Retrieved " + ((data == null)? 0: data.length) +
2048       " byte(s) of data from znode " + znode +
2049       (watcherSet? " and set watcher; ": "; data=") +
2050       (data == null? "null": data.length == 0? "empty": (
2051           znode.startsWith(zkw.assignmentZNode)?
2052             ZKAssign.toString(data): // We should not be doing this reaching into another class
2053           znode.startsWith(ZooKeeperWatcher.META_ZNODE_PREFIX)?
2054             getServerNameOrEmptyString(data):
2055           znode.startsWith(zkw.backupMasterAddressesZNode)?
2056             getServerNameOrEmptyString(data):
2057           StringUtils.abbreviate(Bytes.toStringBinary(data), 32)))));
2058   }
2059 
2060   private static String getServerNameOrEmptyString(final byte [] data) {
2061     try {
2062       return ServerName.parseFrom(data).toString();
2063     } catch (DeserializationException e) {
2064       return "";
2065     }
2066   }
2067 
2068   /**
2069    * Waits for HBase installation's base (parent) znode to become available.
2070    * @throws IOException on ZK errors
2071    */
2072   public static void waitForBaseZNode(Configuration conf) throws IOException {
2073     LOG.info("Waiting until the base znode is available");
2074     String parentZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
2075         HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
2076     ZooKeeper zk = new ZooKeeper(ZKConfig.getZKQuorumServersString(conf),
2077         conf.getInt(HConstants.ZK_SESSION_TIMEOUT,
2078         HConstants.DEFAULT_ZK_SESSION_TIMEOUT), EmptyWatcher.instance);
2079 
2080     final int maxTimeMs = 10000;
2081     final int maxNumAttempts = maxTimeMs / HConstants.SOCKET_RETRY_WAIT_MS;
2082 
2083     KeeperException keeperEx = null;
2084     try {
2085       try {
2086         for (int attempt = 0; attempt < maxNumAttempts; ++attempt) {
2087           try {
2088             if (zk.exists(parentZNode, false) != null) {
2089               LOG.info("Parent znode exists: " + parentZNode);
2090               keeperEx = null;
2091               break;
2092             }
2093           } catch (KeeperException e) {
2094             keeperEx = e;
2095           }
2096           Threads.sleepWithoutInterrupt(HConstants.SOCKET_RETRY_WAIT_MS);
2097         }
2098       } finally {
2099         zk.close();
2100       }
2101     } catch (InterruptedException ex) {
2102       Thread.currentThread().interrupt();
2103     }
2104 
2105     if (keeperEx != null) {
2106       throw new IOException(keeperEx);
2107     }
2108   }
2109 
2110 
2111   public static byte[] blockUntilAvailable(
2112     final ZooKeeperWatcher zkw, final String znode, final long timeout)
2113     throws InterruptedException {
2114     if (timeout < 0) throw new IllegalArgumentException();
2115     if (zkw == null) throw new IllegalArgumentException();
2116     if (znode == null) throw new IllegalArgumentException();
2117 
2118     byte[] data = null;
2119     boolean finished = false;
2120     final long endTime = System.currentTimeMillis() + timeout;
2121     while (!finished) {
2122       try {
2123         data = ZKUtil.getData(zkw, znode);
2124       } catch(KeeperException e) {
2125         if (e instanceof KeeperException.SessionExpiredException
2126             || e instanceof KeeperException.AuthFailedException) {
2127           // non-recoverable errors so stop here
2128           throw new InterruptedException("interrupted due to " + e);
2129         }
2130         LOG.warn("Unexpected exception handling blockUntilAvailable", e);
2131       }
2132 
2133       if (data == null && (System.currentTimeMillis() +
2134         HConstants.SOCKET_RETRY_WAIT_MS < endTime)) {
2135         Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
2136       } else {
2137         finished = true;
2138       }
2139     }
2140 
2141     return data;
2142   }
2143 
2144 
2145   /**
2146    * Convert a {@link DeserializationException} to a more palatable {@link KeeperException}.
2147    * Used when can't let a {@link DeserializationException} out w/o changing public API.
2148    * @param e Exception to convert
2149    * @return Converted exception
2150    */
2151   public static KeeperException convert(final DeserializationException e) {
2152     KeeperException ke = new KeeperException.DataInconsistencyException();
2153     ke.initCause(e);
2154     return ke;
2155   }
2156 
2157   /**
2158    * Recursively print the current state of ZK (non-transactional)
2159    * @param root name of the root directory in zk to print
2160    * @throws KeeperException
2161    */
2162   public static void logZKTree(ZooKeeperWatcher zkw, String root) {
2163     if (!LOG.isDebugEnabled()) return;
2164     LOG.debug("Current zk system:");
2165     String prefix = "|-";
2166     LOG.debug(prefix + root);
2167     try {
2168       logZKTree(zkw, root, prefix);
2169     } catch (KeeperException e) {
2170       throw new RuntimeException(e);
2171     }
2172   }
2173 
2174   /**
2175    * Helper method to print the current state of the ZK tree.
2176    * @see #logZKTree(ZooKeeperWatcher, String)
2177    * @throws KeeperException if an unexpected exception occurs
2178    */
2179   protected static void logZKTree(ZooKeeperWatcher zkw, String root, String prefix)
2180       throws KeeperException {
2181     List<String> children = ZKUtil.listChildrenNoWatch(zkw, root);
2182     if (children == null) return;
2183     for (String child : children) {
2184       LOG.debug(prefix + child);
2185       String node = ZKUtil.joinZNode(root.equals("/") ? "" : root, child);
2186       logZKTree(zkw, node, prefix + "---");
2187     }
2188   }
2189 
2190   /**
2191    * @param position
2192    * @return Serialized protobuf of <code>position</code> with pb magic prefix prepended suitable
2193    *         for use as content of an wal position in a replication queue.
2194    */
2195   public static byte[] positionToByteArray(final long position) {
2196     byte[] bytes = ZooKeeperProtos.ReplicationHLogPosition.newBuilder().setPosition(position)
2197         .build().toByteArray();
2198     return ProtobufUtil.prependPBMagic(bytes);
2199   }
2200 
2201   /**
2202    * @param bytes - Content of a WAL position znode.
2203    * @return long - The current WAL position.
2204    * @throws DeserializationException
2205    */
2206   public static long parseWALPositionFrom(final byte[] bytes) throws DeserializationException {
2207     if (bytes == null) {
2208       throw new DeserializationException("Unable to parse null WAL position.");
2209     }
2210     if (ProtobufUtil.isPBMagicPrefix(bytes)) {
2211       int pblen = ProtobufUtil.lengthOfPBMagic();
2212       ZooKeeperProtos.ReplicationHLogPosition.Builder builder =
2213           ZooKeeperProtos.ReplicationHLogPosition.newBuilder();
2214       ZooKeeperProtos.ReplicationHLogPosition position;
2215       try {
2216         ProtobufUtil.mergeFrom(builder, bytes, pblen, bytes.length - pblen);
2217         position = builder.build();
2218       } catch (IOException e) {
2219         throw new DeserializationException(e);
2220       }
2221       return position.getPosition();
2222     } else {
2223       if (bytes.length > 0) {
2224         return Bytes.toLong(bytes);
2225       }
2226       return 0;
2227     }
2228   }
2229 
2230   /**
2231    * @param regionLastFlushedSequenceId the flushed sequence id of a region which is the min of its
2232    *          store max seq ids
2233    * @param storeSequenceIds column family to sequence Id map
2234    * @return Serialized protobuf of <code>RegionSequenceIds</code> with pb magic prefix prepended
2235    *         suitable for use to filter wal edits in distributedLogReplay mode
2236    */
2237   public static byte[] regionSequenceIdsToByteArray(final Long regionLastFlushedSequenceId,
2238       final Map<byte[], Long> storeSequenceIds) {
2239     ClusterStatusProtos.RegionStoreSequenceIds.Builder regionSequenceIdsBuilder =
2240         ClusterStatusProtos.RegionStoreSequenceIds.newBuilder();
2241     ClusterStatusProtos.StoreSequenceId.Builder storeSequenceIdBuilder =
2242         ClusterStatusProtos.StoreSequenceId.newBuilder();
2243     if (storeSequenceIds != null) {
2244       for (Map.Entry<byte[], Long> e : storeSequenceIds.entrySet()){
2245         byte[] columnFamilyName = e.getKey();
2246         Long curSeqId = e.getValue();
2247         storeSequenceIdBuilder.setFamilyName(ByteStringer.wrap(columnFamilyName));
2248         storeSequenceIdBuilder.setSequenceId(curSeqId);
2249         regionSequenceIdsBuilder.addStoreSequenceId(storeSequenceIdBuilder.build());
2250         storeSequenceIdBuilder.clear();
2251       }
2252     }
2253     regionSequenceIdsBuilder.setLastFlushedSequenceId(regionLastFlushedSequenceId);
2254     byte[] result = regionSequenceIdsBuilder.build().toByteArray();
2255     return ProtobufUtil.prependPBMagic(result);
2256   }
2257 
2258   /**
2259    * @param bytes Content of serialized data of RegionStoreSequenceIds
2260    * @return a RegionStoreSequenceIds object
2261    * @throws DeserializationException
2262    */
2263   public static RegionStoreSequenceIds parseRegionStoreSequenceIds(final byte[] bytes)
2264       throws DeserializationException {
2265     if (bytes == null || !ProtobufUtil.isPBMagicPrefix(bytes)) {
2266       throw new DeserializationException("Unable to parse RegionStoreSequenceIds.");
2267     }
2268     RegionStoreSequenceIds.Builder regionSequenceIdsBuilder =
2269         ClusterStatusProtos.RegionStoreSequenceIds.newBuilder();
2270     int pblen = ProtobufUtil.lengthOfPBMagic();
2271     RegionStoreSequenceIds storeIds = null;
2272     try {
2273       ProtobufUtil.mergeFrom(regionSequenceIdsBuilder, bytes, pblen, bytes.length - pblen);
2274       storeIds = regionSequenceIdsBuilder.build();
2275     } catch (IOException e) {
2276       throw new DeserializationException(e);
2277     }
2278     return storeIds;
2279   }
2280 }