View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.zookeeper;
21  
22  import java.util.List;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.ServerName;
28  import org.apache.hadoop.hbase.executor.RegionTransitionData;
29  import org.apache.hadoop.hbase.executor.EventHandler.EventType;
30  import org.apache.zookeeper.AsyncCallback;
31  import org.apache.zookeeper.KeeperException;
32  import org.apache.zookeeper.KeeperException.Code;
33  import org.apache.zookeeper.KeeperException.NoNodeException;
34  import org.apache.zookeeper.KeeperException.NodeExistsException;
35  import org.apache.zookeeper.data.Stat;
36  
37  /**
38   * Utility class for doing region assignment in ZooKeeper.  This class extends
39   * stuff done in {@link ZKUtil} to cover specific assignment operations.
40   * <p>
41   * Contains only static methods and constants.
42   * <p>
43   * Used by both the Master and RegionServer.
44   * <p>
45   * All valid transitions outlined below:
46   * <p>
47   * <b>MASTER</b>
48   * <ol>
49   *   <li>
50   *     Master creates an unassigned node as OFFLINE.
51   *     - Cluster startup and table enabling.
52   *   </li>
53   *   <li>
54   *     Master forces an existing unassigned node to OFFLINE.
55   *     - RegionServer failure.
56   *     - Allows transitions from all states to OFFLINE.
57   *   </li>
58   *   <li>
59   *     Master deletes an unassigned node that was in an OPENED state.
60   *     - Normal region transitions.  Besides cluster startup, no other deletions
61   *     of unassigned nodes are allowed.
62   *   </li>
63   *   <li>
64   *     Master deletes all unassigned nodes regardless of state.
65   *     - Cluster startup before any assignment happens.
66   *   </li>
67   * </ol>
68   * <p>
69   * <b>REGIONSERVER</b>
70   * <ol>
71   *   <li>
72   *     RegionServer creates an unassigned node as CLOSING.
73   *     - All region closes will do this in response to a CLOSE RPC from Master.
74   *     - A node can never be transitioned to CLOSING, only created.
75   *   </li>
76   *   <li>
77   *     RegionServer transitions an unassigned node from CLOSING to CLOSED.
78   *     - Normal region closes.  CAS operation.
79   *   </li>
80   *   <li>
81   *     RegionServer transitions an unassigned node from OFFLINE to OPENING.
82   *     - All region opens will do this in response to an OPEN RPC from the Master.
83   *     - Normal region opens.  CAS operation.
84   *   </li>
85   *   <li>
86   *     RegionServer transitions an unassigned node from OPENING to OPENED.
87   *     - Normal region opens.  CAS operation.
88   *   </li>
89   * </ol>
90   */
91  public class ZKAssign {
92    private static final Log LOG = LogFactory.getLog(ZKAssign.class);
93  
94    /**
95     * Gets the full path node name for the unassigned node for the specified
96     * region.
97     * @param zkw zk reference
98     * @param regionName region name
99     * @return full path node name
100    */
101   public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
102     return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
103   }
104 
105   /**
106    * Gets the region name from the full path node name of an unassigned node.
107    * @param path full zk path
108    * @return region name
109    */
110   public static String getRegionName(ZooKeeperWatcher zkw, String path) {
111     return path.substring(zkw.assignmentZNode.length()+1);
112   }
113 
114   // Master methods
115 
116   /**
117    * Creates a new unassigned node in the OFFLINE state for the specified region.
118    *
119    * <p>Does not transition nodes from other states.  If a node already exists
120    * for this region, a {@link NodeExistsException} will be thrown.
121    *
122    * <p>Sets a watcher on the unassigned region node if the method is successful.
123    *
124    * <p>This method should only be used during cluster startup and the enabling
125    * of a table.
126    *
127    * @param zkw zk reference
128    * @param region region to be created as offline
129    * @param serverName server event originates from
130    * @throws KeeperException if unexpected zookeeper exception
131    * @throws KeeperException.NodeExistsException if node already exists
132    */
133   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
134       ServerName serverName)
135   throws KeeperException, KeeperException.NodeExistsException {
136     createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
137   }
138 
139   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
140       ServerName serverName, final EventType event)
141   throws KeeperException, KeeperException.NodeExistsException {
142     LOG.debug(zkw.prefix("Creating unassigned node for " +
143       region.getEncodedName() + " in OFFLINE state"));
144     RegionTransitionData data = new RegionTransitionData(event,
145       region.getRegionName(), serverName);
146     String node = getNodeName(zkw, region.getEncodedName());
147     ZKUtil.createAndWatch(zkw, node, data.getBytes());
148   }
149 
150   /**
151    * Creates an unassigned node in the OFFLINE state for the specified region.
152    * <p>
153    * Runs asynchronously.  Depends on no pre-existing znode.
154    *
155    * <p>Sets a watcher on the unassigned region node.
156    *
157    * @param zkw zk reference
158    * @param region region to be created as offline
159    * @param serverName server event originates from
160    * @param cb
161    * @param ctx
162    * @throws KeeperException if unexpected zookeeper exception
163    * @throws KeeperException.NodeExistsException if node already exists
164    */
165   public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
166       HRegionInfo region, ServerName serverName,
167       final AsyncCallback.StringCallback cb, final Object ctx)
168   throws KeeperException {
169     LOG.debug(zkw.prefix("Async create of unassigned node for " +
170       region.getEncodedName() + " with OFFLINE state"));
171     RegionTransitionData data = new RegionTransitionData(
172         EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
173     String node = getNodeName(zkw, region.getEncodedName());
174     ZKUtil.asyncCreate(zkw, node, data.getBytes(), cb, ctx);
175   }
176 
177   /**
178    * Forces an existing unassigned node to the OFFLINE state for the specified
179    * region.
180    *
181    * <p>Does not create a new node.  If a node does not already exist for this
182    * region, a {@link NoNodeException} will be thrown.
183    *
184    * <p>Sets a watcher on the unassigned region node if the method is
185    * successful.
186    *
187    * <p>This method should only be used during recovery of regionserver failure.
188    *
189    * @param zkw zk reference
190    * @param region region to be forced as offline
191    * @param serverName server event originates from
192    * @throws KeeperException if unexpected zookeeper exception
193    * @throws KeeperException.NoNodeException if node does not exist
194    */
195   public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
196       ServerName serverName)
197   throws KeeperException, KeeperException.NoNodeException {
198     LOG.debug(zkw.prefix("Forcing existing unassigned node for " +
199       region.getEncodedName() + " to OFFLINE state"));
200     RegionTransitionData data = new RegionTransitionData(
201         EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
202     String node = getNodeName(zkw, region.getEncodedName());
203     ZKUtil.setData(zkw, node, data.getBytes());
204   }
205 
206   /**
207    * Creates or force updates an unassigned node to the OFFLINE state for the
208    * specified region.
209    * <p>
210    * Attempts to create the node but if it exists will force it to transition to
211    * and OFFLINE state.
212    *
213    * <p>Sets a watcher on the unassigned region node if the method is
214    * successful.
215    *
216    * <p>This method should be used when assigning a region.
217    *
218    * @param zkw zk reference
219    * @param region region to be created as offline
220    * @param serverName server event originates from
221    * @return the version of the znode created in OFFLINE state, -1 if
222    *         unsuccessful.
223    * @throws KeeperException if unexpected zookeeper exception
224    * @throws KeeperException.NodeExistsException if node already exists
225    */
226   public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
227       HRegionInfo region, ServerName serverName) throws KeeperException {
228     return createOrForceNodeOffline(zkw, region, serverName, false, true);
229   }
230 
231   /**
232    * Creates or force updates an unassigned node to the OFFLINE state for the
233    * specified region.
234    * <p>
235    * Attempts to create the node but if it exists will force it to transition to
236    * and OFFLINE state.
237    * <p>
238    * Sets a watcher on the unassigned region node if the method is successful.
239    * 
240    * <p>
241    * This method should be used when assigning a region.
242    * 
243    * @param zkw
244    *          zk reference
245    * @param region
246    *          region to be created as offline
247    * @param serverName
248    *          server event originates from
249    * @param hijack
250    *          - true if to be hijacked and reassigned, false otherwise
251    * @param allowCreation
252    *          - true if the node has to be created newly, false otherwise
253    * @throws KeeperException
254    *           if unexpected zookeeper exception
255    * @return the version of the znode created in OFFLINE state, -1 if
256    *         unsuccessful.
257    * @throws KeeperException.NodeExistsException
258    *           if node already exists
259    */
260   public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
261       HRegionInfo region, ServerName serverName,
262       boolean hijack, boolean allowCreation)
263   throws KeeperException {
264     LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
265       region.getEncodedName() + " with OFFLINE state"));
266     RegionTransitionData data = new RegionTransitionData(
267         EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
268     String node = getNodeName(zkw, region.getEncodedName());
269     Stat stat = new Stat();
270     zkw.sync(node);
271     int version = ZKUtil.checkExists(zkw, node);
272     if (version == -1) {
273       // While trying to transit a node to OFFLINE that was in previously in 
274       // OPENING state but before it could transit to OFFLINE state if RS had 
275       // opened the region then the Master deletes the assigned region znode. 
276       // In that case the znode will not exist. So we should not
277       // create the znode again which will lead to double assignment.
278       if (hijack && !allowCreation) {
279         return -1;
280       }
281       return ZKUtil.createAndWatch(zkw, node, data.getBytes());
282     } else {
283       RegionTransitionData curDataInZNode = ZKAssign.getDataNoWatch(zkw, region
284           .getEncodedName(), stat);
285       // Do not move the node to OFFLINE if znode is in any of the following
286       // state.
287       // Because these are already executed states.
288       if (hijack && null != curDataInZNode) {
289         EventType eventType = curDataInZNode.getEventType();
290         if (eventType.equals(EventType.M_ZK_REGION_CLOSING)
291             || eventType.equals(EventType.RS_ZK_REGION_CLOSED)
292             || eventType.equals(EventType.RS_ZK_REGION_OPENED)) {
293           return -1;
294         }
295       }
296 
297       boolean setData = false;
298       try {
299         setData = ZKUtil.setData(zkw, node, data.getBytes(), version);
300         // Setdata throws KeeperException which aborts the Master. So we are
301         // catching it here.
302         // If just before setting the znode to OFFLINE if the RS has made any
303         // change to the
304         // znode state then we need to return -1.
305       } catch (KeeperException kpe) {
306         LOG.info("Version mismatch while setting the node to OFFLINE state.");
307         return -1;
308       }
309       if (!setData) {
310         return -1;
311       } else {
312         // We successfully forced to OFFLINE, reset watch and handle if
313         // the state changed in between our set and the watch
314         RegionTransitionData curData =
315           ZKAssign.getData(zkw, region.getEncodedName());
316         if (curData.getEventType() != data.getEventType()) {
317           // state changed, need to process
318           return -1;
319         }
320       }
321     }
322     return stat.getVersion() + 1;
323   }
324 
325   /**
326    * Deletes an existing unassigned node that is in the OPENED state for the
327    * specified region.
328    *
329    * <p>If a node does not already exist for this region, a
330    * {@link NoNodeException} will be thrown.
331    *
332    * <p>No watcher is set whether this succeeds or not.
333    *
334    * <p>Returns false if the node was not in the proper state but did exist.
335    *
336    * <p>This method is used during normal region transitions when a region
337    * finishes successfully opening.  This is the Master acknowledging completion
338    * of the specified regions transition.
339    *
340    * @param zkw zk reference
341    * @param regionName opened region to be deleted from zk
342    * @throws KeeperException if unexpected zookeeper exception
343    * @throws KeeperException.NoNodeException if node does not exist
344    */
345   public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
346       String regionName)
347   throws KeeperException, KeeperException.NoNodeException {
348     return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_OPENED);
349   }
350 
351   /**
352    * Deletes an existing unassigned node that is in the OFFLINE state for the
353    * specified region.
354    *
355    * <p>If a node does not already exist for this region, a
356    * {@link NoNodeException} will be thrown.
357    *
358    * <p>No watcher is set whether this succeeds or not.
359    *
360    * <p>Returns false if the node was not in the proper state but did exist.
361    *
362    * <p>This method is used during master failover when the regions on an RS
363    * that has died are all set to OFFLINE before being processed.
364    *
365    * @param zkw zk reference
366    * @param regionName closed region to be deleted from zk
367    * @throws KeeperException if unexpected zookeeper exception
368    * @throws KeeperException.NoNodeException if node does not exist
369    */
370   public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
371       String regionName)
372   throws KeeperException, KeeperException.NoNodeException {
373     return deleteNode(zkw, regionName, EventType.M_ZK_REGION_OFFLINE);
374   }
375 
376   /**
377    * Deletes an existing unassigned node that is in the CLOSED state for the
378    * specified region.
379    *
380    * <p>If a node does not already exist for this region, a
381    * {@link NoNodeException} will be thrown.
382    *
383    * <p>No watcher is set whether this succeeds or not.
384    *
385    * <p>Returns false if the node was not in the proper state but did exist.
386    *
387    * <p>This method is used during table disables when a region finishes
388    * successfully closing.  This is the Master acknowledging completion
389    * of the specified regions transition to being closed.
390    *
391    * @param zkw zk reference
392    * @param regionName closed region to be deleted from zk
393    * @throws KeeperException if unexpected zookeeper exception
394    * @throws KeeperException.NoNodeException if node does not exist
395    */
396   public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
397       String regionName)
398   throws KeeperException, KeeperException.NoNodeException {
399     return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSED);
400   }
401 
402   /**
403    * Deletes an existing unassigned node that is in the CLOSING state for the
404    * specified region.
405    *
406    * <p>If a node does not already exist for this region, a
407    * {@link NoNodeException} will be thrown.
408    *
409    * <p>No watcher is set whether this succeeds or not.
410    *
411    * <p>Returns false if the node was not in the proper state but did exist.
412    *
413    * <p>This method is used during table disables when a region finishes
414    * successfully closing.  This is the Master acknowledging completion
415    * of the specified regions transition to being closed.
416    *
417    * @param zkw zk reference
418    * @param region closing region to be deleted from zk
419    * @throws KeeperException if unexpected zookeeper exception
420    * @throws KeeperException.NoNodeException if node does not exist
421    */
422   public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
423       HRegionInfo region)
424   throws KeeperException, KeeperException.NoNodeException {
425     String regionName = region.getEncodedName();
426     return deleteNode(zkw, regionName, EventType.M_ZK_REGION_CLOSING);
427   }
428 
429   /**
430    * Deletes an existing unassigned node that is in the specified state for the
431    * specified region.
432    *
433    * <p>If a node does not already exist for this region, a
434    * {@link NoNodeException} will be thrown.
435    *
436    * <p>No watcher is set whether this succeeds or not.
437    *
438    * <p>Returns false if the node was not in the proper state but did exist.
439    *
440    * <p>This method is used when a region finishes opening/closing.
441    * The Master acknowledges completion
442    * of the specified regions transition to being closed/opened.
443    *
444    * @param zkw zk reference
445    * @param regionName region to be deleted from zk
446    * @param expectedState state region must be in for delete to complete
447    * @throws KeeperException if unexpected zookeeper exception
448    * @throws KeeperException.NoNodeException if node does not exist
449    */
450   public static boolean deleteNode(ZooKeeperWatcher zkw, String regionName,
451       EventType expectedState)
452   throws KeeperException, KeeperException.NoNodeException {
453     return deleteNode(zkw, regionName, expectedState, -1);
454   }
455 
456   /**
457    * Deletes an existing unassigned node that is in the specified state for the
458    * specified region.
459    *
460    * <p>If a node does not already exist for this region, a
461    * {@link NoNodeException} will be thrown.
462    *
463    * <p>No watcher is set whether this succeeds or not.
464    *
465    * <p>Returns false if the node was not in the proper state but did exist.
466    *
467    * <p>This method is used when a region finishes opening/closing.
468    * The Master acknowledges completion
469    * of the specified regions transition to being closed/opened.
470    *
471    * @param zkw zk reference
472    * @param regionName region to be deleted from zk
473    * @param expectedState state region must be in for delete to complete
474    * @param expectedVersion of the znode that is to be deleted.
475    *        If expectedVersion need not be compared while deleting the znode
476    *        pass -1
477    * @throws KeeperException if unexpected zookeeper exception
478    * @throws KeeperException.NoNodeException if node does not exist
479    */
480   public static boolean deleteNode(ZooKeeperWatcher zkw, String regionName,
481       EventType expectedState, int expectedVersion)
482   throws KeeperException, KeeperException.NoNodeException {
483     LOG.debug(zkw.prefix("Deleting existing unassigned " +
484       "node for " + regionName + " that is in expected state " + expectedState));
485     String node = getNodeName(zkw, regionName);
486     zkw.sync(node);
487     Stat stat = new Stat();
488     byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
489     if (bytes == null) {
490       // If it came back null, node does not exist.
491       throw KeeperException.create(Code.NONODE);
492     }
493     RegionTransitionData data = RegionTransitionData.fromBytes(bytes);
494     if (!data.getEventType().equals(expectedState)) {
495       LOG.warn(zkw.prefix("Attempting to delete unassigned " +
496         "node " + regionName + " in " + expectedState +
497         " state but node is in " + data.getEventType() + " state"));
498       return false;
499     }
500     if (expectedVersion != -1
501         && stat.getVersion() != expectedVersion) {
502       LOG.warn("The node " + regionName + " we are trying to delete is not" +
503         " the expected one. Got a version mismatch");
504       return false;
505     }
506     if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
507       LOG.warn(zkw.prefix("Attempting to delete " +
508           "unassigned node " + regionName + " in " + expectedState +
509           " state but after verifying state, we got a version mismatch"));
510       return false;
511     }
512     LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
513         regionName + " in expected state " + expectedState));
514     return true;
515   }
516 
517   /**
518    * Deletes all unassigned nodes regardless of their state.
519    *
520    * <p>No watchers are set.
521    *
522    * <p>This method is used by the Master during cluster startup to clear out
523    * any existing state from other cluster runs.
524    *
525    * @param zkw zk reference
526    * @throws KeeperException if unexpected zookeeper exception
527    */
528   public static void deleteAllNodes(ZooKeeperWatcher zkw)
529   throws KeeperException {
530     LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
531     ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
532   }
533 
534   // RegionServer methods
535 
536   /**
537    * Creates a new unassigned node in the CLOSING state for the specified
538    * region.
539    *
540    * <p>Does not transition nodes from any states.  If a node already exists
541    * for this region, a {@link NodeExistsException} will be thrown.
542    *
543    * <p>If creation is successful, returns the version number of the CLOSING
544    * node created.
545    *
546    * <p>Does not set any watches.
547    *
548    * <p>This method should only be used by a RegionServer when initiating a
549    * close of a region after receiving a CLOSE RPC from the Master.
550    *
551    * @param zkw zk reference
552    * @param region region to be created as closing
553    * @param serverName server event originates from
554    * @return version of node after transition, -1 if unsuccessful transition
555    * @throws KeeperException if unexpected zookeeper exception
556    * @throws KeeperException.NodeExistsException if node already exists
557    */
558   public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
559       ServerName serverName)
560   throws KeeperException, KeeperException.NodeExistsException {
561     LOG.debug(zkw.prefix("Creating unassigned node for " +
562       region.getEncodedName() + " in a CLOSING state"));
563 
564     RegionTransitionData data = new RegionTransitionData(
565         EventType.M_ZK_REGION_CLOSING, region.getRegionName(), serverName);
566 
567     String node = getNodeName(zkw, region.getEncodedName());
568     return ZKUtil.createAndWatch(zkw, node, data.getBytes());
569   }
570 
571   /**
572    * Transitions an existing unassigned node for the specified region which is
573    * currently in the CLOSING state to be in the CLOSED state.
574    *
575    * <p>Does not transition nodes from other states.  If for some reason the
576    * node could not be transitioned, the method returns -1.  If the transition
577    * is successful, the version of the node after transition is returned.
578    *
579    * <p>This method can fail and return false for three different reasons:
580    * <ul><li>Unassigned node for this region does not exist</li>
581    * <li>Unassigned node for this region is not in CLOSING state</li>
582    * <li>After verifying CLOSING state, update fails because of wrong version
583    * (someone else already transitioned the node)</li>
584    * </ul>
585    *
586    * <p>Does not set any watches.
587    *
588    * <p>This method should only be used by a RegionServer when initiating a
589    * close of a region after receiving a CLOSE RPC from the Master.
590    *
591    * @param zkw zk reference
592    * @param region region to be transitioned to closed
593    * @param serverName server event originates from
594    * @return version of node after transition, -1 if unsuccessful transition
595    * @throws KeeperException if unexpected zookeeper exception
596    */
597   public static int transitionNodeClosed(ZooKeeperWatcher zkw,
598       HRegionInfo region, ServerName serverName, int expectedVersion)
599   throws KeeperException {
600     return transitionNode(zkw, region, serverName,
601         EventType.M_ZK_REGION_CLOSING,
602         EventType.RS_ZK_REGION_CLOSED, expectedVersion);
603   }
604 
605   /**
606    * Transitions an existing unassigned node for the specified region which is
607    * currently in the OFFLINE state to be in the OPENING state.
608    *
609    * <p>Does not transition nodes from other states.  If for some reason the
610    * node could not be transitioned, the method returns -1.  If the transition
611    * is successful, the version of the node written as OPENING is returned.
612    *
613    * <p>This method can fail and return -1 for three different reasons:
614    * <ul><li>Unassigned node for this region does not exist</li>
615    * <li>Unassigned node for this region is not in OFFLINE state</li>
616    * <li>After verifying OFFLINE state, update fails because of wrong version
617    * (someone else already transitioned the node)</li>
618    * </ul>
619    *
620    * <p>Does not set any watches.
621    *
622    * <p>This method should only be used by a RegionServer when initiating an
623    * open of a region after receiving an OPEN RPC from the Master.
624    *
625    * @param zkw zk reference
626    * @param region region to be transitioned to opening
627    * @param serverName server event originates from
628    * @return version of node after transition, -1 if unsuccessful transition
629    * @throws KeeperException if unexpected zookeeper exception
630    */
631   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
632       HRegionInfo region, ServerName serverName)
633   throws KeeperException {
634     return transitionNodeOpening(zkw, region, serverName,
635       EventType.M_ZK_REGION_OFFLINE);
636   }
637 
638   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
639       HRegionInfo region, ServerName serverName, final EventType beginState)
640   throws KeeperException {
641     return transitionNode(zkw, region, serverName, beginState,
642       EventType.RS_ZK_REGION_OPENING, -1);
643   }
644 
645   /**
646    * Retransitions an existing unassigned node for the specified region which is
647    * currently in the OPENING state to be in the OPENING state.
648    *
649    * <p>Does not transition nodes from other states.  If for some reason the
650    * node could not be transitioned, the method returns -1.  If the transition
651    * is successful, the version of the node rewritten as OPENING is returned.
652    *
653    * <p>This method can fail and return -1 for three different reasons:
654    * <ul><li>Unassigned node for this region does not exist</li>
655    * <li>Unassigned node for this region is not in OPENING state</li>
656    * <li>After verifying OPENING state, update fails because of wrong version
657    * (someone else already transitioned the node)</li>
658    * </ul>
659    *
660    * <p>Does not set any watches.
661    *
662    * <p>This method should only be used by a RegionServer when initiating an
663    * open of a region after receiving an OPEN RPC from the Master.
664    *
665    * @param zkw zk reference
666    * @param region region to be transitioned to opening
667    * @param serverName server event originates from
668    * @return version of node after transition, -1 if unsuccessful transition
669    * @throws KeeperException if unexpected zookeeper exception
670    */
671   public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
672       HRegionInfo region, ServerName serverName, int expectedVersion)
673   throws KeeperException {
674     return transitionNode(zkw, region, serverName,
675         EventType.RS_ZK_REGION_OPENING,
676         EventType.RS_ZK_REGION_OPENING, expectedVersion);
677   }
678 
679   /**
680    * Transitions an existing unassigned node for the specified region which is
681    * currently in the OPENING state to be in the OPENED state.
682    *
683    * <p>Does not transition nodes from other states.  If for some reason the
684    * node could not be transitioned, the method returns -1.  If the transition
685    * is successful, the version of the node after transition is returned.
686    *
687    * <p>This method can fail and return false for three different reasons:
688    * <ul><li>Unassigned node for this region does not exist</li>
689    * <li>Unassigned node for this region is not in OPENING state</li>
690    * <li>After verifying OPENING state, update fails because of wrong version
691    * (this should never actually happen since an RS only does this transition
692    * following a transition to OPENING.  if two RS are conflicting, one would
693    * fail the original transition to OPENING and not this transition)</li>
694    * </ul>
695    *
696    * <p>Does not set any watches.
697    *
698    * <p>This method should only be used by a RegionServer when completing the
699    * open of a region.
700    *
701    * @param zkw zk reference
702    * @param region region to be transitioned to opened
703    * @param serverName server event originates from
704    * @return version of node after transition, -1 if unsuccessful transition
705    * @throws KeeperException if unexpected zookeeper exception
706    */
707   public static int transitionNodeOpened(ZooKeeperWatcher zkw,
708       HRegionInfo region, ServerName serverName, int expectedVersion)
709   throws KeeperException {
710     return transitionNode(zkw, region, serverName,
711         EventType.RS_ZK_REGION_OPENING,
712         EventType.RS_ZK_REGION_OPENED, expectedVersion);
713   }
714 
715   /**
716    * Method that actually performs unassigned node transitions.
717    *
718    * <p>Attempts to transition the unassigned node for the specified region
719    * from the expected state to the state in the specified transition data.
720    *
721    * <p>Method first reads existing data and verifies it is in the expected
722    * state.  If the node does not exist or the node is not in the expected
723    * state, the method returns -1.  If the transition is successful, the
724    * version number of the node following the transition is returned.
725    *
726    * <p>If the read state is what is expected, it attempts to write the new
727    * state and data into the node.  When doing this, it includes the expected
728    * version (determined when the existing state was verified) to ensure that
729    * only one transition is successful.  If there is a version mismatch, the
730    * method returns -1.
731    *
732    * <p>If the write is successful, no watch is set and the method returns true.
733    *
734    * @param zkw zk reference
735    * @param region region to be transitioned to opened
736    * @param serverName server event originates from
737    * @param endState state to transition node to if all checks pass
738    * @param beginState state the node must currently be in to do transition
739    * @param expectedVersion expected version of data before modification, or -1
740    * @return version of node after transition, -1 if unsuccessful transition
741    * @throws KeeperException if unexpected zookeeper exception
742    */
743   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
744       ServerName serverName, EventType beginState, EventType endState,
745       int expectedVersion)
746   throws KeeperException {
747     return transitionNode(zkw, region, serverName, beginState, endState,
748         expectedVersion, null);
749   }
750 
751   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
752       ServerName serverName, EventType beginState, EventType endState,
753       int expectedVersion, final byte [] payload)
754   throws KeeperException {
755     String encoded = region.getEncodedName();
756     if(LOG.isDebugEnabled()) {
757       LOG.debug(zkw.prefix("Attempting to transition node " +
758         HRegionInfo.prettyPrint(encoded) +
759         " from " + beginState.toString() + " to " + endState.toString()));
760     }
761 
762     String node = getNodeName(zkw, encoded);
763     zkw.sync(node);
764 
765     // Read existing data of the node
766     Stat stat = new Stat();
767     byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
768     if (existingBytes == null) {
769       // Node no longer exists.  Return -1. It means unsuccessful transition.
770       return -1;
771     }
772     RegionTransitionData existingData =
773       RegionTransitionData.fromBytes(existingBytes);
774 
775     // Verify it is the expected version
776     if(expectedVersion != -1 && stat.getVersion() != expectedVersion) {
777       LOG.warn(zkw.prefix("Attempt to transition the " +
778         "unassigned node for " + encoded +
779         " from " + beginState + " to " + endState + " failed, " +
780         "the node existed but was version " + stat.getVersion() +
781         " not the expected version " + expectedVersion));
782         return -1;
783     } else if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
784         && endState.equals(EventType.RS_ZK_REGION_OPENING)
785         && expectedVersion == -1 && stat.getVersion() != 0) {
786       // the below check ensures that double assignment doesnot happen.
787       // When the node is created for the first time then the expected version
788       // that is passed will be -1 and the version in znode will be 0.
789       // In all other cases the version in znode will be > 0.
790       LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
791           + encoded + " from " + beginState + " to " + endState + " failed, "
792           + "the node existed but was version " + stat.getVersion()
793           + " not the expected version " + expectedVersion));
794       return -1;
795     }
796 
797     // Verify it is in expected state
798     if(!existingData.getEventType().equals(beginState)) {
799       LOG.warn(zkw.prefix("Attempt to transition the " +
800         "unassigned node for " + encoded +
801         " from " + beginState + " to " + endState + " failed, " +
802         "the node existed but was in the state " + existingData.getEventType() +
803         " set by the server " + serverName));
804       return -1;
805     }
806 
807     // Write new data, ensuring data has not changed since we last read it
808     try {
809       RegionTransitionData data = new RegionTransitionData(endState,
810           region.getRegionName(), serverName, payload);
811       if(!ZKUtil.setData(zkw, node, data.getBytes(), stat.getVersion())) {
812         LOG.warn(zkw.prefix("Attempt to transition the " +
813         "unassigned node for " + encoded +
814         " from " + beginState + " to " + endState + " failed, " +
815         "the node existed and was in the expected state but then when " +
816         "setting data we got a version mismatch"));
817         return -1;
818       }
819       if(LOG.isDebugEnabled()) {
820         LOG.debug(zkw.prefix("Successfully transitioned node " + encoded +
821           " from " + beginState + " to " + endState));
822       }
823       return stat.getVersion() + 1;
824     } catch (KeeperException.NoNodeException nne) {
825       LOG.warn(zkw.prefix("Attempt to transition the " +
826         "unassigned node for " + encoded +
827         " from " + beginState + " to " + endState + " failed, " +
828         "the node existed and was in the expected state but then when " +
829         "setting data it no longer existed"));
830       return -1;
831     }
832   }
833 
834   /**
835    * Gets the current data in the unassigned node for the specified region name
836    * or fully-qualified path.
837    *
838    * <p>Returns null if the region does not currently have a node.
839    *
840    * <p>Sets a watch on the node if the node exists.
841    *
842    * @param zkw zk reference
843    * @param pathOrRegionName fully-specified path or region name
844    * @return data for the unassigned node
845    * @throws KeeperException if unexpected zookeeper exception
846    */
847   public static RegionTransitionData getData(ZooKeeperWatcher zkw,
848       String pathOrRegionName)
849   throws KeeperException {
850     String node = pathOrRegionName.startsWith("/") ?
851         pathOrRegionName : getNodeName(zkw, pathOrRegionName);
852     byte [] data = ZKUtil.getDataAndWatch(zkw, node);
853     if(data == null) {
854       return null;
855     }
856     return RegionTransitionData.fromBytes(data);
857   }
858 
859   /**
860    * Gets the current data in the unassigned node for the specified region name
861    * or fully-qualified path.
862    *
863    * <p>Returns null if the region does not currently have a node.
864    *
865    * <p>Sets a watch on the node if the node exists.
866    *
867    * @param zkw zk reference
868    * @param pathOrRegionName fully-specified path or region name
869    * @param stat object to populate the version.
870    * @return data for the unassigned node
871    * @throws KeeperException if unexpected zookeeper exception
872    */
873   public static RegionTransitionData getDataAndWatch(ZooKeeperWatcher zkw,
874       String pathOrRegionName, Stat stat)
875   throws KeeperException {
876     String node = pathOrRegionName.startsWith("/") ?
877         pathOrRegionName : getNodeName(zkw, pathOrRegionName);
878     byte [] data = ZKUtil.getDataAndWatch(zkw, node, stat);
879     if(data == null) {
880       return null;
881     }
882     return RegionTransitionData.fromBytes(data);
883   }
884 
885   /**
886    * Gets the current data in the unassigned node for the specified region name
887    * or fully-qualified path.
888    *
889    * <p>Returns null if the region does not currently have a node.
890    *
891    * <p>Does not set a watch.
892    *
893    * @param zkw zk reference
894    * @param pathOrRegionName fully-specified path or region name
895    * @param stat object to store node info into on getData call
896    * @return data for the unassigned node or null if node does not exist
897    * @throws KeeperException if unexpected zookeeper exception
898    */
899   public static RegionTransitionData getDataNoWatch(ZooKeeperWatcher zkw,
900       String pathOrRegionName, Stat stat)
901   throws KeeperException {
902     String node = pathOrRegionName.startsWith("/") ?
903         pathOrRegionName : getNodeName(zkw, pathOrRegionName);
904     byte [] data = ZKUtil.getDataNoWatch(zkw, node, stat);
905     if (data == null) {
906       return null;
907     }
908     return RegionTransitionData.fromBytes(data);
909   }
910 
911   /**
912    * Get the version of the specified znode
913    * @param zkw zk reference
914    * @param region region's info
915    * @return the version of the znode, -1 if it doesn't exist
916    * @throws KeeperException
917    */
918   public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
919     throws KeeperException {
920     String znode = getNodeName(zkw, region.getEncodedName());
921     return ZKUtil.checkExists(zkw, znode);
922   }
923 
924   /**
925    * Delete the assignment node regardless of its current state.
926    * <p>
927    * Fail silent even if the node does not exist at all.
928    * @param watcher
929    * @param regionInfo
930    * @throws KeeperException
931    */
932   public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
933       HRegionInfo regionInfo)
934   throws KeeperException {
935     String node = getNodeName(watcher, regionInfo.getEncodedName());
936     ZKUtil.deleteNodeFailSilent(watcher, node);
937   }
938 
939   /**
940    * Blocks until there are no node in regions in transition.
941    * <p>
942    * Used in testing only.
943    * @param zkw zk reference
944    * @throws KeeperException
945    * @throws InterruptedException
946    */
947   public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
948   throws KeeperException, InterruptedException {
949     while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
950       List<String> znodes =
951         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
952       if (znodes != null && !znodes.isEmpty()) {
953         for (String znode : znodes) {
954           LOG.debug("ZK RIT -> " + znode);
955         }
956       }
957       Thread.sleep(100);
958     }
959   }
960 
961   /**
962    * Blocks until there is at least one node in regions in transition.
963    * <p>
964    * Used in testing only.
965    * @param zkw zk reference
966    * @throws KeeperException
967    * @throws InterruptedException
968    */
969   public static void blockUntilRIT(ZooKeeperWatcher zkw)
970   throws KeeperException, InterruptedException {
971     while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
972       List<String> znodes =
973         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
974       if (znodes == null || znodes.isEmpty()) {
975         LOG.debug("No RIT in ZK");
976       }
977       Thread.sleep(100);
978     }
979   }
980 
981   /**
982    * Verifies that the specified region is in the specified state in ZooKeeper.
983    * <p>
984    * Returns true if region is in transition and in the specified state in
985    * ZooKeeper.  Returns false if the region does not exist in ZK or is in
986    * a different state.
987    * <p>
988    * Method synchronizes() with ZK so will yield an up-to-date result but is
989    * a slow read.
990    * @param zkw
991    * @param region
992    * @param expectedState
993    * @return true if region exists and is in expected state
994    */
995   public static boolean verifyRegionState(ZooKeeperWatcher zkw,
996       HRegionInfo region, EventType expectedState)
997   throws KeeperException {
998     String encoded = region.getEncodedName();
999 
1000     String node = getNodeName(zkw, encoded);
1001     zkw.sync(node);
1002 
1003     // Read existing data of the node
1004     byte [] existingBytes = null;
1005     try {
1006       existingBytes = ZKUtil.getDataAndWatch(zkw, node);
1007     } catch (KeeperException.NoNodeException nne) {
1008       return false;
1009     } catch (KeeperException e) {
1010       throw e;
1011     }
1012     if (existingBytes == null) return false;
1013     RegionTransitionData existingData =
1014       RegionTransitionData.fromBytes(existingBytes);
1015     if (existingData.getEventType() == expectedState){
1016       return true;
1017     }
1018     return false;
1019   }
1020 }