View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.zookeeper;
20  
21  import java.util.List;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.HRegionInfo;
28  import org.apache.hadoop.hbase.RegionTransition;
29  import org.apache.hadoop.hbase.ServerName;
30  import org.apache.hadoop.hbase.exceptions.DeserializationException;
31  import org.apache.hadoop.hbase.executor.EventType;
32  import org.apache.zookeeper.AsyncCallback;
33  import org.apache.zookeeper.KeeperException;
34  import org.apache.zookeeper.KeeperException.Code;
35  import org.apache.zookeeper.KeeperException.NoNodeException;
36  import org.apache.zookeeper.KeeperException.NodeExistsException;
37  import org.apache.zookeeper.data.Stat;
38  
39  // We should not be importing this Type here, nor a RegionTransition, etc.  This class should be
40  // about zk and bytes only.
41  
42  /**
43   * Utility class for doing region assignment in ZooKeeper.  This class extends
44   * stuff done in {@link ZKUtil} to cover specific assignment operations.
45   * <p>
46   * Contains only static methods and constants.
47   * <p>
48   * Used by both the Master and RegionServer.
49   * <p>
50   * All valid transitions outlined below:
51   * <p>
52   * <b>MASTER</b>
53   * <ol>
54   *   <li>
55   *     Master creates an unassigned node as OFFLINE.
56   *     - Cluster startup and table enabling.
57   *   </li>
58   *   <li>
59   *     Master forces an existing unassigned node to OFFLINE.
60   *     - RegionServer failure.
61   *     - Allows transitions from all states to OFFLINE.
62   *   </li>
63   *   <li>
64   *     Master deletes an unassigned node that was in an OPENED state.
65   *     - Normal region transitions.  Besides cluster startup, no other deletions
66   *     of unassigned nodes are allowed.
67   *   </li>
68   *   <li>
69   *     Master deletes all unassigned nodes regardless of state.
70   *     - Cluster startup before any assignment happens.
71   *   </li>
72   * </ol>
73   * <p>
74   * <b>REGIONSERVER</b>
75   * <ol>
76   *   <li>
77   *     RegionServer creates an unassigned node as CLOSING.
78   *     - All region closes will do this in response to a CLOSE RPC from Master.
79   *     - A node can never be transitioned to CLOSING, only created.
80   *   </li>
81   *   <li>
82   *     RegionServer transitions an unassigned node from CLOSING to CLOSED.
83   *     - Normal region closes.  CAS operation.
84   *   </li>
85   *   <li>
86   *     RegionServer transitions an unassigned node from OFFLINE to OPENING.
87   *     - All region opens will do this in response to an OPEN RPC from the Master.
88   *     - Normal region opens.  CAS operation.
89   *   </li>
90   *   <li>
91   *     RegionServer transitions an unassigned node from OPENING to OPENED.
92   *     - Normal region opens.  CAS operation.
93   *   </li>
94   * </ol>
95   */
96  @InterfaceAudience.Private
97  public class ZKAssign {
98    private static final Log LOG = LogFactory.getLog(ZKAssign.class);
99  
100   /**
101    * Gets the full path node name for the unassigned node for the specified
102    * region.
103    * @param zkw zk reference
104    * @param regionName region name
105    * @return full path node name
106    */
107   public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
108     return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
109   }
110 
111   /**
112    * Gets the region name from the full path node name of an unassigned node.
113    * @param path full zk path
114    * @return region name
115    */
116   public static String getRegionName(ZooKeeperWatcher zkw, String path) {
117     return path.substring(zkw.assignmentZNode.length()+1);
118   }
119 
120   // Master methods
121 
122   /**
123    * Creates a new unassigned node in the OFFLINE state for the specified region.
124    *
125    * <p>Does not transition nodes from other states.  If a node already exists
126    * for this region, a {@link NodeExistsException} will be thrown.
127    *
128    * <p>Sets a watcher on the unassigned region node if the method is successful.
129    *
130    * <p>This method should only be used during cluster startup and the enabling
131    * of a table.
132    *
133    * @param zkw zk reference
134    * @param region region to be created as offline
135    * @param serverName server transition will happen on
136    * @throws KeeperException if unexpected zookeeper exception
137    * @throws KeeperException.NodeExistsException if node already exists
138    */
139   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
140       ServerName serverName)
141   throws KeeperException, KeeperException.NodeExistsException {
142     createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
143   }
144 
145   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
146       ServerName serverName, final EventType event)
147   throws KeeperException, KeeperException.NodeExistsException {
148     LOG.debug(zkw.prefix("Creating unassigned node " +
149       region.getEncodedName() + " in OFFLINE state"));
150     RegionTransition rt =
151       RegionTransition.createRegionTransition(event, region.getRegionName(), serverName);
152     String node = getNodeName(zkw, region.getEncodedName());
153     ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
154   }
155 
156   /**
157    * Creates an unassigned node in the OFFLINE state for the specified region.
158    * <p>
159    * Runs asynchronously.  Depends on no pre-existing znode.
160    *
161    * <p>Sets a watcher on the unassigned region node.
162    *
163    * @param zkw zk reference
164    * @param region region to be created as offline
165    * @param serverName server transition will happen on
166    * @param cb
167    * @param ctx
168    * @throws KeeperException if unexpected zookeeper exception
169    * @throws KeeperException.NodeExistsException if node already exists
170    */
171   public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
172       HRegionInfo region, ServerName serverName,
173       final AsyncCallback.StringCallback cb, final Object ctx)
174   throws KeeperException {
175     LOG.debug(zkw.prefix("Async create of unassigned node " +
176       region.getEncodedName() + " with OFFLINE state"));
177     RegionTransition rt =
178       RegionTransition.createRegionTransition(
179           EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
180     String node = getNodeName(zkw, region.getEncodedName());
181     ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx);
182   }
183 
184   /**
185    * Creates or force updates an unassigned node to the OFFLINE state for the
186    * specified region.
187    * <p>
188    * Attempts to create the node but if it exists will force it to transition to
189    * and OFFLINE state.
190    *
191    * <p>Sets a watcher on the unassigned region node if the method is
192    * successful.
193    *
194    * <p>This method should be used when assigning a region.
195    *
196    * @param zkw zk reference
197    * @param region region to be created as offline
198    * @param serverName server transition will happen on
199    * @return the version of the znode created in OFFLINE state, -1 if
200    *         unsuccessful.
201    * @throws KeeperException if unexpected zookeeper exception
202    * @throws KeeperException.NodeExistsException if node already exists
203    */
204   public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
205       HRegionInfo region, ServerName serverName) throws KeeperException {
206     LOG.debug(zkw.prefix("Creating (or updating) unassigned node " +
207       region.getEncodedName() + " with OFFLINE state"));
208     RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE,
209       region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
210     byte [] data = rt.toByteArray();
211     String node = getNodeName(zkw, region.getEncodedName());
212     zkw.sync(node);
213     int version = ZKUtil.checkExists(zkw, node);
214     if (version == -1) {
215       return ZKUtil.createAndWatch(zkw, node, data);
216     } else {
217       boolean setData = false;
218       try {
219         setData = ZKUtil.setData(zkw, node, data, version);
220         // Setdata throws KeeperException which aborts the Master. So we are
221         // catching it here.
222         // If just before setting the znode to OFFLINE if the RS has made any
223         // change to the
224         // znode state then we need to return -1.
225       } catch (KeeperException kpe) {
226         LOG.info("Version mismatch while setting the node to OFFLINE state.");
227         return -1;
228       }
229       if (!setData) {
230         return -1;
231       } else {
232         // We successfully forced to OFFLINE, reset watch and handle if
233         // the state changed in between our set and the watch
234         byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
235         rt = getRegionTransition(bytes);
236         if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) {
237           // state changed, need to process
238           return -1;
239         }
240       }
241     }
242     return version + 1;
243   }
244 
245   /**
246    * Deletes an existing unassigned node that is in the OPENED state for the
247    * specified region.
248    *
249    * <p>If a node does not already exist for this region, a
250    * {@link NoNodeException} will be thrown.
251    *
252    * <p>No watcher is set whether this succeeds or not.
253    *
254    * <p>Returns false if the node was not in the proper state but did exist.
255    *
256    * <p>This method is used during normal region transitions when a region
257    * finishes successfully opening.  This is the Master acknowledging completion
258    * of the specified regions transition.
259    *
260    * @param zkw zk reference
261    * @param encodedRegionName opened region to be deleted from zk
262    * @param sn the expected region transition target server name
263    * @throws KeeperException if unexpected zookeeper exception
264    * @throws KeeperException.NoNodeException if node does not exist
265    */
266   public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
267       String encodedRegionName, ServerName sn)
268   throws KeeperException, KeeperException.NoNodeException {
269     return deleteNode(zkw, encodedRegionName,
270       EventType.RS_ZK_REGION_OPENED, sn);
271   }
272 
273   /**
274    * Deletes an existing unassigned node that is in the OFFLINE state for the
275    * specified region.
276    *
277    * <p>If a node does not already exist for this region, a
278    * {@link NoNodeException} will be thrown.
279    *
280    * <p>No watcher is set whether this succeeds or not.
281    *
282    * <p>Returns false if the node was not in the proper state but did exist.
283    *
284    * <p>This method is used during master failover when the regions on an RS
285    * that has died are all set to OFFLINE before being processed.
286    *
287    * @param zkw zk reference
288    * @param encodedRegionName closed region to be deleted from zk
289    * @param sn the expected region transition target server name
290    * @throws KeeperException if unexpected zookeeper exception
291    * @throws KeeperException.NoNodeException if node does not exist
292    */
293   public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
294       String encodedRegionName, ServerName sn)
295   throws KeeperException, KeeperException.NoNodeException {
296     return deleteNode(zkw, encodedRegionName,
297       EventType.M_ZK_REGION_OFFLINE, sn);
298   }
299 
300   /**
301    * Deletes an existing unassigned node that is in the CLOSED state for the
302    * specified region.
303    *
304    * <p>If a node does not already exist for this region, a
305    * {@link NoNodeException} will be thrown.
306    *
307    * <p>No watcher is set whether this succeeds or not.
308    *
309    * <p>Returns false if the node was not in the proper state but did exist.
310    *
311    * <p>This method is used during table disables when a region finishes
312    * successfully closing.  This is the Master acknowledging completion
313    * of the specified regions transition to being closed.
314    *
315    * @param zkw zk reference
316    * @param encodedRegionName closed region to be deleted from zk
317    * @param sn the expected region transition target server name
318    * @throws KeeperException if unexpected zookeeper exception
319    * @throws KeeperException.NoNodeException if node does not exist
320    */
321   public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
322       String encodedRegionName, ServerName sn)
323   throws KeeperException, KeeperException.NoNodeException {
324     return deleteNode(zkw, encodedRegionName,
325       EventType.RS_ZK_REGION_CLOSED, sn);
326   }
327 
328   /**
329    * Deletes an existing unassigned node that is in the CLOSING state for the
330    * specified region.
331    *
332    * <p>If a node does not already exist for this region, a
333    * {@link NoNodeException} will be thrown.
334    *
335    * <p>No watcher is set whether this succeeds or not.
336    *
337    * <p>Returns false if the node was not in the proper state but did exist.
338    *
339    * <p>This method is used during table disables when a region finishes
340    * successfully closing.  This is the Master acknowledging completion
341    * of the specified regions transition to being closed.
342    *
343    * @param zkw zk reference
344    * @param region closing region to be deleted from zk
345    * @param sn the expected region transition target server name
346    * @throws KeeperException if unexpected zookeeper exception
347    * @throws KeeperException.NoNodeException if node does not exist
348    */
349   public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
350       HRegionInfo region, ServerName sn)
351   throws KeeperException, KeeperException.NoNodeException {
352     String encodedRegionName = region.getEncodedName();
353     return deleteNode(zkw, encodedRegionName,
354       EventType.M_ZK_REGION_CLOSING, sn);
355   }
356 
357   /**
358    * Deletes an existing unassigned node that is in the specified state for the
359    * specified region.
360    *
361    * <p>If a node does not already exist for this region, a
362    * {@link NoNodeException} will be thrown.
363    *
364    * <p>No watcher is set whether this succeeds or not.
365    *
366    * <p>Returns false if the node was not in the proper state but did exist.
367    *
368    * <p>This method is used when a region finishes opening/closing.
369    * The Master acknowledges completion
370    * of the specified regions transition to being closed/opened.
371    *
372    * @param zkw zk reference
373    * @param encodedRegionName region to be deleted from zk
374    * @param expectedState state region must be in for delete to complete
375    * @param sn the expected region transition target server name
376    * @throws KeeperException if unexpected zookeeper exception
377    * @throws KeeperException.NoNodeException if node does not exist
378    */
379   public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
380       EventType expectedState, ServerName sn)
381   throws KeeperException, KeeperException.NoNodeException {
382     return deleteNode(zkw, encodedRegionName, expectedState, sn, -1);
383   }
384 
385   /**
386    * Deletes an existing unassigned node that is in the specified state for the
387    * specified region.
388    *
389    * <p>If a node does not already exist for this region, a
390    * {@link NoNodeException} will be thrown.
391    *
392    * <p>No watcher is set whether this succeeds or not.
393    *
394    * <p>Returns false if the node was not in the proper state but did exist.
395    *
396    * <p>This method is used when a region finishes opening/closing.
397    * The Master acknowledges completion
398    * of the specified regions transition to being closed/opened.
399    *
400    * @param zkw zk reference
401    * @param encodedRegionName region to be deleted from zk
402    * @param expectedState state region must be in for delete to complete
403    * @param expectedVersion of the znode that is to be deleted.
404    *        If expectedVersion need not be compared while deleting the znode
405    *        pass -1
406    * @throws KeeperException if unexpected zookeeper exception
407    * @throws KeeperException.NoNodeException if node does not exist
408    */
409   public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
410       EventType expectedState, int expectedVersion)
411   throws KeeperException, KeeperException.NoNodeException {
412     return deleteNode(zkw, encodedRegionName, expectedState, null, expectedVersion);
413   }
414 
415   /**
416    * Deletes an existing unassigned node that is in the specified state for the
417    * specified region.
418    *
419    * <p>If a node does not already exist for this region, a
420    * {@link NoNodeException} will be thrown.
421    *
422    * <p>No watcher is set whether this succeeds or not.
423    *
424    * <p>Returns false if the node was not in the proper state but did exist.
425    *
426    * <p>This method is used when a region finishes opening/closing.
427    * The Master acknowledges completion
428    * of the specified regions transition to being closed/opened.
429    *
430    * @param zkw zk reference
431    * @param encodedRegionName region to be deleted from zk
432    * @param expectedState state region must be in for delete to complete
433    * @param serverName the expected region transition target server name
434    * @param expectedVersion of the znode that is to be deleted.
435    *        If expectedVersion need not be compared while deleting the znode
436    *        pass -1
437    * @throws KeeperException if unexpected zookeeper exception
438    * @throws KeeperException.NoNodeException if node does not exist
439    */
440   public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
441       EventType expectedState, ServerName serverName, int expectedVersion)
442   throws KeeperException, KeeperException.NoNodeException {
443     if (LOG.isTraceEnabled()) {
444     	LOG.trace(zkw.prefix("Deleting existing unassigned " +
445       "node " + encodedRegionName + " in expected state " + expectedState));
446     }
447     String node = getNodeName(zkw, encodedRegionName);
448     zkw.sync(node);
449     Stat stat = new Stat();
450     byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
451     if (bytes == null) {
452       // If it came back null, node does not exist.
453       throw KeeperException.create(Code.NONODE);
454     }
455     RegionTransition rt = getRegionTransition(bytes);
456     EventType et = rt.getEventType();
457     if (!et.equals(expectedState)) {
458       LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " +
459         expectedState + " state but node is in " + et + " state"));
460       return false;
461     }
462     // Verify the server transition happens on is not changed
463     if (serverName != null && !rt.getServerName().equals(serverName)) {
464       LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName
465         + " with target " + serverName + " but node has " + rt.getServerName()));
466       return false;
467     }
468     if (expectedVersion != -1
469         && stat.getVersion() != expectedVersion) {
470       LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" +
471         " the expected one. Got a version mismatch");
472       return false;
473     }
474     if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
475       LOG.warn(zkw.prefix("Attempting to delete " +
476           "unassigned node " + encodedRegionName + " in " + expectedState +
477           " state but after verifying state, we got a version mismatch"));
478       return false;
479     }
480     LOG.debug(zkw.prefix("Deleted unassigned node " +
481         encodedRegionName + " in expected state " + expectedState));
482     return true;
483   }
484 
485   /**
486    * Deletes all unassigned nodes regardless of their state.
487    *
488    * <p>No watchers are set.
489    *
490    * <p>This method is used by the Master during cluster startup to clear out
491    * any existing state from other cluster runs.
492    *
493    * @param zkw zk reference
494    * @throws KeeperException if unexpected zookeeper exception
495    */
496   public static void deleteAllNodes(ZooKeeperWatcher zkw)
497   throws KeeperException {
498     LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
499     ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
500   }
501 
502   /**
503    * Creates a new unassigned node in the CLOSING state for the specified
504    * region.
505    *
506    * <p>Does not transition nodes from any states.  If a node already exists
507    * for this region, a {@link NodeExistsException} will be thrown.
508    *
509    * <p>If creation is successful, returns the version number of the CLOSING
510    * node created.
511    *
512    * <p>Set a watch.
513    *
514    * <p>This method should only be used by a Master when initiating a
515    * close of a region before sending a close request to the region server.
516    *
517    * @param zkw zk reference
518    * @param region region to be created as closing
519    * @param serverName server transition will happen on
520    * @return version of node after transition, -1 if unsuccessful transition
521    * @throws KeeperException if unexpected zookeeper exception
522    * @throws KeeperException.NodeExistsException if node already exists
523    */
524   public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
525       ServerName serverName)
526   throws KeeperException, KeeperException.NodeExistsException {
527     LOG.debug(zkw.prefix("Creating unassigned node " +
528       region.getEncodedName() + " in a CLOSING state"));
529     RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
530       region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
531     String node = getNodeName(zkw, region.getEncodedName());
532     return ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
533   }
534 
535   // RegionServer methods
536 
537   /**
538    * Transitions an existing unassigned node for the specified region which is
539    * currently in the CLOSING state to be in the CLOSED state.
540    *
541    * <p>Does not transition nodes from other states.  If for some reason the
542    * node could not be transitioned, the method returns -1.  If the transition
543    * is successful, the version of the node after transition is returned.
544    *
545    * <p>This method can fail and return false for three different reasons:
546    * <ul><li>Unassigned node for this region does not exist</li>
547    * <li>Unassigned node for this region is not in CLOSING state</li>
548    * <li>After verifying CLOSING state, update fails because of wrong version
549    * (someone else already transitioned the node)</li>
550    * </ul>
551    *
552    * <p>Does not set any watches.
553    *
554    * <p>This method should only be used by a RegionServer when initiating a
555    * close of a region after receiving a CLOSE RPC from the Master.
556    *
557    * @param zkw zk reference
558    * @param region region to be transitioned to closed
559    * @param serverName server transition happens on
560    * @return version of node after transition, -1 if unsuccessful transition
561    * @throws KeeperException if unexpected zookeeper exception
562    */
563   public static int transitionNodeClosed(ZooKeeperWatcher zkw,
564       HRegionInfo region, ServerName serverName, int expectedVersion)
565   throws KeeperException {
566     return transitionNode(zkw, region, serverName,
567         EventType.M_ZK_REGION_CLOSING,
568         EventType.RS_ZK_REGION_CLOSED, expectedVersion);
569   }
570 
571   /**
572    * Transitions an existing unassigned node for the specified region which is
573    * currently in the OFFLINE state to be in the OPENING state.
574    *
575    * <p>Does not transition nodes from other states.  If for some reason the
576    * node could not be transitioned, the method returns -1.  If the transition
577    * is successful, the version of the node written as OPENING is returned.
578    *
579    * <p>This method can fail and return -1 for three different reasons:
580    * <ul><li>Unassigned node for this region does not exist</li>
581    * <li>Unassigned node for this region is not in OFFLINE state</li>
582    * <li>After verifying OFFLINE state, update fails because of wrong version
583    * (someone else already transitioned the node)</li>
584    * </ul>
585    *
586    * <p>Does not set any watches.
587    *
588    * <p>This method should only be used by a RegionServer when initiating an
589    * open of a region after receiving an OPEN RPC from the Master.
590    *
591    * @param zkw zk reference
592    * @param region region to be transitioned to opening
593    * @param serverName server transition happens on
594    * @return version of node after transition, -1 if unsuccessful transition
595    * @throws KeeperException if unexpected zookeeper exception
596    */
597   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
598       HRegionInfo region, ServerName serverName)
599   throws KeeperException {
600     return transitionNodeOpening(zkw, region, serverName,
601       EventType.M_ZK_REGION_OFFLINE);
602   }
603 
604   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
605       HRegionInfo region, ServerName serverName, final EventType beginState)
606   throws KeeperException {
607     return transitionNode(zkw, region, serverName, beginState,
608       EventType.RS_ZK_REGION_OPENING, -1);
609   }
610 
611   /**
612    * Confirm an existing unassigned node for the specified region which is
613    * currently in the OPENING state to be still in the OPENING state on
614    * the specified server.
615    *
616    * <p>If for some reason the check fails, the method returns -1. Otherwise,
617    * the version of the node (same as the expected version) is returned.
618    *
619    * <p>This method can fail and return -1 for three different reasons:
620    * <ul><li>Unassigned node for this region does not exist</li>
621    * <li>Unassigned node for this region is not in OPENING state</li>
622    * <li>After verifying OPENING state, the server name or the version of the
623    * doesn't match)</li>
624    * </ul>
625    *
626    * <p>Does not set any watches.
627    *
628    * <p>This method should only be used by a RegionServer when initiating an
629    * open of a region after receiving an OPEN RPC from the Master.
630    *
631    * @param zkw zk reference
632    * @param region region to be transitioned to opening
633    * @param serverName server transition happens on
634    * @return version of node after transition, -1 if unsuccessful transition
635    * @throws KeeperException if unexpected zookeeper exception
636    */
637   public static int confirmNodeOpening(ZooKeeperWatcher zkw,
638       HRegionInfo region, ServerName serverName, int expectedVersion)
639   throws KeeperException {
640 
641     String encoded = region.getEncodedName();
642     if(LOG.isDebugEnabled()) {
643       LOG.debug(zkw.prefix("Attempting to retransition opening state of node " +
644           HRegionInfo.prettyPrint(encoded)));
645     }
646 
647     String node = getNodeName(zkw, encoded);
648     zkw.sync(node);
649 
650     // Read existing data of the node
651     Stat stat = new Stat();
652     byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
653     if (existingBytes == null) {
654       // Node no longer exists.  Return -1. It means unsuccessful transition.
655       return -1;
656     }
657     RegionTransition rt = getRegionTransition(existingBytes);
658 
659     // Verify it is the expected version
660     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
661       LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
662           "unassigned node for " + encoded + " failed, " +
663           "the node existed but was version " + stat.getVersion() +
664           " not the expected version " + expectedVersion));
665       return -1;
666     }
667 
668     // Verify it is in expected state
669     EventType et = rt.getEventType();
670     if (!et.equals(EventType.RS_ZK_REGION_OPENING)) {
671       String existingServer = (rt.getServerName() == null)
672           ? "<unknown>" : rt.getServerName().toString();
673       LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for "
674           + encoded + " failed, the node existed but was in the state " + et +
675           " set by the server " + existingServer));
676       return -1;
677     }
678 
679     return expectedVersion;
680   }
681 
682   /**
683    * Transitions an existing unassigned node for the specified region which is
684    * currently in the OPENING state to be in the OPENED state.
685    *
686    * <p>Does not transition nodes from other states.  If for some reason the
687    * node could not be transitioned, the method returns -1.  If the transition
688    * is successful, the version of the node after transition is returned.
689    *
690    * <p>This method can fail and return false for three different reasons:
691    * <ul><li>Unassigned node for this region does not exist</li>
692    * <li>Unassigned node for this region is not in OPENING state</li>
693    * <li>After verifying OPENING state, update fails because of wrong version
694    * (this should never actually happen since an RS only does this transition
695    * following a transition to OPENING.  if two RS are conflicting, one would
696    * fail the original transition to OPENING and not this transition)</li>
697    * </ul>
698    *
699    * <p>Does not set any watches.
700    *
701    * <p>This method should only be used by a RegionServer when completing the
702    * open of a region.
703    *
704    * @param zkw zk reference
705    * @param region region to be transitioned to opened
706    * @param serverName server transition happens on
707    * @return version of node after transition, -1 if unsuccessful transition
708    * @throws KeeperException if unexpected zookeeper exception
709    */
710   public static int transitionNodeOpened(ZooKeeperWatcher zkw,
711       HRegionInfo region, ServerName serverName, int expectedVersion)
712   throws KeeperException {
713     return transitionNode(zkw, region, serverName,
714         EventType.RS_ZK_REGION_OPENING,
715         EventType.RS_ZK_REGION_OPENED, expectedVersion);
716   }
717 
718   /**
719    *
720    * @param zkw zk reference
721    * @param region region to be closed
722    * @param expectedVersion expected version of the znode
723    * @return true if the znode exists, has the right version and the right state. False otherwise.
724    * @throws KeeperException
725    */
726   public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region,
727                                           int expectedVersion) throws KeeperException {
728 
729     final String encoded = getNodeName(zkw, region.getEncodedName());
730     zkw.sync(encoded);
731 
732     // Read existing data of the node
733     Stat stat = new Stat();
734     byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat);
735 
736     if (existingBytes == null) {
737       LOG.warn(zkw.prefix("Attempt to check the " +
738           "closing node for " + encoded +
739           ". The node does not exist"));
740       return false;
741     }
742 
743     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
744       LOG.warn(zkw.prefix("Attempt to check the " +
745           "closing node for " + encoded +
746           ". The node existed but was version " + stat.getVersion() +
747           " not the expected version " + expectedVersion));
748       return false;
749     }
750 
751     RegionTransition rt = getRegionTransition(existingBytes);
752 
753     if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) {
754       LOG.warn(zkw.prefix("Attempt to check the " +
755           "closing node for " + encoded +
756           ". The node existed but was in an unexpected state: " + rt.getEventType()));
757       return false;
758     }
759 
760     return true;
761   }
762 
763   /**
764    * Method that actually performs unassigned node transitions.
765    *
766    * <p>Attempts to transition the unassigned node for the specified region
767    * from the expected state to the state in the specified transition data.
768    *
769    * <p>Method first reads existing data and verifies it is in the expected
770    * state.  If the node does not exist or the node is not in the expected
771    * state, the method returns -1.  If the transition is successful, the
772    * version number of the node following the transition is returned.
773    *
774    * <p>If the read state is what is expected, it attempts to write the new
775    * state and data into the node.  When doing this, it includes the expected
776    * version (determined when the existing state was verified) to ensure that
777    * only one transition is successful.  If there is a version mismatch, the
778    * method returns -1.
779    *
780    * <p>If the write is successful, no watch is set and the method returns true.
781    *
782    * @param zkw zk reference
783    * @param region region to be transitioned to opened
784    * @param serverName server transition happens on
785    * @param endState state to transition node to if all checks pass
786    * @param beginState state the node must currently be in to do transition
787    * @param expectedVersion expected version of data before modification, or -1
788    * @return version of node after transition, -1 if unsuccessful transition
789    * @throws KeeperException if unexpected zookeeper exception
790    */
791   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
792       ServerName serverName, EventType beginState, EventType endState,
793       int expectedVersion)
794   throws KeeperException {
795     return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null);
796   }
797 
798 
799   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
800       ServerName serverName, EventType beginState, EventType endState,
801       int expectedVersion, final byte [] payload)
802   throws KeeperException {
803     String encoded = region.getEncodedName();
804     if(LOG.isDebugEnabled()) {
805       LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) +
806         " from " + beginState.toString() + " to " + endState.toString()));
807     }
808 
809     String node = getNodeName(zkw, encoded);
810     zkw.sync(node);
811 
812     // Read existing data of the node
813     Stat stat = new Stat();
814     byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
815     if (existingBytes == null) {
816       // Node no longer exists.  Return -1. It means unsuccessful transition.
817       return -1;
818     }
819 
820     // Verify it is the expected version
821     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
822       LOG.warn(zkw.prefix("Attempt to transition the " +
823         "unassigned node for " + encoded +
824         " from " + beginState + " to " + endState + " failed, " +
825         "the node existed but was version " + stat.getVersion() +
826         " not the expected version " + expectedVersion));
827         return -1;
828     }
829 
830     if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
831         && endState.equals(EventType.RS_ZK_REGION_OPENING)
832         && expectedVersion == -1 && stat.getVersion() != 0) {
833       // the below check ensures that double assignment doesnot happen.
834       // When the node is created for the first time then the expected version
835       // that is passed will be -1 and the version in znode will be 0.
836       // In all other cases the version in znode will be > 0.
837       LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
838           + encoded + " from " + beginState + " to " + endState + " failed, "
839           + "the node existed but was version " + stat.getVersion()
840           + " not the expected version " + expectedVersion));
841       return -1;
842     }
843 
844     RegionTransition rt = getRegionTransition(existingBytes);
845 
846     // Verify the server transition happens on is not changed
847     if (!rt.getServerName().equals(serverName)) {
848       LOG.warn(zkw.prefix("Attempt to transition the " +
849         "unassigned node for " + encoded +
850         " from " + beginState + " to " + endState + " failed, " +
851         "the server that tried to transition was " + serverName +
852         " not the expected " + rt.getServerName()));
853       return -1;
854     }
855 
856     // Verify it is in expected state
857     EventType et = rt.getEventType();
858     if (!et.equals(beginState)) {
859       String existingServer = (rt.getServerName() == null)
860         ? "<unknown>" : rt.getServerName().toString();
861       LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded
862         + " from " + beginState + " to " + endState + " failed, the node existed but"
863         + " was in the state " + et + " set by the server " + existingServer));
864       return -1;
865     }
866 
867     // Write new data, ensuring data has not changed since we last read it
868     try {
869       rt = RegionTransition.createRegionTransition(
870           endState, region.getRegionName(), serverName, payload);
871       if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
872         LOG.warn(zkw.prefix("Attempt to transition the " +
873         "unassigned node for " + encoded +
874         " from " + beginState + " to " + endState + " failed, " +
875         "the node existed and was in the expected state but then when " +
876         "setting data we got a version mismatch"));
877         return -1;
878       }
879       if(LOG.isDebugEnabled()) {
880         LOG.debug(zkw.prefix("Transitioned node " + encoded +
881           " from " + beginState + " to " + endState));
882       }
883       return stat.getVersion() + 1;
884     } catch (KeeperException.NoNodeException nne) {
885       LOG.warn(zkw.prefix("Attempt to transition the " +
886         "unassigned node for " + encoded +
887         " from " + beginState + " to " + endState + " failed, " +
888         "the node existed and was in the expected state but then when " +
889         "setting data it no longer existed"));
890       return -1;
891     }
892   }
893 
894   private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException {
895     try {
896       return RegionTransition.parseFrom(bytes);
897     } catch (DeserializationException e) {
898       // Convert to a zk exception for now.  Otherwise have to change API
899       throw ZKUtil.convert(e);
900     }
901   }
902 
903   /**
904    * Gets the current data in the unassigned node for the specified region name
905    * or fully-qualified path.
906    *
907    * <p>Returns null if the region does not currently have a node.
908    *
909    * <p>Sets a watch on the node if the node exists.
910    *
911    * @param zkw zk reference
912    * @param pathOrRegionName fully-specified path or region name
913    * @return znode content
914    * @throws KeeperException if unexpected zookeeper exception
915    */
916   public static byte [] getData(ZooKeeperWatcher zkw,
917       String pathOrRegionName)
918   throws KeeperException {
919     String node = getPath(zkw, pathOrRegionName);
920     return ZKUtil.getDataAndWatch(zkw, node);
921   }
922 
923   /**
924    * Gets the current data in the unassigned node for the specified region name
925    * or fully-qualified path.
926    *
927    * <p>Returns null if the region does not currently have a node.
928    *
929    * <p>Sets a watch on the node if the node exists.
930    *
931    * @param zkw zk reference
932    * @param pathOrRegionName fully-specified path or region name
933    * @param stat object to populate the version.
934    * @return znode content
935    * @throws KeeperException if unexpected zookeeper exception
936    */
937   public static byte [] getDataAndWatch(ZooKeeperWatcher zkw,
938       String pathOrRegionName, Stat stat)
939   throws KeeperException {
940     String node = getPath(zkw, pathOrRegionName);
941     return ZKUtil.getDataAndWatch(zkw, node, stat);
942   }
943 
944   /**
945    * Gets the current data in the unassigned node for the specified region name
946    * or fully-qualified path.
947    *
948    * <p>Returns null if the region does not currently have a node.
949    *
950    * <p>Does not set a watch.
951    *
952    * @param zkw zk reference
953    * @param pathOrRegionName fully-specified path or region name
954    * @param stat object to store node info into on getData call
955    * @return znode content
956    * @throws KeeperException if unexpected zookeeper exception
957    */
958   public static byte [] getDataNoWatch(ZooKeeperWatcher zkw,
959       String pathOrRegionName, Stat stat)
960   throws KeeperException {
961     String node = getPath(zkw, pathOrRegionName);
962     return ZKUtil.getDataNoWatch(zkw, node, stat);
963   }
964 
965   /**
966    * @param zkw
967    * @param pathOrRegionName
968    * @return Path to znode
969    */
970   public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) {
971     return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName);
972   }
973 
974   /**
975    * Get the version of the specified znode
976    * @param zkw zk reference
977    * @param region region's info
978    * @return the version of the znode, -1 if it doesn't exist
979    * @throws KeeperException
980    */
981   public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
982     throws KeeperException {
983     String znode = getNodeName(zkw, region.getEncodedName());
984     return ZKUtil.checkExists(zkw, znode);
985   }
986 
987   /**
988    * Delete the assignment node regardless of its current state.
989    * <p>
990    * Fail silent even if the node does not exist at all.
991    * @param watcher
992    * @param regionInfo
993    * @throws KeeperException
994    */
995   public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
996       HRegionInfo regionInfo)
997   throws KeeperException {
998     String node = getNodeName(watcher, regionInfo.getEncodedName());
999     ZKUtil.deleteNodeFailSilent(watcher, node);
1000   }
1001 
1002   /**
1003    * Blocks until there are no node in regions in transition.
1004    * <p>
1005    * Used in testing only.
1006    * @param zkw zk reference
1007    * @throws KeeperException
1008    * @throws InterruptedException
1009    */
1010   public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
1011   throws KeeperException, InterruptedException {
1012     while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1013       List<String> znodes =
1014         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1015       if (znodes != null && !znodes.isEmpty()) {
1016         LOG.debug("Waiting on RIT: " + znodes);
1017       }
1018       Thread.sleep(100);
1019     }
1020   }
1021 
1022   /**
1023    * Blocks until there is at least one node in regions in transition.
1024    * <p>
1025    * Used in testing only.
1026    * @param zkw zk reference
1027    * @throws KeeperException
1028    * @throws InterruptedException
1029    */
1030   public static void blockUntilRIT(ZooKeeperWatcher zkw)
1031   throws KeeperException, InterruptedException {
1032     while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1033       List<String> znodes =
1034         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1035       if (znodes == null || znodes.isEmpty()) {
1036         LOG.debug("No RIT in ZK");
1037       }
1038       Thread.sleep(100);
1039     }
1040   }
1041 
1042   /**
1043    * Presume bytes are serialized unassigned data structure
1044    * @param znodeBytes
1045    * @return String of the deserialized znode bytes.
1046    */
1047   static String toString(final byte[] znodeBytes) {
1048     // This method should not exist.  Used by ZKUtil stringifying RegionTransition.  Have the
1049     // method in here so RegionTransition does not leak into ZKUtil.
1050     try {
1051       RegionTransition rt = RegionTransition.parseFrom(znodeBytes);
1052       return rt.toString();
1053     } catch (DeserializationException e) {
1054       return "";
1055     }
1056   }
1057 }