View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.zookeeper;
20  
21  import java.util.List;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.HRegionInfo;
28  import org.apache.hadoop.hbase.RegionTransition;
29  import org.apache.hadoop.hbase.ServerName;
30  import org.apache.hadoop.hbase.exceptions.DeserializationException;
31  import org.apache.hadoop.hbase.executor.EventType;
32  import org.apache.zookeeper.AsyncCallback;
33  import org.apache.zookeeper.KeeperException;
34  import org.apache.zookeeper.KeeperException.Code;
35  import org.apache.zookeeper.KeeperException.NoNodeException;
36  import org.apache.zookeeper.KeeperException.NodeExistsException;
37  import org.apache.zookeeper.data.Stat;
38  
39  // We should not be importing this Type here, nor a RegionTransition, etc.  This class should be
40  // about zk and bytes only.
41  
42  /**
43   * Utility class for doing region assignment in ZooKeeper.  This class extends
44   * stuff done in {@link ZKUtil} to cover specific assignment operations.
45   * <p>
46   * Contains only static methods and constants.
47   * <p>
48   * Used by both the Master and RegionServer.
49   * <p>
50   * All valid transitions outlined below:
51   * <p>
52   * <b>MASTER</b>
53   * <ol>
54   *   <li>
55   *     Master creates an unassigned node as OFFLINE.
56   *     - Cluster startup and table enabling.
57   *   </li>
58   *   <li>
59   *     Master forces an existing unassigned node to OFFLINE.
60   *     - RegionServer failure.
61   *     - Allows transitions from all states to OFFLINE.
62   *   </li>
63   *   <li>
64   *     Master deletes an unassigned node that was in an OPENED state.
65   *     - Normal region transitions.  Besides cluster startup, no other deletions
66   *     of unassigned nodes are allowed.
67   *   </li>
68   *   <li>
69   *     Master deletes all unassigned nodes regardless of state.
70   *     - Cluster startup before any assignment happens.
71   *   </li>
72   * </ol>
73   * <p>
74   * <b>REGIONSERVER</b>
75   * <ol>
76   *   <li>
77   *     RegionServer creates an unassigned node as CLOSING.
78   *     - All region closes will do this in response to a CLOSE RPC from Master.
79   *     - A node can never be transitioned to CLOSING, only created.
80   *   </li>
81   *   <li>
82   *     RegionServer transitions an unassigned node from CLOSING to CLOSED.
83   *     - Normal region closes.  CAS operation.
84   *   </li>
85   *   <li>
86   *     RegionServer transitions an unassigned node from OFFLINE to OPENING.
87   *     - All region opens will do this in response to an OPEN RPC from the Master.
88   *     - Normal region opens.  CAS operation.
89   *   </li>
90   *   <li>
91   *     RegionServer transitions an unassigned node from OPENING to OPENED.
92   *     - Normal region opens.  CAS operation.
93   *   </li>
94   * </ol>
95   */
96  @InterfaceAudience.Private
97  public class ZKAssign {
98    private static final Log LOG = LogFactory.getLog(ZKAssign.class);
99  
100   /**
101    * Gets the full path node name for the unassigned node for the specified
102    * region.
103    * @param zkw zk reference
104    * @param regionName region name
105    * @return full path node name
106    */
107   public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
108     return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
109   }
110 
111   /**
112    * Gets the region name from the full path node name of an unassigned node.
113    * @param path full zk path
114    * @return region name
115    */
116   public static String getRegionName(ZooKeeperWatcher zkw, String path) {
117     return path.substring(zkw.assignmentZNode.length()+1);
118   }
119 
120   // Master methods
121 
122   /**
123    * Creates a new unassigned node in the OFFLINE state for the specified region.
124    *
125    * <p>Does not transition nodes from other states.  If a node already exists
126    * for this region, a {@link NodeExistsException} will be thrown.
127    *
128    * <p>Sets a watcher on the unassigned region node if the method is successful.
129    *
130    * <p>This method should only be used during cluster startup and the enabling
131    * of a table.
132    *
133    * @param zkw zk reference
134    * @param region region to be created as offline
135    * @param serverName server transition will happen on
136    * @throws KeeperException if unexpected zookeeper exception
137    * @throws KeeperException.NodeExistsException if node already exists
138    */
139   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
140       ServerName serverName)
141   throws KeeperException, KeeperException.NodeExistsException {
142     createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
143   }
144 
145   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
146       ServerName serverName, final EventType event)
147   throws KeeperException, KeeperException.NodeExistsException {
148     LOG.debug(zkw.prefix("Creating unassigned node " +
149       region.getEncodedName() + " in OFFLINE state"));
150     RegionTransition rt =
151       RegionTransition.createRegionTransition(event, region.getRegionName(), serverName);
152     String node = getNodeName(zkw, region.getEncodedName());
153     ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
154   }
155 
156   /**
157    * Creates an unassigned node in the OFFLINE state for the specified region.
158    * <p>
159    * Runs asynchronously.  Depends on no pre-existing znode.
160    *
161    * <p>Sets a watcher on the unassigned region node.
162    *
163    * @param zkw zk reference
164    * @param region region to be created as offline
165    * @param serverName server transition will happen on
166    * @param cb
167    * @param ctx
168    * @throws KeeperException if unexpected zookeeper exception
169    * @throws KeeperException.NodeExistsException if node already exists
170    */
171   public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
172       HRegionInfo region, ServerName serverName,
173       final AsyncCallback.StringCallback cb, final Object ctx)
174   throws KeeperException {
175     LOG.debug(zkw.prefix("Async create of unassigned node " +
176       region.getEncodedName() + " with OFFLINE state"));
177     RegionTransition rt =
178       RegionTransition.createRegionTransition(
179           EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
180     String node = getNodeName(zkw, region.getEncodedName());
181     ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx);
182   }
183 
184   /**
185    * Creates or force updates an unassigned node to the OFFLINE state for the
186    * specified region.
187    * <p>
188    * Attempts to create the node but if it exists will force it to transition to
189    * and OFFLINE state.
190    *
191    * <p>Sets a watcher on the unassigned region node if the method is
192    * successful.
193    *
194    * <p>This method should be used when assigning a region.
195    *
196    * @param zkw zk reference
197    * @param region region to be created as offline
198    * @param serverName server transition will happen on
199    * @return the version of the znode created in OFFLINE state, -1 if
200    *         unsuccessful.
201    * @throws KeeperException if unexpected zookeeper exception
202    * @throws KeeperException.NodeExistsException if node already exists
203    */
204   public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
205       HRegionInfo region, ServerName serverName) throws KeeperException {
206     LOG.debug(zkw.prefix("Creating (or updating) unassigned node " +
207       region.getEncodedName() + " with OFFLINE state"));
208     RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE,
209       region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
210     byte [] data = rt.toByteArray();
211     String node = getNodeName(zkw, region.getEncodedName());
212     zkw.sync(node);
213     int version = ZKUtil.checkExists(zkw, node);
214     if (version == -1) {
215       return ZKUtil.createAndWatch(zkw, node, data);
216     } else {
217       boolean setData = false;
218       try {
219         setData = ZKUtil.setData(zkw, node, data, version);
220         // Setdata throws KeeperException which aborts the Master. So we are
221         // catching it here.
222         // If just before setting the znode to OFFLINE if the RS has made any
223         // change to the
224         // znode state then we need to return -1.
225       } catch (KeeperException kpe) {
226         LOG.info("Version mismatch while setting the node to OFFLINE state.");
227         return -1;
228       }
229       if (!setData) {
230         return -1;
231       } else {
232         // We successfully forced to OFFLINE, reset watch and handle if
233         // the state changed in between our set and the watch
234         byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
235         rt = getRegionTransition(bytes);
236         if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) {
237           // state changed, need to process
238           return -1;
239         }
240       }
241     }
242     return version + 1;
243   }
244 
245   /**
246    * Deletes an existing unassigned node that is in the OPENED state for the
247    * specified region.
248    *
249    * <p>If a node does not already exist for this region, a
250    * {@link NoNodeException} will be thrown.
251    *
252    * <p>No watcher is set whether this succeeds or not.
253    *
254    * <p>Returns false if the node was not in the proper state but did exist.
255    *
256    * <p>This method is used during normal region transitions when a region
257    * finishes successfully opening.  This is the Master acknowledging completion
258    * of the specified regions transition.
259    *
260    * @param zkw zk reference
261    * @param encodedRegionName opened region to be deleted from zk
262    * @param sn the expected region transition target server name
263    * @throws KeeperException if unexpected zookeeper exception
264    * @throws KeeperException.NoNodeException if node does not exist
265    */
266   public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
267       String encodedRegionName, ServerName sn)
268   throws KeeperException, KeeperException.NoNodeException {
269     return deleteNode(zkw, encodedRegionName,
270       EventType.RS_ZK_REGION_OPENED, sn);
271   }
272 
273   /**
274    * Deletes an existing unassigned node that is in the OFFLINE state for the
275    * specified region.
276    *
277    * <p>If a node does not already exist for this region, a
278    * {@link NoNodeException} will be thrown.
279    *
280    * <p>No watcher is set whether this succeeds or not.
281    *
282    * <p>Returns false if the node was not in the proper state but did exist.
283    *
284    * <p>This method is used during master failover when the regions on an RS
285    * that has died are all set to OFFLINE before being processed.
286    *
287    * @param zkw zk reference
288    * @param encodedRegionName closed region to be deleted from zk
289    * @param sn the expected region transition target server name
290    * @throws KeeperException if unexpected zookeeper exception
291    * @throws KeeperException.NoNodeException if node does not exist
292    */
293   public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
294       String encodedRegionName, ServerName sn)
295   throws KeeperException, KeeperException.NoNodeException {
296     return deleteNode(zkw, encodedRegionName,
297       EventType.M_ZK_REGION_OFFLINE, sn);
298   }
299 
300   /**
301    * Deletes an existing unassigned node that is in the CLOSED state for the
302    * specified region.
303    *
304    * <p>If a node does not already exist for this region, a
305    * {@link NoNodeException} will be thrown.
306    *
307    * <p>No watcher is set whether this succeeds or not.
308    *
309    * <p>Returns false if the node was not in the proper state but did exist.
310    *
311    * <p>This method is used during table disables when a region finishes
312    * successfully closing.  This is the Master acknowledging completion
313    * of the specified regions transition to being closed.
314    *
315    * @param zkw zk reference
316    * @param encodedRegionName closed region to be deleted from zk
317    * @param sn the expected region transition target server name
318    * @throws KeeperException if unexpected zookeeper exception
319    * @throws KeeperException.NoNodeException if node does not exist
320    */
321   public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
322       String encodedRegionName, ServerName sn)
323   throws KeeperException, KeeperException.NoNodeException {
324     return deleteNode(zkw, encodedRegionName,
325       EventType.RS_ZK_REGION_CLOSED, sn);
326   }
327 
328   /**
329    * Deletes an existing unassigned node that is in the CLOSING state for the
330    * specified region.
331    *
332    * <p>If a node does not already exist for this region, a
333    * {@link NoNodeException} will be thrown.
334    *
335    * <p>No watcher is set whether this succeeds or not.
336    *
337    * <p>Returns false if the node was not in the proper state but did exist.
338    *
339    * <p>This method is used during table disables when a region finishes
340    * successfully closing.  This is the Master acknowledging completion
341    * of the specified regions transition to being closed.
342    *
343    * @param zkw zk reference
344    * @param region closing region to be deleted from zk
345    * @param sn the expected region transition target server name
346    * @throws KeeperException if unexpected zookeeper exception
347    * @throws KeeperException.NoNodeException if node does not exist
348    */
349   public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
350       HRegionInfo region, ServerName sn)
351   throws KeeperException, KeeperException.NoNodeException {
352     String encodedRegionName = region.getEncodedName();
353     return deleteNode(zkw, encodedRegionName,
354       EventType.M_ZK_REGION_CLOSING, sn);
355   }
356 
357   /**
358    * Deletes an existing unassigned node that is in the specified state for the
359    * specified region.
360    *
361    * <p>If a node does not already exist for this region, a
362    * {@link NoNodeException} will be thrown.
363    *
364    * <p>No watcher is set whether this succeeds or not.
365    *
366    * <p>Returns false if the node was not in the proper state but did exist.
367    *
368    * <p>This method is used when a region finishes opening/closing.
369    * The Master acknowledges completion
370    * of the specified regions transition to being closed/opened.
371    *
372    * @param zkw zk reference
373    * @param encodedRegionName region to be deleted from zk
374    * @param expectedState state region must be in for delete to complete
375    * @param sn the expected region transition target server name
376    * @throws KeeperException if unexpected zookeeper exception
377    * @throws KeeperException.NoNodeException if node does not exist
378    */
379   public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
380       EventType expectedState, ServerName sn)
381   throws KeeperException, KeeperException.NoNodeException {
382     return deleteNode(zkw, encodedRegionName, expectedState, sn, -1);
383   }
384 
385   /**
386    * Deletes an existing unassigned node that is in the specified state for the
387    * specified region.
388    *
389    * <p>If a node does not already exist for this region, a
390    * {@link NoNodeException} will be thrown.
391    *
392    * <p>No watcher is set whether this succeeds or not.
393    *
394    * <p>Returns false if the node was not in the proper state but did exist.
395    *
396    * <p>This method is used when a region finishes opening/closing.
397    * The Master acknowledges completion
398    * of the specified regions transition to being closed/opened.
399    *
400    * @param zkw zk reference
401    * @param encodedRegionName region to be deleted from zk
402    * @param expectedState state region must be in for delete to complete
403    * @param expectedVersion of the znode that is to be deleted.
404    *        If expectedVersion need not be compared while deleting the znode
405    *        pass -1
406    * @throws KeeperException if unexpected zookeeper exception
407    * @throws KeeperException.NoNodeException if node does not exist
408    */
409   public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
410       EventType expectedState, int expectedVersion)
411   throws KeeperException, KeeperException.NoNodeException {
412     return deleteNode(zkw, encodedRegionName, expectedState, null, expectedVersion);
413   }
414 
415   /**
416    * Deletes an existing unassigned node that is in the specified state for the
417    * specified region.
418    *
419    * <p>If a node does not already exist for this region, a
420    * {@link NoNodeException} will be thrown.
421    *
422    * <p>No watcher is set whether this succeeds or not.
423    *
424    * <p>Returns false if the node was not in the proper state but did exist.
425    *
426    * <p>This method is used when a region finishes opening/closing.
427    * The Master acknowledges completion
428    * of the specified regions transition to being closed/opened.
429    *
430    * @param zkw zk reference
431    * @param encodedRegionName region to be deleted from zk
432    * @param expectedState state region must be in for delete to complete
433    * @param serverName the expected region transition target server name
434    * @param expectedVersion of the znode that is to be deleted.
435    *        If expectedVersion need not be compared while deleting the znode
436    *        pass -1
437    * @throws KeeperException if unexpected zookeeper exception
438    * @throws KeeperException.NoNodeException if node does not exist
439    */
440   public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
441       EventType expectedState, ServerName serverName, int expectedVersion)
442   throws KeeperException, KeeperException.NoNodeException {
443     if (LOG.isTraceEnabled()) {
444     	LOG.trace(zkw.prefix("Deleting existing unassigned " +
445       "node " + encodedRegionName + " in expected state " + expectedState));
446     }
447     String node = getNodeName(zkw, encodedRegionName);
448     zkw.sync(node);
449     Stat stat = new Stat();
450     byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
451     if (bytes == null) {
452       // If it came back null, node does not exist.
453       throw KeeperException.create(Code.NONODE);
454     }
455     RegionTransition rt = getRegionTransition(bytes);
456     EventType et = rt.getEventType();
457     if (!et.equals(expectedState)) {
458       LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " +
459         expectedState + " state but node is in " + et + " state"));
460       return false;
461     }
462     // Verify the server transition happens on is not changed
463     if (serverName != null && !rt.getServerName().equals(serverName)) {
464       LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName
465         + " with target " + serverName + " but node has " + rt.getServerName()));
466       return false;
467     }
468     if (expectedVersion != -1
469         && stat.getVersion() != expectedVersion) {
470       LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" +
471         " the expected one. Got a version mismatch");
472       return false;
473     }
474     if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
475       LOG.warn(zkw.prefix("Attempting to delete " +
476           "unassigned node " + encodedRegionName + " in " + expectedState +
477           " state but after verifying state, we got a version mismatch"));
478       return false;
479     }
480     LOG.debug(zkw.prefix("Deleted unassigned node " +
481         encodedRegionName + " in expected state " + expectedState));
482     return true;
483   }
484 
485   /**
486    * Deletes all unassigned nodes regardless of their state.
487    *
488    * <p>No watchers are set.
489    *
490    * <p>This method is used by the Master during cluster startup to clear out
491    * any existing state from other cluster runs.
492    *
493    * @param zkw zk reference
494    * @throws KeeperException if unexpected zookeeper exception
495    */
496   public static void deleteAllNodes(ZooKeeperWatcher zkw)
497   throws KeeperException {
498     LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
499     ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
500   }
501 
502   /**
503    * Creates a new unassigned node in the CLOSING state for the specified
504    * region.
505    *
506    * <p>Does not transition nodes from any states.  If a node already exists
507    * for this region, a {@link NodeExistsException} will be thrown.
508    *
509    * <p>If creation is successful, returns the version number of the CLOSING
510    * node created.
511    *
512    * <p>Set a watch.
513    *
514    * <p>This method should only be used by a Master when initiating a
515    * close of a region before sending a close request to the region server.
516    *
517    * @param zkw zk reference
518    * @param region region to be created as closing
519    * @param serverName server transition will happen on
520    * @return version of node after transition, -1 if unsuccessful transition
521    * @throws KeeperException if unexpected zookeeper exception
522    * @throws KeeperException.NodeExistsException if node already exists
523    */
524   public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
525       ServerName serverName)
526   throws KeeperException, KeeperException.NodeExistsException {
527     LOG.debug(zkw.prefix("Creating unassigned node " +
528       region.getEncodedName() + " in a CLOSING state"));
529     RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
530       region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
531     String node = getNodeName(zkw, region.getEncodedName());
532     return ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
533   }
534 
535   // RegionServer methods
536 
537   /**
538    * Transitions an existing unassigned node for the specified region which is
539    * currently in the CLOSING state to be in the CLOSED state.
540    *
541    * <p>Does not transition nodes from other states.  If for some reason the
542    * node could not be transitioned, the method returns -1.  If the transition
543    * is successful, the version of the node after transition is returned.
544    *
545    * <p>This method can fail and return false for three different reasons:
546    * <ul><li>Unassigned node for this region does not exist</li>
547    * <li>Unassigned node for this region is not in CLOSING state</li>
548    * <li>After verifying CLOSING state, update fails because of wrong version
549    * (someone else already transitioned the node)</li>
550    * </ul>
551    *
552    * <p>Does not set any watches.
553    *
554    * <p>This method should only be used by a RegionServer when initiating a
555    * close of a region after receiving a CLOSE RPC from the Master.
556    *
557    * @param zkw zk reference
558    * @param region region to be transitioned to closed
559    * @param serverName server transition happens on
560    * @return version of node after transition, -1 if unsuccessful transition
561    * @throws KeeperException if unexpected zookeeper exception
562    */
563   public static int transitionNodeClosed(ZooKeeperWatcher zkw,
564       HRegionInfo region, ServerName serverName, int expectedVersion)
565   throws KeeperException {
566     return transitionNode(zkw, region, serverName,
567         EventType.M_ZK_REGION_CLOSING,
568         EventType.RS_ZK_REGION_CLOSED, expectedVersion);
569   }
570 
571   /**
572    * Transitions an existing unassigned node for the specified region which is
573    * currently in the OFFLINE state to be in the OPENING state.
574    *
575    * <p>Does not transition nodes from other states.  If for some reason the
576    * node could not be transitioned, the method returns -1.  If the transition
577    * is successful, the version of the node written as OPENING is returned.
578    *
579    * <p>This method can fail and return -1 for three different reasons:
580    * <ul><li>Unassigned node for this region does not exist</li>
581    * <li>Unassigned node for this region is not in OFFLINE state</li>
582    * <li>After verifying OFFLINE state, update fails because of wrong version
583    * (someone else already transitioned the node)</li>
584    * </ul>
585    *
586    * <p>Does not set any watches.
587    *
588    * <p>This method should only be used by a RegionServer when initiating an
589    * open of a region after receiving an OPEN RPC from the Master.
590    *
591    * @param zkw zk reference
592    * @param region region to be transitioned to opening
593    * @param serverName server transition happens on
594    * @return version of node after transition, -1 if unsuccessful transition
595    * @throws KeeperException if unexpected zookeeper exception
596    */
597   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
598       HRegionInfo region, ServerName serverName)
599   throws KeeperException {
600     return transitionNodeOpening(zkw, region, serverName,
601       EventType.M_ZK_REGION_OFFLINE);
602   }
603 
604   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
605       HRegionInfo region, ServerName serverName, final EventType beginState)
606   throws KeeperException {
607     return transitionNode(zkw, region, serverName, beginState,
608       EventType.RS_ZK_REGION_OPENING, -1);
609   }
610 
611   /**
612    * Retransitions an existing unassigned node for the specified region which is
613    * currently in the OPENING state to be in the OPENING state.
614    *
615    * <p>Does not transition nodes from other states.  If for some reason the
616    * node could not be transitioned, the method returns -1.  If the transition
617    * is successful, the version of the node rewritten as OPENING is returned.
618    *
619    * <p>This method can fail and return -1 for three different reasons:
620    * <ul><li>Unassigned node for this region does not exist</li>
621    * <li>Unassigned node for this region is not in OPENING state</li>
622    * <li>After verifying OPENING state, update fails because of wrong version
623    * (someone else already transitioned the node)</li>
624    * </ul>
625    *
626    * <p>Does not set any watches.
627    *
628    * <p>This method should only be used by a RegionServer when initiating an
629    * open of a region after receiving an OPEN RPC from the Master.
630    *
631    * @param zkw zk reference
632    * @param region region to be transitioned to opening
633    * @param serverName server transition happens on
634    * @param updateZNode write the znode. If false, we only check.
635    * @return version of node after transition, -1 if unsuccessful transition
636    * @throws KeeperException if unexpected zookeeper exception
637    */
638   public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
639       HRegionInfo region, ServerName serverName, int expectedVersion, boolean updateZNode)
640   throws KeeperException {
641 
642     String encoded = region.getEncodedName();
643     if(LOG.isDebugEnabled()) {
644       LOG.debug(zkw.prefix("Attempting to retransition opening state of node " +
645           HRegionInfo.prettyPrint(encoded)));
646     }
647 
648     String node = getNodeName(zkw, encoded);
649     zkw.sync(node);
650 
651     // Read existing data of the node
652     Stat stat = new Stat();
653     byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
654     if (existingBytes == null) {
655       // Node no longer exists.  Return -1. It means unsuccessful transition.
656       return -1;
657     }
658     RegionTransition rt = getRegionTransition(existingBytes);
659 
660     // Verify it is the expected version
661     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
662       LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
663           "unassigned node for " + encoded + " failed, " +
664           "the node existed but was version " + stat.getVersion() +
665           " not the expected version " + expectedVersion));
666       return -1;
667     }
668 
669     // Verify it is in expected state
670     EventType et = rt.getEventType();
671     if (!et.equals(EventType.RS_ZK_REGION_OPENING)) {
672       String existingServer = (rt.getServerName() == null)
673           ? "<unknown>" : rt.getServerName().toString();
674       LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for "
675           + encoded + " failed, the node existed but was in the state " + et +
676           " set by the server " + existingServer));
677       return -1;
678     }
679 
680     // We don't have to write the new state: the check is complete.
681     if (!updateZNode){
682       return expectedVersion;
683     }
684 
685     // Write new data, ensuring data has not changed since we last read it
686     try {
687       rt = RegionTransition.createRegionTransition(
688           EventType.RS_ZK_REGION_OPENING, region.getRegionName(), serverName, null);
689       if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
690         LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
691             "unassigned node for " + encoded + " failed, " +
692             "the node existed and was in the expected state but then when " +
693             "setting data we got a version mismatch"));
694         return -1;
695       }
696       if(LOG.isDebugEnabled()) {
697         LOG.debug(zkw.prefix("Retransition opening state of node " + encoded));
698       }
699       return stat.getVersion() + 1;
700     } catch (KeeperException.NoNodeException nne) {
701       LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
702           "unassigned node for " + encoded + " failed, " +
703           "the node existed and was in the expected state but then when " +
704           "setting data it no longer existed"));
705       return -1;
706     }
707   }
708 
709   /**
710    * Transitions an existing unassigned node for the specified region which is
711    * currently in the OPENING state to be in the OPENED state.
712    *
713    * <p>Does not transition nodes from other states.  If for some reason the
714    * node could not be transitioned, the method returns -1.  If the transition
715    * is successful, the version of the node after transition is returned.
716    *
717    * <p>This method can fail and return false for three different reasons:
718    * <ul><li>Unassigned node for this region does not exist</li>
719    * <li>Unassigned node for this region is not in OPENING state</li>
720    * <li>After verifying OPENING state, update fails because of wrong version
721    * (this should never actually happen since an RS only does this transition
722    * following a transition to OPENING.  if two RS are conflicting, one would
723    * fail the original transition to OPENING and not this transition)</li>
724    * </ul>
725    *
726    * <p>Does not set any watches.
727    *
728    * <p>This method should only be used by a RegionServer when completing the
729    * open of a region.
730    *
731    * @param zkw zk reference
732    * @param region region to be transitioned to opened
733    * @param serverName server transition happens on
734    * @return version of node after transition, -1 if unsuccessful transition
735    * @throws KeeperException if unexpected zookeeper exception
736    */
737   public static int transitionNodeOpened(ZooKeeperWatcher zkw,
738       HRegionInfo region, ServerName serverName, int expectedVersion)
739   throws KeeperException {
740     return transitionNode(zkw, region, serverName,
741         EventType.RS_ZK_REGION_OPENING,
742         EventType.RS_ZK_REGION_OPENED, expectedVersion);
743   }
744 
745   /**
746    *
747    * @param zkw zk reference
748    * @param region region to be closed
749    * @param expectedVersion expected version of the znode
750    * @return true if the znode exists, has the right version and the right state. False otherwise.
751    * @throws KeeperException
752    */
753   public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region,
754                                           int expectedVersion) throws KeeperException {
755 
756     final String encoded = getNodeName(zkw, region.getEncodedName());
757     zkw.sync(encoded);
758 
759     // Read existing data of the node
760     Stat stat = new Stat();
761     byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat);
762 
763     if (existingBytes == null) {
764       LOG.warn(zkw.prefix("Attempt to check the " +
765           "closing node for " + encoded +
766           ". The node does not exist"));
767       return false;
768     }
769 
770     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
771       LOG.warn(zkw.prefix("Attempt to check the " +
772           "closing node for " + encoded +
773           ". The node existed but was version " + stat.getVersion() +
774           " not the expected version " + expectedVersion));
775       return false;
776     }
777 
778     RegionTransition rt = getRegionTransition(existingBytes);
779 
780     if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) {
781       LOG.warn(zkw.prefix("Attempt to check the " +
782           "closing node for " + encoded +
783           ". The node existed but was in an unexpected state: " + rt.getEventType()));
784       return false;
785     }
786 
787     return true;
788   }
789 
790   /**
791    * Method that actually performs unassigned node transitions.
792    *
793    * <p>Attempts to transition the unassigned node for the specified region
794    * from the expected state to the state in the specified transition data.
795    *
796    * <p>Method first reads existing data and verifies it is in the expected
797    * state.  If the node does not exist or the node is not in the expected
798    * state, the method returns -1.  If the transition is successful, the
799    * version number of the node following the transition is returned.
800    *
801    * <p>If the read state is what is expected, it attempts to write the new
802    * state and data into the node.  When doing this, it includes the expected
803    * version (determined when the existing state was verified) to ensure that
804    * only one transition is successful.  If there is a version mismatch, the
805    * method returns -1.
806    *
807    * <p>If the write is successful, no watch is set and the method returns true.
808    *
809    * @param zkw zk reference
810    * @param region region to be transitioned to opened
811    * @param serverName server transition happens on
812    * @param endState state to transition node to if all checks pass
813    * @param beginState state the node must currently be in to do transition
814    * @param expectedVersion expected version of data before modification, or -1
815    * @return version of node after transition, -1 if unsuccessful transition
816    * @throws KeeperException if unexpected zookeeper exception
817    */
818   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
819       ServerName serverName, EventType beginState, EventType endState,
820       int expectedVersion)
821   throws KeeperException {
822     return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null);
823   }
824 
825 
826   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
827       ServerName serverName, EventType beginState, EventType endState,
828       int expectedVersion, final byte [] payload)
829   throws KeeperException {
830     String encoded = region.getEncodedName();
831     if(LOG.isDebugEnabled()) {
832       LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) +
833         " from " + beginState.toString() + " to " + endState.toString()));
834     }
835 
836     String node = getNodeName(zkw, encoded);
837     zkw.sync(node);
838 
839     // Read existing data of the node
840     Stat stat = new Stat();
841     byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
842     if (existingBytes == null) {
843       // Node no longer exists.  Return -1. It means unsuccessful transition.
844       return -1;
845     }
846 
847     // Verify it is the expected version
848     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
849       LOG.warn(zkw.prefix("Attempt to transition the " +
850         "unassigned node for " + encoded +
851         " from " + beginState + " to " + endState + " failed, " +
852         "the node existed but was version " + stat.getVersion() +
853         " not the expected version " + expectedVersion));
854         return -1;
855     }
856 
857     if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
858         && endState.equals(EventType.RS_ZK_REGION_OPENING)
859         && expectedVersion == -1 && stat.getVersion() != 0) {
860       // the below check ensures that double assignment doesnot happen.
861       // When the node is created for the first time then the expected version
862       // that is passed will be -1 and the version in znode will be 0.
863       // In all other cases the version in znode will be > 0.
864       LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
865           + encoded + " from " + beginState + " to " + endState + " failed, "
866           + "the node existed but was version " + stat.getVersion()
867           + " not the expected version " + expectedVersion));
868       return -1;
869     }
870 
871     RegionTransition rt = getRegionTransition(existingBytes);
872 
873     // Verify the server transition happens on is not changed
874     if (!rt.getServerName().equals(serverName)) {
875       LOG.warn(zkw.prefix("Attempt to transition the " +
876         "unassigned node for " + encoded +
877         " from " + beginState + " to " + endState + " failed, " +
878         "the server that tried to transition was " + serverName +
879         " not the expected " + rt.getServerName()));
880       return -1;
881     }
882 
883     // Verify it is in expected state
884     EventType et = rt.getEventType();
885     if (!et.equals(beginState)) {
886       String existingServer = (rt.getServerName() == null)
887         ? "<unknown>" : rt.getServerName().toString();
888       LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded
889         + " from " + beginState + " to " + endState + " failed, the node existed but"
890         + " was in the state " + et + " set by the server " + existingServer));
891       return -1;
892     }
893 
894     // Write new data, ensuring data has not changed since we last read it
895     try {
896       rt = RegionTransition.createRegionTransition(
897           endState, region.getRegionName(), serverName, payload);
898       if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
899         LOG.warn(zkw.prefix("Attempt to transition the " +
900         "unassigned node for " + encoded +
901         " from " + beginState + " to " + endState + " failed, " +
902         "the node existed and was in the expected state but then when " +
903         "setting data we got a version mismatch"));
904         return -1;
905       }
906       if(LOG.isDebugEnabled()) {
907         LOG.debug(zkw.prefix("Transitioned node " + encoded +
908           " from " + beginState + " to " + endState));
909       }
910       return stat.getVersion() + 1;
911     } catch (KeeperException.NoNodeException nne) {
912       LOG.warn(zkw.prefix("Attempt to transition the " +
913         "unassigned node for " + encoded +
914         " from " + beginState + " to " + endState + " failed, " +
915         "the node existed and was in the expected state but then when " +
916         "setting data it no longer existed"));
917       return -1;
918     }
919   }
920 
921   private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException {
922     try {
923       return RegionTransition.parseFrom(bytes);
924     } catch (DeserializationException e) {
925       // Convert to a zk exception for now.  Otherwise have to change API
926       throw ZKUtil.convert(e);
927     }
928   }
929 
930   /**
931    * Gets the current data in the unassigned node for the specified region name
932    * or fully-qualified path.
933    *
934    * <p>Returns null if the region does not currently have a node.
935    *
936    * <p>Sets a watch on the node if the node exists.
937    *
938    * @param zkw zk reference
939    * @param pathOrRegionName fully-specified path or region name
940    * @return znode content
941    * @throws KeeperException if unexpected zookeeper exception
942    */
943   public static byte [] getData(ZooKeeperWatcher zkw,
944       String pathOrRegionName)
945   throws KeeperException {
946     String node = getPath(zkw, pathOrRegionName);
947     return ZKUtil.getDataAndWatch(zkw, node);
948   }
949 
950   /**
951    * Gets the current data in the unassigned node for the specified region name
952    * or fully-qualified path.
953    *
954    * <p>Returns null if the region does not currently have a node.
955    *
956    * <p>Sets a watch on the node if the node exists.
957    *
958    * @param zkw zk reference
959    * @param pathOrRegionName fully-specified path or region name
960    * @param stat object to populate the version.
961    * @return znode content
962    * @throws KeeperException if unexpected zookeeper exception
963    */
964   public static byte [] getDataAndWatch(ZooKeeperWatcher zkw,
965       String pathOrRegionName, Stat stat)
966   throws KeeperException {
967     String node = getPath(zkw, pathOrRegionName);
968     return ZKUtil.getDataAndWatch(zkw, node, stat);
969   }
970 
971   /**
972    * Gets the current data in the unassigned node for the specified region name
973    * or fully-qualified path.
974    *
975    * <p>Returns null if the region does not currently have a node.
976    *
977    * <p>Does not set a watch.
978    *
979    * @param zkw zk reference
980    * @param pathOrRegionName fully-specified path or region name
981    * @param stat object to store node info into on getData call
982    * @return znode content
983    * @throws KeeperException if unexpected zookeeper exception
984    */
985   public static byte [] getDataNoWatch(ZooKeeperWatcher zkw,
986       String pathOrRegionName, Stat stat)
987   throws KeeperException {
988     String node = getPath(zkw, pathOrRegionName);
989     return ZKUtil.getDataNoWatch(zkw, node, stat);
990   }
991 
992   /**
993    * @param zkw
994    * @param pathOrRegionName
995    * @return Path to znode
996    */
997   public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) {
998     return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName);
999   }
1000 
1001   /**
1002    * Get the version of the specified znode
1003    * @param zkw zk reference
1004    * @param region region's info
1005    * @return the version of the znode, -1 if it doesn't exist
1006    * @throws KeeperException
1007    */
1008   public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
1009     throws KeeperException {
1010     String znode = getNodeName(zkw, region.getEncodedName());
1011     return ZKUtil.checkExists(zkw, znode);
1012   }
1013 
1014   /**
1015    * Delete the assignment node regardless of its current state.
1016    * <p>
1017    * Fail silent even if the node does not exist at all.
1018    * @param watcher
1019    * @param regionInfo
1020    * @throws KeeperException
1021    */
1022   public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
1023       HRegionInfo regionInfo)
1024   throws KeeperException {
1025     String node = getNodeName(watcher, regionInfo.getEncodedName());
1026     ZKUtil.deleteNodeFailSilent(watcher, node);
1027   }
1028 
1029   /**
1030    * Blocks until there are no node in regions in transition.
1031    * <p>
1032    * Used in testing only.
1033    * @param zkw zk reference
1034    * @throws KeeperException
1035    * @throws InterruptedException
1036    */
1037   public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
1038   throws KeeperException, InterruptedException {
1039     while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1040       List<String> znodes =
1041         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1042       if (znodes != null && !znodes.isEmpty()) {
1043         LOG.debug("Waiting on RIT: " + znodes);
1044       }
1045       Thread.sleep(100);
1046     }
1047   }
1048 
1049   /**
1050    * Blocks until there is at least one node in regions in transition.
1051    * <p>
1052    * Used in testing only.
1053    * @param zkw zk reference
1054    * @throws KeeperException
1055    * @throws InterruptedException
1056    */
1057   public static void blockUntilRIT(ZooKeeperWatcher zkw)
1058   throws KeeperException, InterruptedException {
1059     while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1060       List<String> znodes =
1061         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1062       if (znodes == null || znodes.isEmpty()) {
1063         LOG.debug("No RIT in ZK");
1064       }
1065       Thread.sleep(100);
1066     }
1067   }
1068 
1069   /**
1070    * Presume bytes are serialized unassigned data structure
1071    * @param znodeBytes
1072    * @return String of the deserialized znode bytes.
1073    */
1074   static String toString(final byte[] znodeBytes) {
1075     // This method should not exist.  Used by ZKUtil stringifying RegionTransition.  Have the
1076     // method in here so RegionTransition does not leak into ZKUtil.
1077     try {
1078       RegionTransition rt = RegionTransition.parseFrom(znodeBytes);
1079       return rt.toString();
1080     } catch (DeserializationException e) {
1081       return "";
1082     }
1083   }
1084 }