1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.zookeeper;
20
21 import org.apache.commons.logging.Log;
22 import org.apache.commons.logging.LogFactory;
23 import org.apache.hadoop.classification.InterfaceAudience;
24 import org.apache.hadoop.classification.InterfaceStability;
25 import org.apache.hadoop.hbase.HConstants;
26 import org.apache.hadoop.hbase.HRegionInfo;
27 import org.apache.hadoop.hbase.RegionTransition;
28 import org.apache.hadoop.hbase.ServerName;
29 import org.apache.hadoop.hbase.exceptions.DeserializationException;
30 import org.apache.hadoop.hbase.executor.EventType;
31 import org.apache.zookeeper.AsyncCallback;
32 import org.apache.zookeeper.KeeperException;
33 import org.apache.zookeeper.KeeperException.Code;
34 import org.apache.zookeeper.KeeperException.NoNodeException;
35 import org.apache.zookeeper.KeeperException.NodeExistsException;
36 import org.apache.zookeeper.data.Stat;
37
38 import java.util.List;
39
40 // We should not be importing this Type here, nor a RegionTransition, etc. This class should be
41 // about zk and bytes only.
42
43 /**
44 * Utility class for doing region assignment in ZooKeeper. This class extends
45 * stuff done in {@link ZKUtil} to cover specific assignment operations.
46 * <p>
47 * Contains only static methods and constants.
48 * <p>
49 * Used by both the Master and RegionServer.
50 * <p>
51 * All valid transitions outlined below:
52 * <p>
53 * <b>MASTER</b>
54 * <ol>
55 * <li>
56 * Master creates an unassigned node as OFFLINE.
57 * - Cluster startup and table enabling.
58 * </li>
59 * <li>
60 * Master forces an existing unassigned node to OFFLINE.
61 * - RegionServer failure.
62 * - Allows transitions from all states to OFFLINE.
63 * </li>
64 * <li>
 * Master deletes an unassigned node that was in an OPENED state.
 * - Normal region transitions. Besides cluster startup, no other deletions
 * of unassigned nodes are allowed.
68 * </li>
69 * <li>
70 * Master deletes all unassigned nodes regardless of state.
71 * - Cluster startup before any assignment happens.
72 * </li>
73 * </ol>
74 * <p>
75 * <b>REGIONSERVER</b>
76 * <ol>
77 * <li>
78 * RegionServer creates an unassigned node as CLOSING.
79 * - All region closes will do this in response to a CLOSE RPC from Master.
80 * - A node can never be transitioned to CLOSING, only created.
81 * </li>
82 * <li>
83 * RegionServer transitions an unassigned node from CLOSING to CLOSED.
84 * - Normal region closes. CAS operation.
85 * </li>
86 * <li>
87 * RegionServer transitions an unassigned node from OFFLINE to OPENING.
88 * - All region opens will do this in response to an OPEN RPC from the Master.
89 * - Normal region opens. CAS operation.
90 * </li>
91 * <li>
92 * RegionServer transitions an unassigned node from OPENING to OPENED.
93 * - Normal region opens. CAS operation.
94 * </li>
95 * </ol>
96 */
97 @InterfaceAudience.Public
98 @InterfaceStability.Evolving
99 public class ZKAssign {
/** Logger used by the static helpers of this class for their debug, info and warn messages. */
private static final Log LOG = LogFactory.getLog(ZKAssign.class);
101
102 /**
103 * Gets the full path node name for the unassigned node for the specified
104 * region.
105 * @param zkw zk reference
106 * @param regionName region name
107 * @return full path node name
108 */
109 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
110 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
111 }
112
113 /**
114 * Gets the region name from the full path node name of an unassigned node.
115 * @param path full zk path
116 * @return region name
117 */
118 public static String getRegionName(ZooKeeperWatcher zkw, String path) {
119 return path.substring(zkw.assignmentZNode.length()+1);
120 }
121
122 // Master methods
123
124 /**
125 * Creates a new unassigned node in the OFFLINE state for the specified region.
126 *
127 * <p>Does not transition nodes from other states. If a node already exists
128 * for this region, a {@link NodeExistsException} will be thrown.
129 *
130 * <p>Sets a watcher on the unassigned region node if the method is successful.
131 *
132 * <p>This method should only be used during cluster startup and the enabling
133 * of a table.
134 *
135 * @param zkw zk reference
136 * @param region region to be created as offline
137 * @param serverName server transition will happen on
138 * @throws KeeperException if unexpected zookeeper exception
139 * @throws KeeperException.NodeExistsException if node already exists
140 */
141 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
142 ServerName serverName)
143 throws KeeperException, KeeperException.NodeExistsException {
144 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
145 }
146
147 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
148 ServerName serverName, final EventType event)
149 throws KeeperException, KeeperException.NodeExistsException {
150 LOG.debug(zkw.prefix("Creating unassigned node for " +
151 region.getEncodedName() + " in OFFLINE state"));
152 RegionTransition rt =
153 RegionTransition.createRegionTransition(event, region.getRegionName(), serverName);
154 String node = getNodeName(zkw, region.getEncodedName());
155 ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
156 }
157
158 /**
159 * Creates an unassigned node in the OFFLINE state for the specified region.
160 * <p>
161 * Runs asynchronously. Depends on no pre-existing znode.
162 *
163 * <p>Sets a watcher on the unassigned region node.
164 *
165 * @param zkw zk reference
166 * @param region region to be created as offline
167 * @param serverName server transition will happen on
168 * @param cb
169 * @param ctx
170 * @throws KeeperException if unexpected zookeeper exception
171 * @throws KeeperException.NodeExistsException if node already exists
172 */
173 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
174 HRegionInfo region, ServerName serverName,
175 final AsyncCallback.StringCallback cb, final Object ctx)
176 throws KeeperException {
177 LOG.debug(zkw.prefix("Async create of unassigned node for " +
178 region.getEncodedName() + " with OFFLINE state"));
179 RegionTransition rt =
180 RegionTransition.createRegionTransition(
181 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
182 String node = getNodeName(zkw, region.getEncodedName());
183 ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx);
184 }
185
186 /**
187 * Creates or force updates an unassigned node to the OFFLINE state for the
188 * specified region.
189 * <p>
190 * Attempts to create the node but if it exists will force it to transition to
191 * and OFFLINE state.
192 *
193 * <p>Sets a watcher on the unassigned region node if the method is
194 * successful.
195 *
196 * <p>This method should be used when assigning a region.
197 *
198 * @param zkw zk reference
199 * @param region region to be created as offline
200 * @param serverName server transition will happen on
201 * @return the version of the znode created in OFFLINE state, -1 if
202 * unsuccessful.
203 * @throws KeeperException if unexpected zookeeper exception
204 * @throws KeeperException.NodeExistsException if node already exists
205 */
206 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
207 HRegionInfo region, ServerName serverName) throws KeeperException {
208 LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
209 region.getEncodedName() + " with OFFLINE state"));
210 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE,
211 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
212 byte [] data = rt.toByteArray();
213 String node = getNodeName(zkw, region.getEncodedName());
214 zkw.sync(node);
215 int version = ZKUtil.checkExists(zkw, node);
216 if (version == -1) {
217 return ZKUtil.createAndWatch(zkw, node, data);
218 } else {
219 boolean setData = false;
220 try {
221 setData = ZKUtil.setData(zkw, node, data, version);
222 // Setdata throws KeeperException which aborts the Master. So we are
223 // catching it here.
224 // If just before setting the znode to OFFLINE if the RS has made any
225 // change to the
226 // znode state then we need to return -1.
227 } catch (KeeperException kpe) {
228 LOG.info("Version mismatch while setting the node to OFFLINE state.");
229 return -1;
230 }
231 if (!setData) {
232 return -1;
233 } else {
234 // We successfully forced to OFFLINE, reset watch and handle if
235 // the state changed in between our set and the watch
236 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
237 rt = getRegionTransition(bytes);
238 if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) {
239 // state changed, need to process
240 return -1;
241 }
242 }
243 }
244 return version + 1;
245 }
246
247 /**
248 * Deletes an existing unassigned node that is in the OPENED state for the
249 * specified region.
250 *
251 * <p>If a node does not already exist for this region, a
252 * {@link NoNodeException} will be thrown.
253 *
254 * <p>No watcher is set whether this succeeds or not.
255 *
256 * <p>Returns false if the node was not in the proper state but did exist.
257 *
258 * <p>This method is used during normal region transitions when a region
259 * finishes successfully opening. This is the Master acknowledging completion
260 * of the specified regions transition.
261 *
262 * @param zkw zk reference
263 * @param encodedRegionName opened region to be deleted from zk
264 * @throws KeeperException if unexpected zookeeper exception
265 * @throws KeeperException.NoNodeException if node does not exist
266 */
267 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
268 String encodedRegionName)
269 throws KeeperException, KeeperException.NoNodeException {
270 return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_OPENED);
271 }
272
273 /**
274 * Deletes an existing unassigned node that is in the OFFLINE state for the
275 * specified region.
276 *
277 * <p>If a node does not already exist for this region, a
278 * {@link NoNodeException} will be thrown.
279 *
280 * <p>No watcher is set whether this succeeds or not.
281 *
282 * <p>Returns false if the node was not in the proper state but did exist.
283 *
284 * <p>This method is used during master failover when the regions on an RS
285 * that has died are all set to OFFLINE before being processed.
286 *
287 * @param zkw zk reference
288 * @param encodedRegionName closed region to be deleted from zk
289 * @throws KeeperException if unexpected zookeeper exception
290 * @throws KeeperException.NoNodeException if node does not exist
291 */
292 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
293 String encodedRegionName)
294 throws KeeperException, KeeperException.NoNodeException {
295 return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_OFFLINE);
296 }
297
298 /**
299 * Deletes an existing unassigned node that is in the CLOSED state for the
300 * specified region.
301 *
302 * <p>If a node does not already exist for this region, a
303 * {@link NoNodeException} will be thrown.
304 *
305 * <p>No watcher is set whether this succeeds or not.
306 *
307 * <p>Returns false if the node was not in the proper state but did exist.
308 *
309 * <p>This method is used during table disables when a region finishes
310 * successfully closing. This is the Master acknowledging completion
311 * of the specified regions transition to being closed.
312 *
313 * @param zkw zk reference
314 * @param encodedRegionName closed region to be deleted from zk
315 * @throws KeeperException if unexpected zookeeper exception
316 * @throws KeeperException.NoNodeException if node does not exist
317 */
318 public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
319 String encodedRegionName)
320 throws KeeperException, KeeperException.NoNodeException {
321 return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_CLOSED);
322 }
323
324 /**
325 * Deletes an existing unassigned node that is in the CLOSING state for the
326 * specified region.
327 *
328 * <p>If a node does not already exist for this region, a
329 * {@link NoNodeException} will be thrown.
330 *
331 * <p>No watcher is set whether this succeeds or not.
332 *
333 * <p>Returns false if the node was not in the proper state but did exist.
334 *
335 * <p>This method is used during table disables when a region finishes
336 * successfully closing. This is the Master acknowledging completion
337 * of the specified regions transition to being closed.
338 *
339 * @param zkw zk reference
340 * @param region closing region to be deleted from zk
341 * @throws KeeperException if unexpected zookeeper exception
342 * @throws KeeperException.NoNodeException if node does not exist
343 */
344 public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
345 HRegionInfo region)
346 throws KeeperException, KeeperException.NoNodeException {
347 String encodedRegionName = region.getEncodedName();
348 return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_CLOSING);
349 }
350
351 /**
352 * Deletes an existing unassigned node that is in the specified state for the
353 * specified region.
354 *
355 * <p>If a node does not already exist for this region, a
356 * {@link NoNodeException} will be thrown.
357 *
358 * <p>No watcher is set whether this succeeds or not.
359 *
360 * <p>Returns false if the node was not in the proper state but did exist.
361 *
362 * <p>This method is used when a region finishes opening/closing.
363 * The Master acknowledges completion
364 * of the specified regions transition to being closed/opened.
365 *
366 * @param zkw zk reference
367 * @param encodedRegionName region to be deleted from zk
368 * @param expectedState state region must be in for delete to complete
369 * @throws KeeperException if unexpected zookeeper exception
370 * @throws KeeperException.NoNodeException if node does not exist
371 */
372 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
373 EventType expectedState)
374 throws KeeperException, KeeperException.NoNodeException {
375 return deleteNode(zkw, encodedRegionName, expectedState, -1);
376 }
377
378 /**
379 * Deletes an existing unassigned node that is in the specified state for the
380 * specified region.
381 *
382 * <p>If a node does not already exist for this region, a
383 * {@link NoNodeException} will be thrown.
384 *
385 * <p>No watcher is set whether this succeeds or not.
386 *
387 * <p>Returns false if the node was not in the proper state but did exist.
388 *
389 * <p>This method is used when a region finishes opening/closing.
390 * The Master acknowledges completion
391 * of the specified regions transition to being closed/opened.
392 *
393 * @param zkw zk reference
394 * @param encodedRegionName region to be deleted from zk
395 * @param expectedState state region must be in for delete to complete
396 * @param expectedVersion of the znode that is to be deleted.
397 * If expectedVersion need not be compared while deleting the znode
398 * pass -1
399 * @throws KeeperException if unexpected zookeeper exception
400 * @throws KeeperException.NoNodeException if node does not exist
401 */
402 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
403 EventType expectedState, int expectedVersion)
404 throws KeeperException, KeeperException.NoNodeException {
405 LOG.debug(zkw.prefix("Deleting existing unassigned " +
406 "node for " + encodedRegionName + " that is in expected state " + expectedState));
407 String node = getNodeName(zkw, encodedRegionName);
408 zkw.sync(node);
409 Stat stat = new Stat();
410 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
411 if (bytes == null) {
412 // If it came back null, node does not exist.
413 throw KeeperException.create(Code.NONODE);
414 }
415 RegionTransition rt = getRegionTransition(bytes);
416 EventType et = rt.getEventType();
417 if (!et.equals(expectedState)) {
418 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " +
419 expectedState + " state but node is in " + et + " state"));
420 return false;
421 }
422 if (expectedVersion != -1
423 && stat.getVersion() != expectedVersion) {
424 LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" +
425 " the expected one. Got a version mismatch");
426 return false;
427 }
428 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
429 LOG.warn(zkw.prefix("Attempting to delete " +
430 "unassigned node " + encodedRegionName + " in " + expectedState +
431 " state but after verifying state, we got a version mismatch"));
432 return false;
433 }
434 LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
435 encodedRegionName + " in expected state " + expectedState));
436 return true;
437 }
438
439 /**
440 * Deletes all unassigned nodes regardless of their state.
441 *
442 * <p>No watchers are set.
443 *
444 * <p>This method is used by the Master during cluster startup to clear out
445 * any existing state from other cluster runs.
446 *
447 * @param zkw zk reference
448 * @throws KeeperException if unexpected zookeeper exception
449 */
450 public static void deleteAllNodes(ZooKeeperWatcher zkw)
451 throws KeeperException {
452 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
453 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
454 }
455
456 /**
457 * Creates a new unassigned node in the CLOSING state for the specified
458 * region.
459 *
460 * <p>Does not transition nodes from any states. If a node already exists
461 * for this region, a {@link NodeExistsException} will be thrown.
462 *
463 * <p>If creation is successful, returns the version number of the CLOSING
464 * node created.
465 *
466 * <p>Set a watch.
467 *
468 * <p>This method should only be used by a Master when initiating a
469 * close of a region before sending a close request to the region server.
470 *
471 * @param zkw zk reference
472 * @param region region to be created as closing
473 * @param serverName server transition will happen on
474 * @return version of node after transition, -1 if unsuccessful transition
475 * @throws KeeperException if unexpected zookeeper exception
476 * @throws KeeperException.NodeExistsException if node already exists
477 */
478 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
479 ServerName serverName)
480 throws KeeperException, KeeperException.NodeExistsException {
481 LOG.debug(zkw.prefix("Creating unassigned node for " +
482 region.getEncodedName() + " in a CLOSING state"));
483 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
484 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
485 String node = getNodeName(zkw, region.getEncodedName());
486 return ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
487 }
488
489 // RegionServer methods
490
491 /**
492 * Transitions an existing unassigned node for the specified region which is
493 * currently in the CLOSING state to be in the CLOSED state.
494 *
495 * <p>Does not transition nodes from other states. If for some reason the
496 * node could not be transitioned, the method returns -1. If the transition
497 * is successful, the version of the node after transition is returned.
498 *
499 * <p>This method can fail and return false for three different reasons:
500 * <ul><li>Unassigned node for this region does not exist</li>
501 * <li>Unassigned node for this region is not in CLOSING state</li>
502 * <li>After verifying CLOSING state, update fails because of wrong version
503 * (someone else already transitioned the node)</li>
504 * </ul>
505 *
506 * <p>Does not set any watches.
507 *
508 * <p>This method should only be used by a RegionServer when initiating a
509 * close of a region after receiving a CLOSE RPC from the Master.
510 *
511 * @param zkw zk reference
512 * @param region region to be transitioned to closed
513 * @param serverName server transition happens on
514 * @return version of node after transition, -1 if unsuccessful transition
515 * @throws KeeperException if unexpected zookeeper exception
516 */
517 public static int transitionNodeClosed(ZooKeeperWatcher zkw,
518 HRegionInfo region, ServerName serverName, int expectedVersion)
519 throws KeeperException {
520 return transitionNode(zkw, region, serverName,
521 EventType.M_ZK_REGION_CLOSING,
522 EventType.RS_ZK_REGION_CLOSED, expectedVersion);
523 }
524
525 /**
526 * Transitions an existing unassigned node for the specified region which is
527 * currently in the OFFLINE state to be in the OPENING state.
528 *
529 * <p>Does not transition nodes from other states. If for some reason the
530 * node could not be transitioned, the method returns -1. If the transition
531 * is successful, the version of the node written as OPENING is returned.
532 *
533 * <p>This method can fail and return -1 for three different reasons:
534 * <ul><li>Unassigned node for this region does not exist</li>
535 * <li>Unassigned node for this region is not in OFFLINE state</li>
536 * <li>After verifying OFFLINE state, update fails because of wrong version
537 * (someone else already transitioned the node)</li>
538 * </ul>
539 *
540 * <p>Does not set any watches.
541 *
542 * <p>This method should only be used by a RegionServer when initiating an
543 * open of a region after receiving an OPEN RPC from the Master.
544 *
545 * @param zkw zk reference
546 * @param region region to be transitioned to opening
547 * @param serverName server transition happens on
548 * @return version of node after transition, -1 if unsuccessful transition
549 * @throws KeeperException if unexpected zookeeper exception
550 */
551 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
552 HRegionInfo region, ServerName serverName)
553 throws KeeperException {
554 return transitionNodeOpening(zkw, region, serverName,
555 EventType.M_ZK_REGION_OFFLINE);
556 }
557
558 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
559 HRegionInfo region, ServerName serverName, final EventType beginState)
560 throws KeeperException {
561 return transitionNode(zkw, region, serverName, beginState,
562 EventType.RS_ZK_REGION_OPENING, -1);
563 }
564
565 /**
566 * Retransitions an existing unassigned node for the specified region which is
567 * currently in the OPENING state to be in the OPENING state.
568 *
569 * <p>Does not transition nodes from other states. If for some reason the
570 * node could not be transitioned, the method returns -1. If the transition
571 * is successful, the version of the node rewritten as OPENING is returned.
572 *
573 * <p>This method can fail and return -1 for three different reasons:
574 * <ul><li>Unassigned node for this region does not exist</li>
575 * <li>Unassigned node for this region is not in OPENING state</li>
576 * <li>After verifying OPENING state, update fails because of wrong version
577 * (someone else already transitioned the node)</li>
578 * </ul>
579 *
580 * <p>Does not set any watches.
581 *
582 * <p>This method should only be used by a RegionServer when initiating an
583 * open of a region after receiving an OPEN RPC from the Master.
584 *
585 * @param zkw zk reference
586 * @param region region to be transitioned to opening
587 * @param serverName server transition happens on
588 * @param updateZNode write the znode. If false, we only check.
589 * @return version of node after transition, -1 if unsuccessful transition
590 * @throws KeeperException if unexpected zookeeper exception
591 */
592 public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
593 HRegionInfo region, ServerName serverName, int expectedVersion, boolean updateZNode)
594 throws KeeperException {
595
596 String encoded = region.getEncodedName();
597 if(LOG.isDebugEnabled()) {
598 LOG.debug(zkw.prefix("Attempting to retransition the opening state of node " +
599 HRegionInfo.prettyPrint(encoded)));
600 }
601
602 String node = getNodeName(zkw, encoded);
603 zkw.sync(node);
604
605 // Read existing data of the node
606 Stat stat = new Stat();
607 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
608 if (existingBytes == null) {
609 // Node no longer exists. Return -1. It means unsuccessful transition.
610 return -1;
611 }
612 RegionTransition rt = getRegionTransition(existingBytes);
613
614 // Verify it is the expected version
615 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
616 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
617 "unassigned node for " + encoded + " failed, " +
618 "the node existed but was version " + stat.getVersion() +
619 " not the expected version " + expectedVersion));
620 return -1;
621 }
622
623 // Verify it is in expected state
624 EventType et = rt.getEventType();
625 if (!et.equals(EventType.RS_ZK_REGION_OPENING)) {
626 String existingServer = (rt.getServerName() == null)
627 ? "<unknown>" : rt.getServerName().toString();
628 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for "
629 + encoded + " failed, the node existed but was in the state " + et +
630 " set by the server " + existingServer));
631 return -1;
632 }
633
634 // We don't have to write the new state: the check is complete.
635 if (!updateZNode){
636 return expectedVersion;
637 }
638
639 // Write new data, ensuring data has not changed since we last read it
640 try {
641 rt = RegionTransition.createRegionTransition(
642 EventType.RS_ZK_REGION_OPENING, region.getRegionName(), serverName, null);
643 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
644 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
645 "unassigned node for " + encoded + " failed, " +
646 "the node existed and was in the expected state but then when " +
647 "setting data we got a version mismatch"));
648 return -1;
649 }
650 if(LOG.isDebugEnabled()) {
651 LOG.debug(zkw.prefix("Successfully retransition the opening state of node " + encoded));
652 }
653 return stat.getVersion() + 1;
654 } catch (KeeperException.NoNodeException nne) {
655 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
656 "unassigned node for " + encoded + " failed, " +
657 "the node existed and was in the expected state but then when " +
658 "setting data it no longer existed"));
659 return -1;
660 }
661 }
662
663 /**
664 * Transitions an existing unassigned node for the specified region which is
665 * currently in the OPENING state to be in the OPENED state.
666 *
667 * <p>Does not transition nodes from other states. If for some reason the
668 * node could not be transitioned, the method returns -1. If the transition
669 * is successful, the version of the node after transition is returned.
670 *
671 * <p>This method can fail and return false for three different reasons:
672 * <ul><li>Unassigned node for this region does not exist</li>
673 * <li>Unassigned node for this region is not in OPENING state</li>
674 * <li>After verifying OPENING state, update fails because of wrong version
675 * (this should never actually happen since an RS only does this transition
676 * following a transition to OPENING. if two RS are conflicting, one would
677 * fail the original transition to OPENING and not this transition)</li>
678 * </ul>
679 *
680 * <p>Does not set any watches.
681 *
682 * <p>This method should only be used by a RegionServer when completing the
683 * open of a region.
684 *
685 * @param zkw zk reference
686 * @param region region to be transitioned to opened
687 * @param serverName server transition happens on
688 * @return version of node after transition, -1 if unsuccessful transition
689 * @throws KeeperException if unexpected zookeeper exception
690 */
691 public static int transitionNodeOpened(ZooKeeperWatcher zkw,
692 HRegionInfo region, ServerName serverName, int expectedVersion)
693 throws KeeperException {
694 return transitionNode(zkw, region, serverName,
695 EventType.RS_ZK_REGION_OPENING,
696 EventType.RS_ZK_REGION_OPENED, expectedVersion);
697 }
698
699 /**
700 *
701 * @param zkw zk reference
702 * @param region region to be closed
703 * @param expectedVersion expected version of the znode
704 * @return true if the znode exists, has the right version and the right state. False otherwise.
705 * @throws KeeperException
706 */
707 public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region,
708 int expectedVersion) throws KeeperException {
709
710 final String encoded = getNodeName(zkw, region.getEncodedName());
711 zkw.sync(encoded);
712
713 // Read existing data of the node
714 Stat stat = new Stat();
715 byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat);
716
717 if (existingBytes == null) {
718 LOG.warn(zkw.prefix("Attempt to check the " +
719 "closing node for " + encoded +
720 ". The node does not exist"));
721 return false;
722 }
723
724 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
725 LOG.warn(zkw.prefix("Attempt to check the " +
726 "closing node for " + encoded +
727 ". The node existed but was version " + stat.getVersion() +
728 " not the expected version " + expectedVersion));
729 return false;
730 }
731
732 RegionTransition rt = getRegionTransition(existingBytes);
733
734 if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) {
735 LOG.warn(zkw.prefix("Attempt to check the " +
736 "closing node for " + encoded +
737 ". The node existed but was in an unexpected state: " + rt.getEventType()));
738 return false;
739 }
740
741 return true;
742 }
743
744 /**
745 * Method that actually performs unassigned node transitions.
746 *
747 * <p>Attempts to transition the unassigned node for the specified region
748 * from the expected state to the state in the specified transition data.
749 *
750 * <p>Method first reads existing data and verifies it is in the expected
751 * state. If the node does not exist or the node is not in the expected
752 * state, the method returns -1. If the transition is successful, the
753 * version number of the node following the transition is returned.
754 *
755 * <p>If the read state is what is expected, it attempts to write the new
756 * state and data into the node. When doing this, it includes the expected
757 * version (determined when the existing state was verified) to ensure that
758 * only one transition is successful. If there is a version mismatch, the
759 * method returns -1.
760 *
761 * <p>If the write is successful, no watch is set and the method returns true.
762 *
763 * @param zkw zk reference
764 * @param region region to be transitioned to opened
765 * @param serverName server transition happens on
766 * @param endState state to transition node to if all checks pass
767 * @param beginState state the node must currently be in to do transition
768 * @param expectedVersion expected version of data before modification, or -1
769 * @return version of node after transition, -1 if unsuccessful transition
770 * @throws KeeperException if unexpected zookeeper exception
771 */
772 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
773 ServerName serverName, EventType beginState, EventType endState,
774 int expectedVersion)
775 throws KeeperException {
776 return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null);
777 }
778
779
780 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
781 ServerName serverName, EventType beginState, EventType endState,
782 int expectedVersion, final byte [] payload)
783 throws KeeperException {
784 String encoded = region.getEncodedName();
785 if(LOG.isDebugEnabled()) {
786 LOG.debug(zkw.prefix("Attempting to transition node " +
787 HRegionInfo.prettyPrint(encoded) +
788 " from " + beginState.toString() + " to " + endState.toString()));
789 }
790
791 String node = getNodeName(zkw, encoded);
792 zkw.sync(node);
793
794 // Read existing data of the node
795 Stat stat = new Stat();
796 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
797 if (existingBytes == null) {
798 // Node no longer exists. Return -1. It means unsuccessful transition.
799 return -1;
800 }
801 RegionTransition rt = getRegionTransition(existingBytes);
802
803 // Verify it is the expected version
804 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
805 LOG.warn(zkw.prefix("Attempt to transition the " +
806 "unassigned node for " + encoded +
807 " from " + beginState + " to " + endState + " failed, " +
808 "the node existed but was version " + stat.getVersion() +
809 " not the expected version " + expectedVersion));
810 return -1;
811 } else if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
812 && endState.equals(EventType.RS_ZK_REGION_OPENING)
813 && expectedVersion == -1 && stat.getVersion() != 0) {
814 // the below check ensures that double assignment doesnot happen.
815 // When the node is created for the first time then the expected version
816 // that is passed will be -1 and the version in znode will be 0.
817 // In all other cases the version in znode will be > 0.
818 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
819 + encoded + " from " + beginState + " to " + endState + " failed, "
820 + "the node existed but was version " + stat.getVersion()
821 + " not the expected version " + expectedVersion));
822 return -1;
823 }
824
825 // Verify it is in expected state
826 EventType et = rt.getEventType();
827 if (!et.equals(beginState)) {
828 String existingServer = (rt.getServerName() == null)
829 ? "<unknown>" : rt.getServerName().toString();
830 LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded
831 + " from " + beginState + " to " + endState + " failed, the node existed but"
832 + " was in the state " + et + " set by the server " + existingServer));
833 return -1;
834 }
835
836 // Write new data, ensuring data has not changed since we last read it
837 try {
838 rt = RegionTransition.createRegionTransition(
839 endState, region.getRegionName(), serverName, payload);
840 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
841 LOG.warn(zkw.prefix("Attempt to transition the " +
842 "unassigned node for " + encoded +
843 " from " + beginState + " to " + endState + " failed, " +
844 "the node existed and was in the expected state but then when " +
845 "setting data we got a version mismatch"));
846 return -1;
847 }
848 if(LOG.isDebugEnabled()) {
849 LOG.debug(zkw.prefix("Successfully transitioned node " + encoded +
850 " from " + beginState + " to " + endState));
851 }
852 return stat.getVersion() + 1;
853 } catch (KeeperException.NoNodeException nne) {
854 LOG.warn(zkw.prefix("Attempt to transition the " +
855 "unassigned node for " + encoded +
856 " from " + beginState + " to " + endState + " failed, " +
857 "the node existed and was in the expected state but then when " +
858 "setting data it no longer existed"));
859 return -1;
860 }
861 }
862
863 private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException {
864 try {
865 return RegionTransition.parseFrom(bytes);
866 } catch (DeserializationException e) {
867 // Convert to a zk exception for now. Otherwise have to change API
868 throw ZKUtil.convert(e);
869 }
870 }
871
872 /**
873 * Gets the current data in the unassigned node for the specified region name
874 * or fully-qualified path.
875 *
876 * <p>Returns null if the region does not currently have a node.
877 *
878 * <p>Sets a watch on the node if the node exists.
879 *
880 * @param zkw zk reference
881 * @param pathOrRegionName fully-specified path or region name
882 * @return znode content
883 * @throws KeeperException if unexpected zookeeper exception
884 */
885 public static byte [] getData(ZooKeeperWatcher zkw,
886 String pathOrRegionName)
887 throws KeeperException {
888 String node = getPath(zkw, pathOrRegionName);
889 return ZKUtil.getDataAndWatch(zkw, node);
890 }
891
892 /**
893 * Gets the current data in the unassigned node for the specified region name
894 * or fully-qualified path.
895 *
896 * <p>Returns null if the region does not currently have a node.
897 *
898 * <p>Sets a watch on the node if the node exists.
899 *
900 * @param zkw zk reference
901 * @param pathOrRegionName fully-specified path or region name
902 * @param stat object to populate the version.
903 * @return znode content
904 * @throws KeeperException if unexpected zookeeper exception
905 */
906 public static byte [] getDataAndWatch(ZooKeeperWatcher zkw,
907 String pathOrRegionName, Stat stat)
908 throws KeeperException {
909 String node = getPath(zkw, pathOrRegionName);
910 return ZKUtil.getDataAndWatch(zkw, node, stat);
911 }
912
913 /**
914 * Gets the current data in the unassigned node for the specified region name
915 * or fully-qualified path.
916 *
917 * <p>Returns null if the region does not currently have a node.
918 *
919 * <p>Does not set a watch.
920 *
921 * @param zkw zk reference
922 * @param pathOrRegionName fully-specified path or region name
923 * @param stat object to store node info into on getData call
924 * @return znode content
925 * @throws KeeperException if unexpected zookeeper exception
926 */
927 public static byte [] getDataNoWatch(ZooKeeperWatcher zkw,
928 String pathOrRegionName, Stat stat)
929 throws KeeperException {
930 String node = getPath(zkw, pathOrRegionName);
931 return ZKUtil.getDataNoWatch(zkw, node, stat);
932 }
933
934 /**
935 * @param zkw
936 * @param pathOrRegionName
937 * @return Path to znode
938 */
939 public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) {
940 return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName);
941 }
942
943 /**
944 * Get the version of the specified znode
945 * @param zkw zk reference
946 * @param region region's info
947 * @return the version of the znode, -1 if it doesn't exist
948 * @throws KeeperException
949 */
950 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
951 throws KeeperException {
952 String znode = getNodeName(zkw, region.getEncodedName());
953 return ZKUtil.checkExists(zkw, znode);
954 }
955
956 /**
957 * Delete the assignment node regardless of its current state.
958 * <p>
959 * Fail silent even if the node does not exist at all.
960 * @param watcher
961 * @param regionInfo
962 * @throws KeeperException
963 */
964 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
965 HRegionInfo regionInfo)
966 throws KeeperException {
967 String node = getNodeName(watcher, regionInfo.getEncodedName());
968 ZKUtil.deleteNodeFailSilent(watcher, node);
969 }
970
971 /**
972 * Blocks until there are no node in regions in transition.
973 * <p>
974 * Used in testing only.
975 * @param zkw zk reference
976 * @throws KeeperException
977 * @throws InterruptedException
978 */
979 public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
980 throws KeeperException, InterruptedException {
981 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
982 List<String> znodes =
983 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
984 if (znodes != null && !znodes.isEmpty()) {
985 for (String znode : znodes) {
986 LOG.debug("ZK RIT -> " + znode);
987 }
988 }
989 Thread.sleep(100);
990 }
991 }
992
993 /**
994 * Blocks until there is at least one node in regions in transition.
995 * <p>
996 * Used in testing only.
997 * @param zkw zk reference
998 * @throws KeeperException
999 * @throws InterruptedException
1000 */
1001 public static void blockUntilRIT(ZooKeeperWatcher zkw)
1002 throws KeeperException, InterruptedException {
1003 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1004 List<String> znodes =
1005 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1006 if (znodes == null || znodes.isEmpty()) {
1007 LOG.debug("No RIT in ZK");
1008 }
1009 Thread.sleep(100);
1010 }
1011 }
1012
1013 /**
1014 * Presume bytes are serialized unassigned data structure
1015 * @param znodeBytes
1016 * @return String of the deserialized znode bytes.
1017 */
1018 static String toString(final byte[] znodeBytes) {
1019 // This method should not exist. Used by ZKUtil stringifying RegionTransition. Have the
1020 // method in here so RegionTransition does not leak into ZKUtil.
1021 try {
1022 RegionTransition rt = RegionTransition.parseFrom(znodeBytes);
1023 return rt.toString();
1024 } catch (DeserializationException e) {
1025 return "";
1026 }
1027 }
1028 }