1 /** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 package org.apache.hadoop.hbase.zookeeper; 20 21 import java.util.List; 22 23 import org.apache.commons.logging.Log; 24 import org.apache.commons.logging.LogFactory; 25 import org.apache.hadoop.hbase.classification.InterfaceAudience; 26 import org.apache.hadoop.hbase.HConstants; 27 import org.apache.hadoop.hbase.HRegionInfo; 28 import org.apache.hadoop.hbase.RegionTransition; 29 import org.apache.hadoop.hbase.ServerName; 30 import org.apache.hadoop.hbase.exceptions.DeserializationException; 31 import org.apache.hadoop.hbase.executor.EventType; 32 import org.apache.zookeeper.AsyncCallback; 33 import org.apache.zookeeper.KeeperException; 34 import org.apache.zookeeper.KeeperException.Code; 35 import org.apache.zookeeper.data.Stat; 36 37 // We should not be importing this Type here, nor a RegionTransition, etc. This class should be 38 // about zk and bytes only. 39 40 /** 41 * Utility class for doing region assignment in ZooKeeper. This class extends 42 * stuff done in {@link ZKUtil} to cover specific assignment operations. 43 * <p> 44 * Contains only static methods and constants. 45 * <p> 46 * Used by both the Master and RegionServer. 47 * <p> 48 * All valid transitions outlined below: 49 * <p> 50 * <b>MASTER</b> 51 * <ol> 52 * <li> 53 * Master creates an unassigned node as OFFLINE. 54 * - Cluster startup and table enabling. 55 * </li> 56 * <li> 57 * Master forces an existing unassigned node to OFFLINE. 58 * - RegionServer failure. 59 * - Allows transitions from all states to OFFLINE. 60 * </li> 61 * <li> 62 * Master deletes an unassigned node that was in a OPENED state. 63 * - Normal region transitions. Besides cluster startup, no other deletions 64 * of unassigned nodes is allowed. 65 * </li> 66 * <li> 67 * Master deletes all unassigned nodes regardless of state. 68 * - Cluster startup before any assignment happens. 69 * </li> 70 * </ol> 71 * <p> 72 * <b>REGIONSERVER</b> 73 * <ol> 74 * <li> 75 * RegionServer creates an unassigned node as CLOSING. 76 * - All region closes will do this in response to a CLOSE RPC from Master. 77 * - A node can never be transitioned to CLOSING, only created. 78 * </li> 79 * <li> 80 * RegionServer transitions an unassigned node from CLOSING to CLOSED. 81 * - Normal region closes. CAS operation. 82 * </li> 83 * <li> 84 * RegionServer transitions an unassigned node from OFFLINE to OPENING. 85 * - All region opens will do this in response to an OPEN RPC from the Master. 86 * - Normal region opens. CAS operation. 87 * </li> 88 * <li> 89 * RegionServer transitions an unassigned node from OPENING to OPENED. 90 * - Normal region opens. CAS operation. 91 * </li> 92 * </ol> 93 */ 94 @InterfaceAudience.Private 95 public class ZKAssign { 96 private static final Log LOG = LogFactory.getLog(ZKAssign.class); 97 98 /** 99 * Gets the full path node name for the unassigned node for the specified 100 * region. 101 * @param zkw zk reference 102 * @param regionName region name 103 * @return full path node name 104 */ 105 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) { 106 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName); 107 } 108 109 /** 110 * Gets the region name from the full path node name of an unassigned node. 111 * @param path full zk path 112 * @return region name 113 */ 114 public static String getRegionName(ZooKeeperWatcher zkw, String path) { 115 return path.substring(zkw.assignmentZNode.length()+1); 116 } 117 118 // Master methods 119 120 /** 121 * Creates a new unassigned node in the OFFLINE state for the specified region. 122 * 123 * <p>Does not transition nodes from other states. If a node already exists 124 * for this region, a {@link org.apache.zookeeper.KeeperException.NodeExistsException} 125 * will be thrown. 126 * 127 * <p>Sets a watcher on the unassigned region node if the method is successful. 128 * 129 * <p>This method should only be used during cluster startup and the enabling 130 * of a table. 131 * 132 * @param zkw zk reference 133 * @param region region to be created as offline 134 * @param serverName server transition will happen on 135 * @throws KeeperException if unexpected zookeeper exception 136 * @throws KeeperException.NodeExistsException if node already exists 137 */ 138 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 139 ServerName serverName) 140 throws KeeperException, KeeperException.NodeExistsException { 141 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE); 142 } 143 144 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 145 ServerName serverName, final EventType event) 146 throws KeeperException, KeeperException.NodeExistsException { 147 LOG.debug(zkw.prefix("Creating unassigned node " + 148 region.getEncodedName() + " in OFFLINE state")); 149 RegionTransition rt = 150 RegionTransition.createRegionTransition(event, region.getRegionName(), serverName); 151 String node = getNodeName(zkw, region.getEncodedName()); 152 ZKUtil.createAndWatch(zkw, node, rt.toByteArray()); 153 } 154 155 /** 156 * Creates an unassigned node in the OFFLINE state for the specified region. 157 * <p> 158 * Runs asynchronously. Depends on no pre-existing znode. 159 * 160 * <p>Sets a watcher on the unassigned region node. 161 * 162 * @param zkw zk reference 163 * @param region region to be created as offline 164 * @param serverName server transition will happen on 165 * @param cb 166 * @param ctx 167 * @throws KeeperException if unexpected zookeeper exception 168 * @throws KeeperException.NodeExistsException if node already exists 169 */ 170 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw, 171 HRegionInfo region, ServerName serverName, 172 final AsyncCallback.StringCallback cb, final Object ctx) 173 throws KeeperException { 174 LOG.debug(zkw.prefix("Async create of unassigned node " + 175 region.getEncodedName() + " with OFFLINE state")); 176 RegionTransition rt = 177 RegionTransition.createRegionTransition( 178 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 179 String node = getNodeName(zkw, region.getEncodedName()); 180 ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx); 181 } 182 183 /** 184 * Creates or force updates an unassigned node to the OFFLINE state for the 185 * specified region. 186 * <p> 187 * Attempts to create the node but if it exists will force it to transition to 188 * and OFFLINE state. 189 * 190 * <p>Sets a watcher on the unassigned region node if the method is 191 * successful. 192 * 193 * <p>This method should be used when assigning a region. 194 * 195 * @param zkw zk reference 196 * @param region region to be created as offline 197 * @param serverName server transition will happen on 198 * @return the version of the znode created in OFFLINE state, -1 if 199 * unsuccessful. 200 * @throws KeeperException if unexpected zookeeper exception 201 * @throws KeeperException.NodeExistsException if node already exists 202 */ 203 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw, 204 HRegionInfo region, ServerName serverName) throws KeeperException { 205 LOG.debug(zkw.prefix("Creating (or updating) unassigned node " + 206 region.getEncodedName() + " with OFFLINE state")); 207 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE, 208 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY); 209 byte [] data = rt.toByteArray(); 210 String node = getNodeName(zkw, region.getEncodedName()); 211 zkw.sync(node); 212 int version = ZKUtil.checkExists(zkw, node); 213 if (version == -1) { 214 return ZKUtil.createAndWatch(zkw, node, data); 215 } else { 216 boolean setData = false; 217 try { 218 setData = ZKUtil.setData(zkw, node, data, version); 219 // Setdata throws KeeperException which aborts the Master. So we are 220 // catching it here. 221 // If just before setting the znode to OFFLINE if the RS has made any 222 // change to the 223 // znode state then we need to return -1. 224 } catch (KeeperException kpe) { 225 LOG.info("Version mismatch while setting the node to OFFLINE state."); 226 return -1; 227 } 228 if (!setData) { 229 return -1; 230 } else { 231 // We successfully forced to OFFLINE, reset watch and handle if 232 // the state changed in between our set and the watch 233 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName()); 234 rt = getRegionTransition(bytes); 235 if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) { 236 // state changed, need to process 237 return -1; 238 } 239 } 240 } 241 return version + 1; 242 } 243 244 /** 245 * Deletes an existing unassigned node that is in the OPENED state for the 246 * specified region. 247 * 248 * <p>If a node does not already exist for this region, a 249 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 250 * 251 * <p>No watcher is set whether this succeeds or not. 252 * 253 * <p>Returns false if the node was not in the proper state but did exist. 254 * 255 * <p>This method is used during normal region transitions when a region 256 * finishes successfully opening. This is the Master acknowledging completion 257 * of the specified regions transition. 258 * 259 * @param zkw zk reference 260 * @param encodedRegionName opened region to be deleted from zk 261 * @param sn the expected region transition target server name 262 * @throws KeeperException if unexpected zookeeper exception 263 * @throws KeeperException.NoNodeException if node does not exist 264 */ 265 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw, 266 String encodedRegionName, ServerName sn) 267 throws KeeperException, KeeperException.NoNodeException { 268 return deleteNode(zkw, encodedRegionName, 269 EventType.RS_ZK_REGION_OPENED, sn); 270 } 271 272 /** 273 * Deletes an existing unassigned node that is in the OFFLINE state for the 274 * specified region. 275 * 276 * <p>If a node does not already exist for this region, a 277 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 278 * 279 * <p>No watcher is set whether this succeeds or not. 280 * 281 * <p>Returns false if the node was not in the proper state but did exist. 282 * 283 * <p>This method is used during master failover when the regions on an RS 284 * that has died are all set to OFFLINE before being processed. 285 * 286 * @param zkw zk reference 287 * @param encodedRegionName closed region to be deleted from zk 288 * @param sn the expected region transition target server name 289 * @throws KeeperException if unexpected zookeeper exception 290 * @throws KeeperException.NoNodeException if node does not exist 291 */ 292 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw, 293 String encodedRegionName, ServerName sn) 294 throws KeeperException, KeeperException.NoNodeException { 295 return deleteNode(zkw, encodedRegionName, 296 EventType.M_ZK_REGION_OFFLINE, sn); 297 } 298 299 /** 300 * Deletes an existing unassigned node that is in the CLOSED state for the 301 * specified region. 302 * 303 * <p>If a node does not already exist for this region, a 304 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 305 * 306 * <p>No watcher is set whether this succeeds or not. 307 * 308 * <p>Returns false if the node was not in the proper state but did exist. 309 * 310 * <p>This method is used during table disables when a region finishes 311 * successfully closing. This is the Master acknowledging completion 312 * of the specified regions transition to being closed. 313 * 314 * @param zkw zk reference 315 * @param encodedRegionName closed region to be deleted from zk 316 * @param sn the expected region transition target server name 317 * @throws KeeperException if unexpected zookeeper exception 318 * @throws KeeperException.NoNodeException if node does not exist 319 */ 320 public static boolean deleteClosedNode(ZooKeeperWatcher zkw, 321 String encodedRegionName, ServerName sn) 322 throws KeeperException, KeeperException.NoNodeException { 323 return deleteNode(zkw, encodedRegionName, 324 EventType.RS_ZK_REGION_CLOSED, sn); 325 } 326 327 /** 328 * Deletes an existing unassigned node that is in the CLOSING state for the 329 * specified region. 330 * 331 * <p>If a node does not already exist for this region, a 332 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 333 * 334 * <p>No watcher is set whether this succeeds or not. 335 * 336 * <p>Returns false if the node was not in the proper state but did exist. 337 * 338 * <p>This method is used during table disables when a region finishes 339 * successfully closing. This is the Master acknowledging completion 340 * of the specified regions transition to being closed. 341 * 342 * @param zkw zk reference 343 * @param region closing region to be deleted from zk 344 * @param sn the expected region transition target server name 345 * @throws KeeperException if unexpected zookeeper exception 346 * @throws KeeperException.NoNodeException if node does not exist 347 */ 348 public static boolean deleteClosingNode(ZooKeeperWatcher zkw, 349 HRegionInfo region, ServerName sn) 350 throws KeeperException, KeeperException.NoNodeException { 351 String encodedRegionName = region.getEncodedName(); 352 return deleteNode(zkw, encodedRegionName, 353 EventType.M_ZK_REGION_CLOSING, sn); 354 } 355 356 /** 357 * Deletes an existing unassigned node that is in the specified state for the 358 * specified region. 359 * 360 * <p>If a node does not already exist for this region, a 361 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 362 * 363 * <p>No watcher is set whether this succeeds or not. 364 * 365 * <p>Returns false if the node was not in the proper state but did exist. 366 * 367 * <p>This method is used when a region finishes opening/closing. 368 * The Master acknowledges completion 369 * of the specified regions transition to being closed/opened. 370 * 371 * @param zkw zk reference 372 * @param encodedRegionName region to be deleted from zk 373 * @param expectedState state region must be in for delete to complete 374 * @param sn the expected region transition target server name 375 * @throws KeeperException if unexpected zookeeper exception 376 * @throws KeeperException.NoNodeException if node does not exist 377 */ 378 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 379 EventType expectedState, ServerName sn) 380 throws KeeperException, KeeperException.NoNodeException { 381 return deleteNode(zkw, encodedRegionName, expectedState, sn, -1); 382 } 383 384 /** 385 * Deletes an existing unassigned node that is in the specified state for the 386 * specified region. 387 * 388 * <p>If a node does not already exist for this region, a 389 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 390 * 391 * <p>No watcher is set whether this succeeds or not. 392 * 393 * <p>Returns false if the node was not in the proper state but did exist. 394 * 395 * <p>This method is used when a region finishes opening/closing. 396 * The Master acknowledges completion 397 * of the specified regions transition to being closed/opened. 398 * 399 * @param zkw zk reference 400 * @param encodedRegionName region to be deleted from zk 401 * @param expectedState state region must be in for delete to complete 402 * @param expectedVersion of the znode that is to be deleted. 403 * If expectedVersion need not be compared while deleting the znode 404 * pass -1 405 * @throws KeeperException if unexpected zookeeper exception 406 * @throws KeeperException.NoNodeException if node does not exist 407 */ 408 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 409 EventType expectedState, int expectedVersion) 410 throws KeeperException, KeeperException.NoNodeException { 411 return deleteNode(zkw, encodedRegionName, expectedState, null, expectedVersion); 412 } 413 414 /** 415 * Deletes an existing unassigned node that is in the specified state for the 416 * specified region. 417 * 418 * <p>If a node does not already exist for this region, a 419 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown. 420 * 421 * <p>No watcher is set whether this succeeds or not. 422 * 423 * <p>Returns false if the node was not in the proper state but did exist. 424 * 425 * <p>This method is used when a region finishes opening/closing. 426 * The Master acknowledges completion 427 * of the specified regions transition to being closed/opened. 428 * 429 * @param zkw zk reference 430 * @param encodedRegionName region to be deleted from zk 431 * @param expectedState state region must be in for delete to complete 432 * @param serverName the expected region transition target server name 433 * @param expectedVersion of the znode that is to be deleted. 434 * If expectedVersion need not be compared while deleting the znode 435 * pass -1 436 * @throws KeeperException if unexpected zookeeper exception 437 * @throws KeeperException.NoNodeException if node does not exist 438 */ 439 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 440 EventType expectedState, ServerName serverName, int expectedVersion) 441 throws KeeperException, KeeperException.NoNodeException { 442 if (LOG.isTraceEnabled()) { 443 LOG.trace(zkw.prefix("Deleting existing unassigned " + 444 "node " + encodedRegionName + " in expected state " + expectedState)); 445 } 446 String node = getNodeName(zkw, encodedRegionName); 447 zkw.sync(node); 448 Stat stat = new Stat(); 449 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat); 450 if (bytes == null) { 451 // If it came back null, node does not exist. 452 throw KeeperException.create(Code.NONODE); 453 } 454 RegionTransition rt = getRegionTransition(bytes); 455 EventType et = rt.getEventType(); 456 if (!et.equals(expectedState)) { 457 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " + 458 expectedState + " state but node is in " + et + " state")); 459 return false; 460 } 461 // Verify the server transition happens on is not changed 462 if (serverName != null && !rt.getServerName().equals(serverName)) { 463 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName 464 + " with target " + serverName + " but node has " + rt.getServerName())); 465 return false; 466 } 467 if (expectedVersion != -1 468 && stat.getVersion() != expectedVersion) { 469 LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" + 470 " the expected one. Got a version mismatch"); 471 return false; 472 } 473 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) { 474 LOG.warn(zkw.prefix("Attempting to delete " + 475 "unassigned node " + encodedRegionName + " in " + expectedState + 476 " state but after verifying state, we got a version mismatch")); 477 return false; 478 } 479 LOG.debug(zkw.prefix("Deleted unassigned node " + 480 encodedRegionName + " in expected state " + expectedState)); 481 return true; 482 } 483 484 /** 485 * Deletes all unassigned nodes regardless of their state. 486 * 487 * <p>No watchers are set. 488 * 489 * <p>This method is used by the Master during cluster startup to clear out 490 * any existing state from other cluster runs. 491 * 492 * @param zkw zk reference 493 * @throws KeeperException if unexpected zookeeper exception 494 */ 495 public static void deleteAllNodes(ZooKeeperWatcher zkw) 496 throws KeeperException { 497 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes")); 498 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode); 499 } 500 501 /** 502 * Creates a new unassigned node in the CLOSING state for the specified 503 * region. 504 * 505 * <p>Does not transition nodes from any states. If a node already exists 506 * for this region, a {@link org.apache.zookeeper.KeeperException.NodeExistsException} 507 * will be thrown. 508 * 509 * <p>If creation is successful, returns the version number of the CLOSING 510 * node created. 511 * 512 * <p>Set a watch. 513 * 514 * <p>This method should only be used by a Master when initiating a 515 * close of a region before sending a close request to the region server. 516 * 517 * @param zkw zk reference 518 * @param region region to be created as closing 519 * @param serverName server transition will happen on 520 * @return version of node after transition, -1 if unsuccessful transition 521 * @throws KeeperException if unexpected zookeeper exception 522 * @throws KeeperException.NodeExistsException if node already exists 523 */ 524 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region, 525 ServerName serverName) 526 throws KeeperException, KeeperException.NodeExistsException { 527 LOG.debug(zkw.prefix("Creating unassigned node " + 528 region.getEncodedName() + " in a CLOSING state")); 529 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING, 530 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY); 531 String node = getNodeName(zkw, region.getEncodedName()); 532 return ZKUtil.createAndWatch(zkw, node, rt.toByteArray()); 533 } 534 535 // RegionServer methods 536 537 /** 538 * Transitions an existing unassigned node for the specified region which is 539 * currently in the CLOSING state to be in the CLOSED state. 540 * 541 * <p>Does not transition nodes from other states. If for some reason the 542 * node could not be transitioned, the method returns -1. If the transition 543 * is successful, the version of the node after transition is returned. 544 * 545 * <p>This method can fail and return false for three different reasons: 546 * <ul><li>Unassigned node for this region does not exist</li> 547 * <li>Unassigned node for this region is not in CLOSING state</li> 548 * <li>After verifying CLOSING state, update fails because of wrong version 549 * (someone else already transitioned the node)</li> 550 * </ul> 551 * 552 * <p>Does not set any watches. 553 * 554 * <p>This method should only be used by a RegionServer when initiating a 555 * close of a region after receiving a CLOSE RPC from the Master. 556 * 557 * @param zkw zk reference 558 * @param region region to be transitioned to closed 559 * @param serverName server transition happens on 560 * @return version of node after transition, -1 if unsuccessful transition 561 * @throws KeeperException if unexpected zookeeper exception 562 */ 563 public static int transitionNodeClosed(ZooKeeperWatcher zkw, 564 HRegionInfo region, ServerName serverName, int expectedVersion) 565 throws KeeperException { 566 return transitionNode(zkw, region, serverName, 567 EventType.M_ZK_REGION_CLOSING, 568 EventType.RS_ZK_REGION_CLOSED, expectedVersion); 569 } 570 571 /** 572 * Transitions an existing unassigned node for the specified region which is 573 * currently in the OFFLINE state to be in the OPENING state. 574 * 575 * <p>Does not transition nodes from other states. If for some reason the 576 * node could not be transitioned, the method returns -1. If the transition 577 * is successful, the version of the node written as OPENING is returned. 578 * 579 * <p>This method can fail and return -1 for three different reasons: 580 * <ul><li>Unassigned node for this region does not exist</li> 581 * <li>Unassigned node for this region is not in OFFLINE state</li> 582 * <li>After verifying OFFLINE state, update fails because of wrong version 583 * (someone else already transitioned the node)</li> 584 * </ul> 585 * 586 * <p>Does not set any watches. 587 * 588 * <p>This method should only be used by a RegionServer when initiating an 589 * open of a region after receiving an OPEN RPC from the Master. 590 * 591 * @param zkw zk reference 592 * @param region region to be transitioned to opening 593 * @param serverName server transition happens on 594 * @return version of node after transition, -1 if unsuccessful transition 595 * @throws KeeperException if unexpected zookeeper exception 596 */ 597 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 598 HRegionInfo region, ServerName serverName) 599 throws KeeperException { 600 return transitionNodeOpening(zkw, region, serverName, 601 EventType.M_ZK_REGION_OFFLINE); 602 } 603 604 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 605 HRegionInfo region, ServerName serverName, final EventType beginState) 606 throws KeeperException { 607 return transitionNode(zkw, region, serverName, beginState, 608 EventType.RS_ZK_REGION_OPENING, -1); 609 } 610 611 /** 612 * Confirm an existing unassigned node for the specified region which is 613 * currently in the OPENING state to be still in the OPENING state on 614 * the specified server. 615 * 616 * <p>If for some reason the check fails, the method returns -1. Otherwise, 617 * the version of the node (same as the expected version) is returned. 618 * 619 * <p>This method can fail and return -1 for three different reasons: 620 * <ul><li>Unassigned node for this region does not exist</li> 621 * <li>Unassigned node for this region is not in OPENING state</li> 622 * <li>After verifying OPENING state, the server name or the version of the 623 * doesn't match)</li> 624 * </ul> 625 * 626 * <p>Does not set any watches. 627 * 628 * <p>This method should only be used by a RegionServer when initiating an 629 * open of a region after receiving an OPEN RPC from the Master. 630 * 631 * @param zkw zk reference 632 * @param region region to be transitioned to opening 633 * @param serverName server transition happens on 634 * @return version of node after transition, -1 if unsuccessful transition 635 * @throws KeeperException if unexpected zookeeper exception 636 */ 637 public static int confirmNodeOpening(ZooKeeperWatcher zkw, 638 HRegionInfo region, ServerName serverName, int expectedVersion) 639 throws KeeperException { 640 641 String encoded = region.getEncodedName(); 642 if(LOG.isDebugEnabled()) { 643 LOG.debug(zkw.prefix("Attempting to retransition opening state of node " + 644 HRegionInfo.prettyPrint(encoded))); 645 } 646 647 String node = getNodeName(zkw, encoded); 648 zkw.sync(node); 649 650 // Read existing data of the node 651 Stat stat = new Stat(); 652 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat); 653 if (existingBytes == null) { 654 // Node no longer exists. Return -1. It means unsuccessful transition. 655 return -1; 656 } 657 RegionTransition rt = getRegionTransition(existingBytes); 658 659 // Verify it is the expected version 660 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 661 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " + 662 "unassigned node for " + encoded + " failed, " + 663 "the node existed but was version " + stat.getVersion() + 664 " not the expected version " + expectedVersion)); 665 return -1; 666 } 667 668 // Verify it is in expected state 669 EventType et = rt.getEventType(); 670 if (!et.equals(EventType.RS_ZK_REGION_OPENING)) { 671 String existingServer = (rt.getServerName() == null) 672 ? "<unknown>" : rt.getServerName().toString(); 673 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for " 674 + encoded + " failed, the node existed but was in the state " + et + 675 " set by the server " + existingServer)); 676 return -1; 677 } 678 679 return expectedVersion; 680 } 681 682 /** 683 * Transitions an existing unassigned node for the specified region which is 684 * currently in the OPENING state to be in the OPENED state. 685 * 686 * <p>Does not transition nodes from other states. If for some reason the 687 * node could not be transitioned, the method returns -1. If the transition 688 * is successful, the version of the node after transition is returned. 689 * 690 * <p>This method can fail and return false for three different reasons: 691 * <ul><li>Unassigned node for this region does not exist</li> 692 * <li>Unassigned node for this region is not in OPENING state</li> 693 * <li>After verifying OPENING state, update fails because of wrong version 694 * (this should never actually happen since an RS only does this transition 695 * following a transition to OPENING. if two RS are conflicting, one would 696 * fail the original transition to OPENING and not this transition)</li> 697 * </ul> 698 * 699 * <p>Does not set any watches. 700 * 701 * <p>This method should only be used by a RegionServer when completing the 702 * open of a region. 703 * 704 * @param zkw zk reference 705 * @param region region to be transitioned to opened 706 * @param serverName server transition happens on 707 * @return version of node after transition, -1 if unsuccessful transition 708 * @throws KeeperException if unexpected zookeeper exception 709 */ 710 public static int transitionNodeOpened(ZooKeeperWatcher zkw, 711 HRegionInfo region, ServerName serverName, int expectedVersion) 712 throws KeeperException { 713 return transitionNode(zkw, region, serverName, 714 EventType.RS_ZK_REGION_OPENING, 715 EventType.RS_ZK_REGION_OPENED, expectedVersion); 716 } 717 718 /** 719 * 720 * @param zkw zk reference 721 * @param region region to be closed 722 * @param expectedVersion expected version of the znode 723 * @return true if the znode exists, has the right version and the right state. False otherwise. 724 * @throws KeeperException 725 */ 726 public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region, 727 int expectedVersion) throws KeeperException { 728 729 final String encoded = getNodeName(zkw, region.getEncodedName()); 730 zkw.sync(encoded); 731 732 // Read existing data of the node 733 Stat stat = new Stat(); 734 byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat); 735 736 if (existingBytes == null) { 737 LOG.warn(zkw.prefix("Attempt to check the " + 738 "closing node for " + encoded + 739 ". The node does not exist")); 740 return false; 741 } 742 743 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 744 LOG.warn(zkw.prefix("Attempt to check the " + 745 "closing node for " + encoded + 746 ". The node existed but was version " + stat.getVersion() + 747 " not the expected version " + expectedVersion)); 748 return false; 749 } 750 751 RegionTransition rt = getRegionTransition(existingBytes); 752 753 if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) { 754 LOG.warn(zkw.prefix("Attempt to check the " + 755 "closing node for " + encoded + 756 ". The node existed but was in an unexpected state: " + rt.getEventType())); 757 return false; 758 } 759 760 return true; 761 } 762 763 /** 764 * Method that actually performs unassigned node transitions. 765 * 766 * <p>Attempts to transition the unassigned node for the specified region 767 * from the expected state to the state in the specified transition data. 768 * 769 * <p>Method first reads existing data and verifies it is in the expected 770 * state. If the node does not exist or the node is not in the expected 771 * state, the method returns -1. If the transition is successful, the 772 * version number of the node following the transition is returned. 773 * 774 * <p>If the read state is what is expected, it attempts to write the new 775 * state and data into the node. When doing this, it includes the expected 776 * version (determined when the existing state was verified) to ensure that 777 * only one transition is successful. If there is a version mismatch, the 778 * method returns -1. 779 * 780 * <p>If the write is successful, no watch is set and the method returns true. 781 * 782 * @param zkw zk reference 783 * @param region region to be transitioned to opened 784 * @param serverName server transition happens on 785 * @param endState state to transition node to if all checks pass 786 * @param beginState state the node must currently be in to do transition 787 * @param expectedVersion expected version of data before modification, or -1 788 * @return version of node after transition, -1 if unsuccessful transition 789 * @throws KeeperException if unexpected zookeeper exception 790 */ 791 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 792 ServerName serverName, EventType beginState, EventType endState, 793 int expectedVersion) 794 throws KeeperException { 795 return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null); 796 } 797 798 799 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 800 ServerName serverName, EventType beginState, EventType endState, 801 int expectedVersion, final byte [] payload) 802 throws KeeperException { 803 String encoded = region.getEncodedName(); 804 if(LOG.isDebugEnabled()) { 805 LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) + 806 " from " + beginState.toString() + " to " + endState.toString())); 807 } 808 809 String node = getNodeName(zkw, encoded); 810 zkw.sync(node); 811 812 // Read existing data of the node 813 Stat stat = new Stat(); 814 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat); 815 if (existingBytes == null) { 816 // Node no longer exists. Return -1. It means unsuccessful transition. 817 return -1; 818 } 819 820 // Verify it is the expected version 821 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 822 LOG.warn(zkw.prefix("Attempt to transition the " + 823 "unassigned node for " + encoded + 824 " from " + beginState + " to " + endState + " failed, " + 825 "the node existed but was version " + stat.getVersion() + 826 " not the expected version " + expectedVersion)); 827 return -1; 828 } 829 830 if (beginState.equals(EventType.M_ZK_REGION_OFFLINE) 831 && endState.equals(EventType.RS_ZK_REGION_OPENING) 832 && expectedVersion == -1 && stat.getVersion() != 0) { 833 // the below check ensures that double assignment doesnot happen. 834 // When the node is created for the first time then the expected version 835 // that is passed will be -1 and the version in znode will be 0. 836 // In all other cases the version in znode will be > 0. 837 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for " 838 + encoded + " from " + beginState + " to " + endState + " failed, " 839 + "the node existed but was version " + stat.getVersion() 840 + " not the expected version " + expectedVersion)); 841 return -1; 842 } 843 844 RegionTransition rt = getRegionTransition(existingBytes); 845 846 // Verify the server transition happens on is not changed 847 if (!rt.getServerName().equals(serverName)) { 848 LOG.warn(zkw.prefix("Attempt to transition the " + 849 "unassigned node for " + encoded + 850 " from " + beginState + " to " + endState + " failed, " + 851 "the server that tried to transition was " + serverName + 852 " not the expected " + rt.getServerName())); 853 return -1; 854 } 855 856 // Verify it is in expected state 857 EventType et = rt.getEventType(); 858 if (!et.equals(beginState)) { 859 String existingServer = (rt.getServerName() == null) 860 ? "<unknown>" : rt.getServerName().toString(); 861 LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded 862 + " from " + beginState + " to " + endState + " failed, the node existed but" 863 + " was in the state " + et + " set by the server " + existingServer)); 864 return -1; 865 } 866 867 // Write new data, ensuring data has not changed since we last read it 868 try { 869 rt = RegionTransition.createRegionTransition( 870 endState, region.getRegionName(), serverName, payload); 871 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) { 872 LOG.warn(zkw.prefix("Attempt to transition the " + 873 "unassigned node for " + encoded + 874 " from " + beginState + " to " + endState + " failed, " + 875 "the node existed and was in the expected state but then when " + 876 "setting data we got a version mismatch")); 877 return -1; 878 } 879 if(LOG.isDebugEnabled()) { 880 LOG.debug(zkw.prefix("Transitioned node " + encoded + 881 " from " + beginState + " to " + endState)); 882 } 883 return stat.getVersion() + 1; 884 } catch (KeeperException.NoNodeException nne) { 885 LOG.warn(zkw.prefix("Attempt to transition the " + 886 "unassigned node for " + encoded + 887 " from " + beginState + " to " + endState + " failed, " + 888 "the node existed and was in the expected state but then when " + 889 "setting data it no longer existed")); 890 return -1; 891 } 892 } 893 894 private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException { 895 try { 896 return RegionTransition.parseFrom(bytes); 897 } catch (DeserializationException e) { 898 // Convert to a zk exception for now. Otherwise have to change API 899 throw ZKUtil.convert(e); 900 } 901 } 902 903 /** 904 * Gets the current data in the unassigned node for the specified region name 905 * or fully-qualified path. 906 * 907 * <p>Returns null if the region does not currently have a node. 908 * 909 * <p>Sets a watch on the node if the node exists. 910 * 911 * @param zkw zk reference 912 * @param pathOrRegionName fully-specified path or region name 913 * @return znode content 914 * @throws KeeperException if unexpected zookeeper exception 915 */ 916 public static byte [] getData(ZooKeeperWatcher zkw, 917 String pathOrRegionName) 918 throws KeeperException { 919 String node = getPath(zkw, pathOrRegionName); 920 return ZKUtil.getDataAndWatch(zkw, node); 921 } 922 923 /** 924 * Gets the current data in the unassigned node for the specified region name 925 * or fully-qualified path. 926 * 927 * <p>Returns null if the region does not currently have a node. 928 * 929 * <p>Sets a watch on the node if the node exists. 930 * 931 * @param zkw zk reference 932 * @param pathOrRegionName fully-specified path or region name 933 * @param stat object to populate the version. 934 * @return znode content 935 * @throws KeeperException if unexpected zookeeper exception 936 */ 937 public static byte [] getDataAndWatch(ZooKeeperWatcher zkw, 938 String pathOrRegionName, Stat stat) 939 throws KeeperException { 940 String node = getPath(zkw, pathOrRegionName); 941 return ZKUtil.getDataAndWatch(zkw, node, stat); 942 } 943 944 /** 945 * Gets the current data in the unassigned node for the specified region name 946 * or fully-qualified path. 947 * 948 * <p>Returns null if the region does not currently have a node. 949 * 950 * <p>Does not set a watch. 951 * 952 * @param zkw zk reference 953 * @param pathOrRegionName fully-specified path or region name 954 * @param stat object to store node info into on getData call 955 * @return znode content 956 * @throws KeeperException if unexpected zookeeper exception 957 */ 958 public static byte [] getDataNoWatch(ZooKeeperWatcher zkw, 959 String pathOrRegionName, Stat stat) 960 throws KeeperException { 961 String node = getPath(zkw, pathOrRegionName); 962 return ZKUtil.getDataNoWatch(zkw, node, stat); 963 } 964 965 /** 966 * @param zkw 967 * @param pathOrRegionName 968 * @return Path to znode 969 */ 970 public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) { 971 return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName); 972 } 973 974 /** 975 * Get the version of the specified znode 976 * @param zkw zk reference 977 * @param region region's info 978 * @return the version of the znode, -1 if it doesn't exist 979 * @throws KeeperException 980 */ 981 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region) 982 throws KeeperException { 983 String znode = getNodeName(zkw, region.getEncodedName()); 984 return ZKUtil.checkExists(zkw, znode); 985 } 986 987 /** 988 * Delete the assignment node regardless of its current state. 989 * <p> 990 * Fail silent even if the node does not exist at all. 991 * @param watcher 992 * @param regionInfo 993 * @throws KeeperException 994 */ 995 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher, 996 HRegionInfo regionInfo) 997 throws KeeperException { 998 String node = getNodeName(watcher, regionInfo.getEncodedName()); 999 ZKUtil.deleteNodeFailSilent(watcher, node); 1000 } 1001 1002 /** 1003 * Blocks until there are no node in regions in transition. 1004 * <p> 1005 * Used in testing only. 1006 * @param zkw zk reference 1007 * @throws KeeperException 1008 * @throws InterruptedException 1009 */ 1010 public static void blockUntilNoRIT(ZooKeeperWatcher zkw) 1011 throws KeeperException, InterruptedException { 1012 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 1013 List<String> znodes = 1014 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 1015 if (znodes != null && !znodes.isEmpty()) { 1016 LOG.debug("Waiting on RIT: " + znodes); 1017 } 1018 Thread.sleep(100); 1019 } 1020 } 1021 1022 /** 1023 * Blocks until there is at least one node in regions in transition. 1024 * <p> 1025 * Used in testing only. 1026 * @param zkw zk reference 1027 * @throws KeeperException 1028 * @throws InterruptedException 1029 */ 1030 public static void blockUntilRIT(ZooKeeperWatcher zkw) 1031 throws KeeperException, InterruptedException { 1032 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 1033 List<String> znodes = 1034 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 1035 if (znodes == null || znodes.isEmpty()) { 1036 LOG.debug("No RIT in ZK"); 1037 } 1038 Thread.sleep(100); 1039 } 1040 } 1041 1042 /** 1043 * Presume bytes are serialized unassigned data structure 1044 * @param znodeBytes 1045 * @return String of the deserialized znode bytes. 1046 */ 1047 static String toString(final byte[] znodeBytes) { 1048 // This method should not exist. Used by ZKUtil stringifying RegionTransition. Have the 1049 // method in here so RegionTransition does not leak into ZKUtil. 1050 try { 1051 RegionTransition rt = RegionTransition.parseFrom(znodeBytes); 1052 return rt.toString(); 1053 } catch (DeserializationException e) { 1054 return ""; 1055 } 1056 } 1057 }