001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020package org.apache.hadoop.hbase.master.assignment; 021 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.Comparator; 027import java.util.HashMap; 028import java.util.Iterator; 029import java.util.List; 030import java.util.Map; 031import java.util.Set; 032import java.util.SortedSet; 033import java.util.TreeSet; 034import java.util.concurrent.ConcurrentHashMap; 035import java.util.concurrent.ConcurrentSkipListMap; 036import java.util.concurrent.atomic.AtomicInteger; 037import java.util.function.Predicate; 038import java.util.stream.Collectors; 039import org.apache.hadoop.hbase.HConstants; 040import org.apache.hadoop.hbase.HRegionLocation; 041import org.apache.hadoop.hbase.ServerName; 042import org.apache.hadoop.hbase.TableName; 043import org.apache.hadoop.hbase.client.RegionInfo; 044import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; 045import org.apache.hadoop.hbase.master.RegionState; 046import org.apache.hadoop.hbase.master.RegionState.State; 047import org.apache.hadoop.hbase.procedure2.ProcedureEvent; 
048import org.apache.hadoop.hbase.util.Bytes; 049import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 050import org.apache.yetus.audience.InterfaceAudience; 051import org.slf4j.Logger; 052import org.slf4j.LoggerFactory; 053 054import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 055 056/** 057 * RegionStates contains a set of Maps that describes the in-memory state of the AM, with 058 * the regions available in the system, the region in transition, the offline regions and 059 * the servers holding regions. 060 */ 061@InterfaceAudience.Private 062public class RegionStates { 063 private static final Logger LOG = LoggerFactory.getLogger(RegionStates.class); 064 065 protected static final State[] STATES_EXPECTED_ON_OPEN = new State[] { 066 State.OPEN, // State may already be OPEN if we died after receiving the OPEN from regionserver 067 // but before complete finish of AssignProcedure. HBASE-20100. 068 State.OFFLINE, State.CLOSED, // disable/offline 069 State.SPLITTING, State.SPLIT, // ServerCrashProcedure 070 State.OPENING, State.FAILED_OPEN, // already in-progress (retrying) 071 }; 072 073 protected static final State[] STATES_EXPECTED_ON_CLOSE = new State[] { 074 State.SPLITTING, State.SPLIT, State.MERGING, // ServerCrashProcedure 075 State.OPEN, // enabled/open 076 State.CLOSING // already in-progress (retrying) 077 }; 078 079 private static class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> { 080 public AssignmentProcedureEvent(final RegionInfo regionInfo) { 081 super(regionInfo); 082 } 083 } 084 085 private static class ServerReportEvent extends ProcedureEvent<ServerName> { 086 public ServerReportEvent(final ServerName serverName) { 087 super(serverName); 088 } 089 } 090 091 /** 092 * Current Region State. 093 * In-memory only. Not persisted. 094 */ 095 // Mutable/Immutable? Changes have to be synchronized or not? 096 // Data members are volatile which seems to say multi-threaded access is fine. 
097 // In the below we do check and set but the check state could change before 098 // we do the set because no synchronization....which seems dodgy. Clear up 099 // understanding here... how many threads accessing? Do locks make it so one 100 // thread at a time working on a single Region's RegionStateNode? Lets presume 101 // so for now. Odd is that elsewhere in this RegionStates, we synchronize on 102 // the RegionStateNode instance. TODO. 103 public static class RegionStateNode implements Comparable<RegionStateNode> { 104 private final RegionInfo regionInfo; 105 private final ProcedureEvent<?> event; 106 107 private volatile RegionTransitionProcedure procedure = null; 108 private volatile ServerName regionLocation = null; 109 // notice that, the lastHost will only be updated when a region is successfully CLOSED through 110 // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync 111 // with the data in meta. 112 private volatile ServerName lastHost = null; 113 /** 114 * A Region-in-Transition (RIT) moves through states. 115 * See {@link State} for complete list. A Region that 116 * is opened moves from OFFLINE => OPENING => OPENED. 117 */ 118 private volatile State state = State.OFFLINE; 119 120 /** 121 * Updated whenever a call to {@link #setRegionLocation(ServerName)} 122 * or {@link #setState(State, State...)}. 123 */ 124 private volatile long lastUpdate = 0; 125 126 private volatile long openSeqNum = HConstants.NO_SEQNUM; 127 128 public RegionStateNode(final RegionInfo regionInfo) { 129 this.regionInfo = regionInfo; 130 this.event = new AssignmentProcedureEvent(regionInfo); 131 } 132 133 /** 134 * @param update new region state this node should be assigned. 135 * @param expected current state should be in this given list of expected states 136 * @return true, if current state is in expected list; otherwise false. 137 */ 138 public boolean setState(final State update, final State... 
expected) { 139 if (!isInState(expected)) { 140 return false; 141 } 142 this.state = update; 143 this.lastUpdate = EnvironmentEdgeManager.currentTime(); 144 return true; 145 } 146 147 /** 148 * Put region into OFFLINE mode (set state and clear location). 149 * @return Last recorded server deploy 150 */ 151 public ServerName offline() { 152 setState(State.OFFLINE); 153 return setRegionLocation(null); 154 } 155 156 /** 157 * Set new {@link State} but only if currently in <code>expected</code> State 158 * (if not, throw {@link UnexpectedStateException}. 159 */ 160 public void transitionState(final State update, final State... expected) 161 throws UnexpectedStateException { 162 if (!setState(update, expected)) { 163 throw new UnexpectedStateException("Expected " + Arrays.toString(expected) + 164 " so could move to " + update + " but current state=" + getState()); 165 } 166 } 167 168 public boolean isInState(final State... expected) { 169 if (expected != null && expected.length > 0) { 170 boolean expectedState = false; 171 for (int i = 0; i < expected.length; ++i) { 172 expectedState |= (getState() == expected[i]); 173 } 174 return expectedState; 175 } 176 return true; 177 } 178 179 public boolean isStuck() { 180 return isInState(State.FAILED_OPEN) && getProcedure() != null; 181 } 182 183 public boolean isInTransition() { 184 return getProcedure() != null; 185 } 186 187 public long getLastUpdate() { 188 return procedure != null ? 
procedure.getLastUpdate() : lastUpdate; 189 } 190 191 public void setLastHost(final ServerName serverName) { 192 this.lastHost = serverName; 193 } 194 195 public void setOpenSeqNum(final long seqId) { 196 this.openSeqNum = seqId; 197 } 198 199 public ServerName setRegionLocation(final ServerName serverName) { 200 ServerName lastRegionLocation = this.regionLocation; 201 if (LOG.isTraceEnabled() && serverName == null) { 202 LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE")); 203 } 204 this.regionLocation = serverName; 205 this.lastUpdate = EnvironmentEdgeManager.currentTime(); 206 return lastRegionLocation; 207 } 208 209 public boolean setProcedure(final RegionTransitionProcedure proc) { 210 if (this.procedure != null && this.procedure != proc) { 211 return false; 212 } 213 this.procedure = proc; 214 return true; 215 } 216 217 public boolean unsetProcedure(final RegionTransitionProcedure proc) { 218 if (this.procedure != null && this.procedure != proc) { 219 return false; 220 } 221 this.procedure = null; 222 return true; 223 } 224 225 public RegionTransitionProcedure getProcedure() { 226 return procedure; 227 } 228 229 public ProcedureEvent<?> getProcedureEvent() { 230 return event; 231 } 232 233 public RegionInfo getRegionInfo() { 234 return regionInfo; 235 } 236 237 public TableName getTable() { 238 return getRegionInfo().getTable(); 239 } 240 241 public boolean isSystemTable() { 242 return getTable().isSystemTable(); 243 } 244 245 public ServerName getLastHost() { 246 return lastHost; 247 } 248 249 public ServerName getRegionLocation() { 250 return regionLocation; 251 } 252 253 public State getState() { 254 return state; 255 } 256 257 public long getOpenSeqNum() { 258 return openSeqNum; 259 } 260 261 public int getFormatVersion() { 262 // we don't have any format for now 263 // it should probably be in regionInfo.getFormatVersion() 264 return 0; 265 } 266 267 public RegionState toRegionState() { 268 return new 
RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation()); 269 } 270 271 @Override 272 public int compareTo(final RegionStateNode other) { 273 // NOTE: RegionInfo sort by table first, so we are relying on that. 274 // we have a TestRegionState#testOrderedByTable() that check for that. 275 return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo()); 276 } 277 278 @Override 279 public int hashCode() { 280 return getRegionInfo().hashCode(); 281 } 282 283 @Override 284 public boolean equals(final Object other) { 285 if (this == other) return true; 286 if (!(other instanceof RegionStateNode)) return false; 287 return compareTo((RegionStateNode)other) == 0; 288 } 289 290 @Override 291 public String toString() { 292 return toDescriptiveString(); 293 } 294 295 public String toShortString() { 296 // rit= is the current Region-In-Transition State -- see State enum. 297 return String.format("rit=%s, location=%s", getState(), getRegionLocation()); 298 } 299 300 public String toDescriptiveString() { 301 return String.format("%s, table=%s, region=%s", 302 toShortString(), getTable(), getRegionInfo().getEncodedName()); 303 } 304 } 305 306 // This comparator sorts the RegionStates by time stamp then Region name. 307 // Comparing by timestamp alone can lead us to discard different RegionStates that happen 308 // to share a timestamp. 309 private static class RegionStateStampComparator implements Comparator<RegionState> { 310 @Override 311 public int compare(final RegionState l, final RegionState r) { 312 int stampCmp = Long.compare(l.getStamp(), r.getStamp()); 313 return stampCmp != 0 ? stampCmp : RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion()); 314 } 315 } 316 317 /** 318 * Server State. 319 */ 320 public enum ServerState { 321 /** 322 * Initial state. Available. 323 */ 324 ONLINE, 325 326 /** 327 * Only server which carries meta can have this state. 
We will split wal for meta and then 328 * assign meta first before splitting other wals. 329 */ 330 SPLITTING_META, 331 332 /** 333 * Indicate that the meta splitting is done. We need this state so that the UnassignProcedure 334 * for meta can safely quit. See the comments in UnassignProcedure.remoteCallFailed for more 335 * details. 336 */ 337 SPLITTING_META_DONE, 338 339 /** 340 * Server expired/crashed. Currently undergoing WAL splitting. 341 */ 342 SPLITTING, 343 344 /** 345 * WAL splitting done. This state will be used to tell the UnassignProcedure that it can safely 346 * quit. See the comments in UnassignProcedure.remoteCallFailed for more details. 347 */ 348 OFFLINE 349 } 350 351 /** 352 * State of Server; list of hosted regions, etc. 353 */ 354 public static class ServerStateNode implements Comparable<ServerStateNode> { 355 private final ServerReportEvent reportEvent; 356 357 private final Set<RegionStateNode> regions; 358 private final ServerName serverName; 359 360 private volatile ServerState state = ServerState.ONLINE; 361 362 public ServerStateNode(final ServerName serverName) { 363 this.serverName = serverName; 364 this.regions = ConcurrentHashMap.newKeySet(); 365 this.reportEvent = new ServerReportEvent(serverName); 366 } 367 368 public ServerName getServerName() { 369 return serverName; 370 } 371 372 public ServerState getState() { 373 return state; 374 } 375 376 public ProcedureEvent<?> getReportEvent() { 377 return reportEvent; 378 } 379 380 public boolean isInState(final ServerState... 
expected) { 381 boolean expectedState = false; 382 if (expected != null) { 383 for (int i = 0; i < expected.length; ++i) { 384 expectedState |= (state == expected[i]); 385 } 386 } 387 return expectedState; 388 } 389 390 private void setState(final ServerState state) { 391 this.state = state; 392 } 393 394 public Set<RegionStateNode> getRegions() { 395 return regions; 396 } 397 398 public int getRegionCount() { 399 return regions.size(); 400 } 401 402 public ArrayList<RegionInfo> getRegionInfoList() { 403 ArrayList<RegionInfo> hris = new ArrayList<RegionInfo>(regions.size()); 404 for (RegionStateNode region: regions) { 405 hris.add(region.getRegionInfo()); 406 } 407 return hris; 408 } 409 410 public void addRegion(final RegionStateNode regionNode) { 411 this.regions.add(regionNode); 412 } 413 414 public void removeRegion(final RegionStateNode regionNode) { 415 this.regions.remove(regionNode); 416 } 417 418 @Override 419 public int compareTo(final ServerStateNode other) { 420 return getServerName().compareTo(other.getServerName()); 421 } 422 423 @Override 424 public int hashCode() { 425 return getServerName().hashCode(); 426 } 427 428 @Override 429 public boolean equals(final Object other) { 430 if (this == other) return true; 431 if (!(other instanceof ServerStateNode)) return false; 432 return compareTo((ServerStateNode)other) == 0; 433 } 434 435 @Override 436 public String toString() { 437 return String.format("name=%s, state=%s, regionCount=%d", getServerName(), getState(), 438 getRegionCount()); 439 } 440 } 441 442 public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR = 443 new RegionStateStampComparator(); 444 445 // TODO: Replace the ConcurrentSkipListMaps 446 /** 447 * RegionName -- i.e. 
// TODO: Replace the ConcurrentSkipListMaps
/**
 * RegionName -- i.e. RegionInfo.getRegionName() -- as bytes to {@link RegionStateNode}
 */
private final ConcurrentSkipListMap<byte[], RegionStateNode> regionsMap =
  new ConcurrentSkipListMap<>(Bytes.BYTES_COMPARATOR);

/** Region nodes currently registered as in transition, ordered by RegionInfo. */
private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionInTransition =
  new ConcurrentSkipListMap<>(RegionInfo.COMPARATOR);

/**
 * Regions marked as offline on a read of hbase:meta. Unused or at least, once
 * offlined, regions have no means of coming on line again. TODO.
 */
// Ordered with the same explicit comparator as regionInTransition instead of relying on every
// RegionInfo implementation being Comparable.
private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionOffline =
  new ConcurrentSkipListMap<>(RegionInfo.COMPARATOR);

/** Failed-open bookkeeping keyed by region name bytes. */
private final ConcurrentSkipListMap<byte[], RegionFailedOpen> regionFailedOpen =
  new ConcurrentSkipListMap<>(Bytes.BYTES_COMPARATOR);

/** Per-server state keyed by server name. */
private final ConcurrentHashMap<ServerName, ServerStateNode> serverMap =
  new ConcurrentHashMap<>();

public RegionStates() { }

/**
 * Drops all in-memory region and server state held by this instance.
 */
public void clear() {
  regionsMap.clear();
  regionInTransition.clear();
  regionOffline.clear();
  // Failed-open entries refer to regions that are no longer tracked once regionsMap is empty,
  // so drop them together with the rest of the state.
  regionFailedOpen.clear();
  serverMap.clear();
}

/**
 * @return true if <code>hri</code> is present in the regions map, the in-transition map or the
 *         offline map
 */
@VisibleForTesting
public boolean isRegionInRegionStates(final RegionInfo hri) {
  return regionsMap.containsKey(hri.getRegionName())
    || regionInTransition.containsKey(hri)
    || regionOffline.containsKey(hri);
}
oldNode : newNode; 490 } 491 492 protected RegionStateNode getOrCreateRegionStateNode(final RegionInfo regionInfo) { 493 RegionStateNode node = regionsMap.get(regionInfo.getRegionName()); 494 return node != null ? node : createRegionStateNode(regionInfo); 495 } 496 497 RegionStateNode getRegionStateNodeFromName(final byte[] regionName) { 498 return regionsMap.get(regionName); 499 } 500 501 public RegionStateNode getRegionStateNode(final RegionInfo regionInfo) { 502 return getRegionStateNodeFromName(regionInfo.getRegionName()); 503 } 504 505 public void deleteRegion(final RegionInfo regionInfo) { 506 regionsMap.remove(regionInfo.getRegionName()); 507 // See HBASE-20860 508 // After master restarts, merged regions' RIT state may not be cleaned, 509 // making sure they are cleaned here 510 if (regionInTransition.containsKey(regionInfo)) { 511 regionInTransition.remove(regionInfo); 512 } 513 // Remove from the offline regions map too if there. 514 if (this.regionOffline.containsKey(regionInfo)) { 515 if (LOG.isTraceEnabled()) LOG.trace("Removing from regionOffline Map: " + regionInfo); 516 this.regionOffline.remove(regionInfo); 517 } 518 } 519 520 public void deleteRegions(final List<RegionInfo> regionInfos) { 521 regionInfos.forEach(this::deleteRegion); 522 } 523 524 ArrayList<RegionStateNode> getTableRegionStateNodes(final TableName tableName) { 525 final ArrayList<RegionStateNode> regions = new ArrayList<RegionStateNode>(); 526 for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) { 527 if (!node.getTable().equals(tableName)) break; 528 regions.add(node); 529 } 530 return regions; 531 } 532 533 ArrayList<RegionState> getTableRegionStates(final TableName tableName) { 534 final ArrayList<RegionState> regions = new ArrayList<RegionState>(); 535 for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) { 536 if (!node.getTable().equals(tableName)) break; 537 regions.add(node.toRegionState()); 538 } 539 return regions; 540 } 541 
542 ArrayList<RegionInfo> getTableRegionsInfo(final TableName tableName) { 543 final ArrayList<RegionInfo> regions = new ArrayList<RegionInfo>(); 544 for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) { 545 if (!node.getTable().equals(tableName)) break; 546 regions.add(node.getRegionInfo()); 547 } 548 return regions; 549 } 550 551 Collection<RegionStateNode> getRegionStateNodes() { 552 return regionsMap.values(); 553 } 554 555 public ArrayList<RegionState> getRegionStates() { 556 final ArrayList<RegionState> regions = new ArrayList<RegionState>(regionsMap.size()); 557 for (RegionStateNode node: regionsMap.values()) { 558 regions.add(node.toRegionState()); 559 } 560 return regions; 561 } 562 563 // ========================================================================== 564 // RegionState helpers 565 // ========================================================================== 566 public RegionState getRegionState(final RegionInfo regionInfo) { 567 RegionStateNode regionStateNode = getRegionStateNode(regionInfo); 568 return regionStateNode == null ? null : regionStateNode.toRegionState(); 569 } 570 571 public RegionState getRegionState(final String encodedRegionName) { 572 // TODO: Need a map <encodedName, ...> but it is just dispatch merge... 573 for (RegionStateNode node: regionsMap.values()) { 574 if (node.getRegionInfo().getEncodedName().equals(encodedRegionName)) { 575 return node.toRegionState(); 576 } 577 } 578 return null; 579 } 580 581 // ============================================================================================ 582 // TODO: helpers 583 // ============================================================================================ 584 public boolean hasTableRegionStates(final TableName tableName) { 585 // TODO 586 return !getTableRegionStates(tableName).isEmpty(); 587 } 588 589 /** 590 * @return Return online regions of table; does not include OFFLINE or SPLITTING regions. 
591 */ 592 public List<RegionInfo> getRegionsOfTable(final TableName table) { 593 return getRegionsOfTable(table, false); 594 } 595 596 private HRegionLocation createRegionForReopen(RegionStateNode node) { 597 synchronized (node) { 598 if (!include(node, false)) { 599 return null; 600 } 601 if (node.isInState(State.OPEN)) { 602 return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), 603 node.getOpenSeqNum()); 604 } else if (node.isInState(State.OPENING)) { 605 return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), -1); 606 } else { 607 return null; 608 } 609 } 610 } 611 612 /** 613 * Get the regions to be reopened when modifying a table. 614 * <p/> 615 * Notice that the {@code openSeqNum} in the returned HRegionLocation is also used to indicate the 616 * state of this region, positive means the region is in {@link State#OPEN}, -1 means 617 * {@link State#OPENING}. And for regions in other states we do not need reopen them. 618 */ 619 public List<HRegionLocation> getRegionsOfTableForReopen(TableName tableName) { 620 return getTableRegionStateNodes(tableName).stream().map(this::createRegionForReopen) 621 .filter(r -> r != null).collect(Collectors.toList()); 622 } 623 624 /** 625 * Check whether the region has been reopened. The meaning of the {@link HRegionLocation} is the 626 * same with {@link #getRegionsOfTableForReopen(TableName)}. 627 * <p/> 628 * For a region which is in {@link State#OPEN} before, if the region state is changed or the open 629 * seq num is changed, we can confirm that it has been reopened. 630 * <p/> 631 * For a region which is in {@link State#OPENING} before, usually it will be in {@link State#OPEN} 632 * now and we will schedule a MRP to reopen it. 
But there are several exceptions: 633 * <ul> 634 * <li>The region is in state other than {@link State#OPEN} or {@link State#OPENING}.</li> 635 * <li>The location of the region has been changed</li> 636 * </ul> 637 * Of course the region could still be in {@link State#OPENING} state and still on the same 638 * server, then here we will still return a {@link HRegionLocation} for it, just like 639 * {@link #getRegionsOfTableForReopen(TableName)}. 640 * @param oldLoc the previous state/location of this region 641 * @return null if the region has been reopened, otherwise a new {@link HRegionLocation} which 642 * means we still need to reopen the region. 643 * @see #getRegionsOfTableForReopen(TableName) 644 */ 645 public HRegionLocation checkReopened(HRegionLocation oldLoc) { 646 RegionStateNode node = getRegionStateNode(oldLoc.getRegion()); 647 // HBASE-20921 648 // if the oldLoc's state node does not exist, that means the region is 649 // merged or split, no need to check it 650 if (node == null) { 651 return null; 652 } 653 synchronized (node) { 654 if (oldLoc.getSeqNum() >= 0) { 655 // in OPEN state before 656 if (node.isInState(State.OPEN)) { 657 if (node.getOpenSeqNum() > oldLoc.getSeqNum()) { 658 // normal case, the region has been reopened 659 return null; 660 } else { 661 // the open seq num does not change, need to reopen again 662 return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), 663 node.getOpenSeqNum()); 664 } 665 } else { 666 // the state has been changed so we can make sure that the region has been reopened(not 667 // finished maybe, but not a problem). 
668 return null; 669 } 670 } else { 671 // in OPENING state before 672 if (!node.isInState(State.OPEN, State.OPENING)) { 673 // not in OPEN or OPENING state, then we can make sure that the region has been 674 // reopened(not finished maybe, but not a problem) 675 return null; 676 } else { 677 if (!node.getRegionLocation().equals(oldLoc.getServerName())) { 678 // the region has been moved, so we can make sure that the region has been reopened. 679 return null; 680 } 681 // normal case, we are still in OPENING state, or the reopen has been opened and the state 682 // is changed to OPEN. 683 long openSeqNum = node.isInState(State.OPEN) ? node.getOpenSeqNum() : -1; 684 return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), openSeqNum); 685 } 686 } 687 } 688 } 689 690 /** 691 * @return Return online regions of table; does not include OFFLINE or SPLITTING regions. 692 */ 693 public List<RegionInfo> getRegionsOfTable(TableName table, boolean offline) { 694 return getRegionsOfTable(table, state -> include(state, offline)); 695 } 696 697 /** 698 * @return Return the regions of the table; does not include OFFLINE unless you set 699 * <code>offline</code> to true. Does not include regions that are in the 700 * {@link State#SPLIT} state. 701 */ 702 private List<RegionInfo> getRegionsOfTable(TableName table, Predicate<RegionStateNode> filter) { 703 return getTableRegionStateNodes(table).stream().filter(filter).map(n -> n.getRegionInfo()) 704 .collect(Collectors.toList()); 705 } 706 707 /** 708 * Utility. Whether to include region in list of regions. Default is to 709 * weed out split and offline regions. 710 * @return True if we should include the <code>node</code> (do not include 711 * if split or offline unless <code>offline</code> is set to true. 
712 */ 713 boolean include(final RegionStateNode node, final boolean offline) { 714 if (LOG.isTraceEnabled()) { 715 LOG.trace("WORKING ON " + node + " " + node.getRegionInfo()); 716 } 717 if (node.isInState(State.SPLIT)) return false; 718 if (node.isInState(State.OFFLINE) && !offline) return false; 719 final RegionInfo hri = node.getRegionInfo(); 720 return (!hri.isOffline() && !hri.isSplit()) || 721 ((hri.isOffline() || hri.isSplit()) && offline); 722 } 723 724 /** 725 * Returns the set of regions hosted by the specified server 726 * @param serverName the server we are interested in 727 * @return set of RegionInfo hosted by the specified server 728 */ 729 public List<RegionInfo> getServerRegionInfoSet(final ServerName serverName) { 730 final ServerStateNode serverInfo = getServerNode(serverName); 731 if (serverInfo == null) return Collections.emptyList(); 732 733 synchronized (serverInfo) { 734 return serverInfo.getRegionInfoList(); 735 } 736 } 737 738 // ============================================================================================ 739 // Split helpers 740 // These methods will only be called in ServerCrashProcedure, and at the end of SCP we will remove 741 // the ServerStateNode by calling removeServer. 742 // ============================================================================================ 743 744 private void setServerState(ServerName serverName, ServerState state) { 745 ServerStateNode serverNode = getOrCreateServer(serverName); 746 synchronized (serverNode) { 747 serverNode.setState(state); 748 } 749 } 750 751 /** 752 * Call this when we start meta log splitting a crashed Server. 753 * @see #metaLogSplit(ServerName) 754 */ 755 public void metaLogSplitting(ServerName serverName) { 756 setServerState(serverName, ServerState.SPLITTING_META); 757 } 758 759 /** 760 * Called after we've split the meta logs on a crashed Server. 
761 * @see #metaLogSplitting(ServerName) 762 */ 763 public void metaLogSplit(ServerName serverName) { 764 setServerState(serverName, ServerState.SPLITTING_META_DONE); 765 } 766 767 /** 768 * Call this when we start log splitting for a crashed Server. 769 * @see #logSplit(ServerName) 770 */ 771 public void logSplitting(final ServerName serverName) { 772 setServerState(serverName, ServerState.SPLITTING); 773 } 774 775 /** 776 * Called after we've split all logs on a crashed Server. 777 * @see #logSplitting(ServerName) 778 */ 779 public void logSplit(final ServerName serverName) { 780 setServerState(serverName, ServerState.OFFLINE); 781 } 782 783 public void updateRegionState(final RegionInfo regionInfo, final State state) { 784 final RegionStateNode regionNode = getOrCreateRegionStateNode(regionInfo); 785 synchronized (regionNode) { 786 regionNode.setState(state); 787 } 788 } 789 790 // ============================================================================================ 791 // TODO: 792 // ============================================================================================ 793 public List<RegionInfo> getAssignedRegions() { 794 final List<RegionInfo> result = new ArrayList<RegionInfo>(); 795 for (RegionStateNode node: regionsMap.values()) { 796 if (!node.isInTransition()) { 797 result.add(node.getRegionInfo()); 798 } 799 } 800 return result; 801 } 802 803 public boolean isRegionInState(final RegionInfo regionInfo, final State... state) { 804 final RegionStateNode region = getRegionStateNode(regionInfo); 805 if (region != null) { 806 synchronized (region) { 807 return region.isInState(state); 808 } 809 } 810 return false; 811 } 812 813 public boolean isRegionOnline(final RegionInfo regionInfo) { 814 return isRegionInState(regionInfo, State.OPEN); 815 } 816 817 /** 818 * @return True if region is offline (In OFFLINE or CLOSED state). 
*/
  public boolean isRegionOffline(final RegionInfo regionInfo) {
    return isRegionInState(regionInfo, State.OFFLINE, State.CLOSED);
  }

  /**
   * Groups the given regions by the server each one is currently located on.
   * Regions that have no state node, or whose node reports no region location, are left out.
   * @param regions the regions to look up
   * @return a newly built map from server to the regions of <code>regions</code> located on it
   */
  public Map<ServerName, List<RegionInfo>> getSnapShotOfAssignment(
      final Collection<RegionInfo> regions) {
    final Map<ServerName, List<RegionInfo>> result = new HashMap<>();
    for (RegionInfo hri : regions) {
      final RegionStateNode node = getRegionStateNode(hri);
      if (node == null) {
        continue;
      }

      // TODO: State.OPEN
      final ServerName serverName = node.getRegionLocation();
      if (serverName == null) {
        continue;
      }

      result.computeIfAbsent(serverName, k -> new ArrayList<>()).add(node.getRegionInfo());
    }
    return result;
  }

  /**
   * Builds a map from every region in <code>regionsMap</code> to its region location.
   * The location is stored exactly as returned by the node; no null check is done here.
   * @return a newly built map of region to region location
   */
  public Map<RegionInfo, ServerName> getRegionAssignments() {
    final HashMap<RegionInfo, ServerName> assignments = new HashMap<>();
    for (RegionStateNode node : regionsMap.values()) {
      assignments.put(node.getRegionInfo(), node.getRegionLocation());
    }
    return assignments;
  }

  /**
   * Buckets the regions of <code>tableName</code> by their current state.
   * Every {@link State} value is present as a key; states with no matching region map to an
   * empty list.
   * @param tableName the table whose regions are collected
   * @return a newly built map from state to the regions of the table in that state
   */
  public Map<RegionState.State, List<RegionInfo>> getRegionByStateOfTable(TableName tableName) {
    final State[] states = State.values();
    final Map<RegionState.State, List<RegionInfo>> tableRegions = new HashMap<>(states.length);
    for (State state : states) {
      tableRegions.put(state, new ArrayList<>());
    }

    for (RegionStateNode node : regionsMap.values()) {
      if (node.getTable().equals(tableName)) {
        tableRegions.get(node.getState()).add(node.getRegionInfo());
      }
    }
    return tableRegions;
  }

  /**
   * Looks up the server for a region.
   * @param regionInfo the region to look up
   * @return the region location if set, otherwise the node's last host; null when there is
   *   no state node for <code>regionInfo</code>
   */
  public ServerName getRegionServerOfRegion(final RegionInfo regionInfo) {
    final RegionStateNode region = getRegionStateNode(regionInfo);
    if (region == null) {
      return null;
    }
    // Read location and last host under the node's monitor so both come from the same moment.
    synchronized (region) {
      final ServerName server = region.getRegionLocation();
      return server != null ? server : region.getLastHost();
    }
  }

  /**
   * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
   * Can't let out original since it can change and at least the load balancer
   * wants to iterate this exported list.  We need to synchronize on regions
   * since all access to this.servers is under a lock on this.regions.
   * NOTE(review): no explicit synchronization is visible in this method; confirm whether the
   * locking remark above still applies.
   * @param forceByCluster a flag to force to aggregate the server-load to the cluster level
   * @return A clone of current assignments by table.
   */
  public Map<TableName, Map<ServerName, List<RegionInfo>>> getAssignmentsByTable(
      final boolean forceByCluster) {
    if (!forceByCluster) {
      return getAssignmentsByTable();
    }

    final HashMap<ServerName, List<RegionInfo>> ensemble = new HashMap<>(serverMap.size());
    for (ServerStateNode serverNode : serverMap.values()) {
      ensemble.put(serverNode.getServerName(), serverNode.getRegionInfoList());
    }

    // TODO: can we use Collections.singletonMap(HConstants.ENSEMBLE_TABLE_NAME, ensemble)?
    final Map<TableName, Map<ServerName, List<RegionInfo>>> result = new HashMap<>(1);
    result.put(HConstants.ENSEMBLE_TABLE_NAME, ensemble);
    return result;
  }

  /**
   * Builds the current assignments grouped by table and then by server.
   * A table gets an entry as soon as one of its regions is seen, even if that region has no
   * region location. Afterwards every server in <code>serverMap</code> is added to every
   * table with an empty list when it holds none of the table's regions.
   * @return a newly built map of table to (server to regions)
   */
  public Map<TableName, Map<ServerName, List<RegionInfo>>> getAssignmentsByTable() {
    final Map<TableName, Map<ServerName, List<RegionInfo>>> result = new HashMap<>();
    for (RegionStateNode node : regionsMap.values()) {
      // Register the table before the location check so unassigned regions still surface it.
      final Map<ServerName, List<RegionInfo>> tableResult =
          result.computeIfAbsent(node.getTable(), k -> new HashMap<>());

      final ServerName serverName = node.getRegionLocation();
      if (serverName == null) {
        LOG.info("Skipping, no server for {}", node);
        continue;
      }

      tableResult.computeIfAbsent(serverName, k -> new ArrayList<>()).add(node.getRegionInfo());
    }
    // Add online servers with no assignment for the table.
    for (Map<ServerName, List<RegionInfo>> table : result.values()) {
      for (ServerName svr : serverMap.keySet()) {
        table.computeIfAbsent(svr, k -> new ArrayList<>());
      }
    }
    return result;
  }

  // ==========================================================================
  //  Region in transition helpers
  // ==========================================================================
  /**
   * Registers <code>regionNode</code> in the region-in-transition map.
   * @param regionNode the node to register
   * @param procedure procedure to attach to the node first; may be null, in which case the
   *   node is registered without touching its procedure
   * @return false if a non-null <code>procedure</code> could not be set on the node (the node
   *   is then not registered), true otherwise
   */
  protected boolean addRegionInTransition(final RegionStateNode regionNode,
      final RegionTransitionProcedure procedure) {
    if (procedure != null && !regionNode.setProcedure(procedure)) {
      return false;
    }

    regionInTransition.put(regionNode.getRegionInfo(), regionNode);
    return true;
  }

  /**
   * Removes <code>regionNode</code> from the region-in-transition map and then unsets
   * <code>procedure</code> on the node.
   */
  protected void removeRegionInTransition(final RegionStateNode regionNode,
      final RegionTransitionProcedure procedure) {
    regionInTransition.remove(regionNode.getRegionInfo());
    regionNode.unsetProcedure(procedure);
  }

  /**
   * @return true if the region-in-transition map is not empty
   */
  public boolean hasRegionsInTransition() {
    return !regionInTransition.isEmpty();
  }

  /**
   * @return true if <code>regionInfo</code> is in the region-in-transition map and its node
   *   reports being in transition
   */
  public boolean isRegionInTransition(final RegionInfo regionInfo) {
    final RegionStateNode node = regionInTransition.get(regionInfo);
    return node != null && node.isInTransition();
  }

  /**
   * @return If a procedure-in-transition for <code>hri</code>, return it else null.
   */
  public RegionTransitionProcedure getRegionTransitionProcedure(final RegionInfo hri) {
    final RegionStateNode node = regionInTransition.get(hri);
    return node == null ? null : node.getProcedure();
  }

  /**
   * @return the {@link RegionState} of <code>hri</code> if it is registered as in transition
   *   and its node reports being in transition, else null
   */
  public RegionState getRegionTransitionState(final RegionInfo hri) {
    final RegionStateNode node = regionInTransition.get(hri);
    if (node == null) {
      return null;
    }

    // Check and convert under the node's monitor so the state cannot change in between.
    synchronized (node) {
      return node.isInTransition() ? node.toRegionState() : null;
    }
  }

  /**
   * @return a new list holding the nodes currently in the region-in-transition map
   */
  public List<RegionStateNode> getRegionsInTransition() {
    return new ArrayList<>(regionInTransition.values());
  }

  /**
   * Get the number of regions in transition.
   */
  public int getRegionsInTransitionCount() {
    return regionInTransition.size();
  }

  /**
   * @return a new list with the {@link RegionState} of every node in the
   *   region-in-transition map
   */
  public List<RegionState> getRegionsStateInTransition() {
    final List<RegionState> rit = new ArrayList<>(regionInTransition.size());
    for (RegionStateNode node : regionInTransition.values()) {
      rit.add(node.toRegionState());
    }
    return rit;
  }

  /**
   * @return a new sorted set with the {@link RegionState} of every node in the
   *   region-in-transition map, ordered by <code>REGION_STATE_STAMP_COMPARATOR</code>
   */
  public SortedSet<RegionState> getRegionsInTransitionOrderedByTimestamp() {
    final SortedSet<RegionState> rit = new TreeSet<>(REGION_STATE_STAMP_COMPARATOR);
    for (RegionStateNode node : regionInTransition.values()) {
      rit.add(node.toRegionState());
    }
    return rit;
  }

  // ==========================================================================
  //  Region offline helpers
  // ==========================================================================
  // TODO: Populated when we read meta but regions never make it out of here.
  public void addToOfflineRegions(final RegionStateNode regionNode) {
    LOG.info("Added to offline, CURRENTLY NEVER CLEARED!!! {}", regionNode);
    regionOffline.put(regionNode.getRegionInfo(), regionNode);
  }

  // TODO: Unused.
// Drops the entry for regionInfo from the regionOffline map.
  public void removeFromOfflineRegions(final RegionInfo regionInfo) {
    regionOffline.remove(regionInfo);
  }

  // ==========================================================================
  //  Region FAIL_OPEN helpers
  // ==========================================================================
  /**
   * Bookkeeping for a region that failed to open: the region's state node, a retry counter
   * and the last exception recorded via {@link #setException(Exception)}.
   */
  public static final class RegionFailedOpen {
    private final RegionStateNode regionNode;

    private volatile Exception exception = null;
    // Never reassigned; only the counter value changes.
    private final AtomicInteger retries = new AtomicInteger();

    public RegionFailedOpen(final RegionStateNode regionNode) {
      this.regionNode = regionNode;
    }

    public RegionStateNode getRegionStateNode() {
      return regionNode;
    }

    public RegionInfo getRegionInfo() {
      return regionNode.getRegionInfo();
    }

    /**
     * @return the retry count after atomically adding one
     */
    public int incrementAndGetRetries() {
      return this.retries.incrementAndGet();
    }

    public int getRetries() {
      return retries.get();
    }

    public void setException(final Exception exception) {
      this.exception = exception;
    }

    /**
     * @return the exception last set, or null if none was set
     */
    public Exception getException() {
      return this.exception;
    }
  }

  /**
   * Returns the failed-open record for the region of <code>regionNode</code>, creating and
   * registering one keyed by region name if none exists yet.
   * @param regionNode node of the region that failed to open
   * @return the record stored in the failed-open map for that region
   */
  public RegionFailedOpen addToFailedOpen(final RegionStateNode regionNode) {
    final byte[] key = regionNode.getRegionInfo().getRegionName();
    RegionFailedOpen node = regionFailedOpen.get(key);
    if (node == null) {
      final RegionFailedOpen newNode = new RegionFailedOpen(regionNode);
      // putIfAbsent returns the record already stored for the key, if any; prefer that one.
      final RegionFailedOpen oldNode = regionFailedOpen.putIfAbsent(key, newNode);
      node = oldNode != null ? oldNode : newNode;
    }
    return node;
  }

  /**
   * @return the failed-open record stored under the region's name, as returned by the map
   */
  public RegionFailedOpen getFailedOpen(final RegionInfo regionInfo) {
    return regionFailedOpen.get(regionInfo.getRegionName());
  }

  /**
   * Removes the failed-open record stored under the region's name, if any.
   */
  public void removeFromFailedOpen(final RegionInfo regionInfo) {
    regionFailedOpen.remove(regionInfo.getRegionName());
  }

  /**
   * @return the {@link RegionState} of every region with a failed-open record; an empty
   *   immutable list when there are no records
   */
  public List<RegionState> getRegionFailedOpen() {
    if (regionFailedOpen.isEmpty()) {
      return Collections.emptyList();
    }

    final ArrayList<RegionState> regions = new ArrayList<>(regionFailedOpen.size());
    for (RegionFailedOpen r : regionFailedOpen.values()) {
      regions.add(r.getRegionStateNode().toRegionState());
    }
    return regions;
  }

  // ==========================================================================
  //  Servers
  // ==========================================================================

  /**
   * Be judicious calling this method. Do it on server register ONLY otherwise
   * you could mess up online server accounting. TODO: Review usage and convert
   * to {@link #getServerNode(ServerName)} where we can.
   */
  ServerStateNode getOrCreateServer(final ServerName serverName) {
    ServerStateNode node = serverMap.get(serverName);
    if (node == null) {
      if (LOG.isTraceEnabled()) {
        // The throwaway exception only serves to put the caller's stack into the trace log,
        // so do not pay for building it unless trace logging is on.
        LOG.trace("CREATING! {}", serverName, new RuntimeException("WHERE AM I?"));
      }
      final ServerStateNode newNode = new ServerStateNode(serverName);
      // putIfAbsent returns the node already stored for the server, if any; prefer that one.
      final ServerStateNode oldNode = serverMap.putIfAbsent(serverName, newNode);
      node = oldNode != null ? oldNode : newNode;
    }
    return node;
  }

  /**
   * Removes the node of <code>serverName</code> from <code>serverMap</code>, if present.
   */
  public void removeServer(final ServerName serverName) {
    serverMap.remove(serverName);
  }

  /**
   * @return the node stored for <code>serverName</code> in <code>serverMap</code>, as
   *   returned by the map; never creates one
   */
  public ServerStateNode getServerNode(final ServerName serverName) {
    return serverMap.get(serverName);
  }

  /**
   * @return the sum of the region counts of all server nodes divided by the number of server
   *   nodes, or 0.0 when there are no server nodes
   */
  public double getAverageLoad() {
    int numServers = 0;
    int totalLoad = 0;
    // Count while iterating so the divisor matches exactly the nodes that were summed.
    for (ServerStateNode node : serverMap.values()) {
      totalLoad += node.getRegionCount();
      numServers++;
    }
    return numServers == 0 ? 0.0 : (double) totalLoad / (double) numServers;
  }

  /**
   * Add reference to region to serverstatenode.
   * DOES NOT AUTO-CREATE ServerStateNode instance.
   * @return Return serverstatenode or null if none.
   */
  ServerStateNode addRegionToServer(final RegionStateNode regionNode) {
    final ServerStateNode ssn = getServerNode(regionNode.getRegionLocation());
    if (ssn == null) {
      return null;
    }
    ssn.addRegion(regionNode);
    return ssn;
  }

  /**
   * Removes the reference to <code>regionNode</code> from the node of <code>serverName</code>,
   * if that server has a node.
   * @return the server's node, or null if none
   */
  public ServerStateNode removeRegionFromServer(final ServerName serverName,
      final RegionStateNode regionNode) {
    final ServerStateNode serverNode = getServerNode(serverName);
    if (serverNode != null) {
      serverNode.removeRegion(regionNode);
    }
    return serverNode;
  }

  // ==========================================================================
  //  ToString helpers
  // ==========================================================================
  /**
   * Renders region names as <code>[name1, name2, ...]</code>, each name converted with
   * {@link Bytes#toStringBinary(byte[])}. An empty collection yields <code>[]</code>.
   * @param regions the region names to render
   * @return the bracketed, comma-separated rendering
   */
  public static String regionNamesToString(final Collection<byte[]> regions) {
    return regions.stream()
        .map(name -> Bytes.toStringBinary(name))
        .collect(Collectors.joining(", ", "[", "]"));
  }
}