/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.master.assignment;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionState.State;
048import org.apache.hadoop.hbase.procedure2.ProcedureEvent; 049import org.apache.hadoop.hbase.util.Bytes; 050import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 051import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; 052import org.apache.yetus.audience.InterfaceAudience; 053import org.slf4j.Logger; 054import org.slf4j.LoggerFactory; 055 056import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 057 058/** 059 * RegionStates contains a set of Maps that describes the in-memory state of the AM, with 060 * the regions available in the system, the region in transition, the offline regions and 061 * the servers holding regions. 062 */ 063@InterfaceAudience.Private 064public class RegionStates { 065 private static final Logger LOG = LoggerFactory.getLogger(RegionStates.class); 066 067 protected static final State[] STATES_EXPECTED_ON_OPEN = new State[] { 068 State.OPEN, // State may already be OPEN if we died after receiving the OPEN from regionserver 069 // but before complete finish of AssignProcedure. HBASE-20100. 070 State.OFFLINE, State.CLOSED, // disable/offline 071 State.SPLITTING, State.SPLIT, // ServerCrashProcedure 072 State.OPENING, State.FAILED_OPEN, // already in-progress (retrying) 073 }; 074 075 protected static final State[] STATES_EXPECTED_ON_CLOSE = new State[] { 076 State.SPLITTING, State.SPLIT, State.MERGING, // ServerCrashProcedure 077 State.OPEN, // enabled/open 078 State.CLOSING // already in-progress (retrying) 079 }; 080 081 private static class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> { 082 public AssignmentProcedureEvent(final RegionInfo regionInfo) { 083 super(regionInfo); 084 } 085 } 086 087 private static class ServerReportEvent extends ProcedureEvent<ServerName> { 088 public ServerReportEvent(final ServerName serverName) { 089 super(serverName); 090 } 091 } 092 093 /** 094 * Current Region State. 095 * In-memory only. Not persisted. 096 */ 097 // Mutable/Immutable? 
Changes have to be synchronized or not? 098 // Data members are volatile which seems to say multi-threaded access is fine. 099 // In the below we do check and set but the check state could change before 100 // we do the set because no synchronization....which seems dodgy. Clear up 101 // understanding here... how many threads accessing? Do locks make it so one 102 // thread at a time working on a single Region's RegionStateNode? Lets presume 103 // so for now. Odd is that elsewhere in this RegionStates, we synchronize on 104 // the RegionStateNode instance. TODO. 105 public static class RegionStateNode implements Comparable<RegionStateNode> { 106 private final RegionInfo regionInfo; 107 private final ProcedureEvent<?> event; 108 109 private volatile RegionTransitionProcedure procedure = null; 110 private volatile ServerName regionLocation = null; 111 // notice that, the lastHost will only be updated when a region is successfully CLOSED through 112 // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync 113 // with the data in meta. 114 private volatile ServerName lastHost = null; 115 /** 116 * A Region-in-Transition (RIT) moves through states. 117 * See {@link State} for complete list. A Region that 118 * is opened moves from OFFLINE => OPENING => OPENED. 119 */ 120 private volatile State state = State.OFFLINE; 121 122 /** 123 * Updated whenever a call to {@link #setRegionLocation(ServerName)} 124 * or {@link #setState(State, State...)}. 125 */ 126 private volatile long lastUpdate = 0; 127 128 private volatile long openSeqNum = HConstants.NO_SEQNUM; 129 130 public RegionStateNode(final RegionInfo regionInfo) { 131 this.regionInfo = regionInfo; 132 this.event = new AssignmentProcedureEvent(regionInfo); 133 } 134 135 /** 136 * @param update new region state this node should be assigned. 
137 * @param expected current state should be in this given list of expected states 138 * @return true, if current state is in expected list; otherwise false. 139 */ 140 public boolean setState(final State update, final State... expected) { 141 if (!isInState(expected)) { 142 return false; 143 } 144 this.state = update; 145 this.lastUpdate = EnvironmentEdgeManager.currentTime(); 146 return true; 147 } 148 149 /** 150 * Put region into OFFLINE mode (set state and clear location). 151 * @return Last recorded server deploy 152 */ 153 public ServerName offline() { 154 setState(State.OFFLINE); 155 return setRegionLocation(null); 156 } 157 158 /** 159 * Set new {@link State} but only if currently in <code>expected</code> State 160 * (if not, throw {@link UnexpectedStateException}. 161 */ 162 public void transitionState(final State update, final State... expected) 163 throws UnexpectedStateException { 164 if (!setState(update, expected)) { 165 throw new UnexpectedStateException("Expected " + Arrays.toString(expected) + 166 " so could move to " + update + " but current state=" + getState()); 167 } 168 } 169 170 public boolean isInState(final State... expected) { 171 if (expected != null && expected.length > 0) { 172 boolean expectedState = false; 173 for (int i = 0; i < expected.length; ++i) { 174 expectedState |= (getState() == expected[i]); 175 } 176 return expectedState; 177 } 178 return true; 179 } 180 181 public boolean isStuck() { 182 return isInState(State.FAILED_OPEN) && getProcedure() != null; 183 } 184 185 public boolean isInTransition() { 186 return getProcedure() != null; 187 } 188 189 public long getLastUpdate() { 190 return procedure != null ? 
procedure.getLastUpdate() : lastUpdate; 191 } 192 193 public void setLastHost(final ServerName serverName) { 194 this.lastHost = serverName; 195 } 196 197 public void setOpenSeqNum(final long seqId) { 198 this.openSeqNum = seqId; 199 } 200 201 public ServerName setRegionLocation(final ServerName serverName) { 202 ServerName lastRegionLocation = this.regionLocation; 203 if (LOG.isTraceEnabled() && serverName == null) { 204 LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE")); 205 } 206 this.regionLocation = serverName; 207 this.lastUpdate = EnvironmentEdgeManager.currentTime(); 208 return lastRegionLocation; 209 } 210 211 public boolean setProcedure(final RegionTransitionProcedure proc) { 212 if (this.procedure != null && this.procedure != proc) { 213 return false; 214 } 215 this.procedure = proc; 216 return true; 217 } 218 219 public boolean unsetProcedure(final RegionTransitionProcedure proc) { 220 if (this.procedure != null && this.procedure != proc) { 221 return false; 222 } 223 this.procedure = null; 224 return true; 225 } 226 227 public RegionTransitionProcedure getProcedure() { 228 return procedure; 229 } 230 231 public ProcedureEvent<?> getProcedureEvent() { 232 return event; 233 } 234 235 public RegionInfo getRegionInfo() { 236 return regionInfo; 237 } 238 239 public TableName getTable() { 240 return getRegionInfo().getTable(); 241 } 242 243 public boolean isSystemTable() { 244 return getTable().isSystemTable(); 245 } 246 247 public ServerName getLastHost() { 248 return lastHost; 249 } 250 251 public ServerName getRegionLocation() { 252 return regionLocation; 253 } 254 255 public State getState() { 256 return state; 257 } 258 259 public long getOpenSeqNum() { 260 return openSeqNum; 261 } 262 263 public int getFormatVersion() { 264 // we don't have any format for now 265 // it should probably be in regionInfo.getFormatVersion() 266 return 0; 267 } 268 269 public RegionState toRegionState() { 270 return new 
RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation()); 271 } 272 273 @Override 274 public int compareTo(final RegionStateNode other) { 275 // NOTE: RegionInfo sort by table first, so we are relying on that. 276 // we have a TestRegionState#testOrderedByTable() that check for that. 277 return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo()); 278 } 279 280 @Override 281 public int hashCode() { 282 return getRegionInfo().hashCode(); 283 } 284 285 @Override 286 public boolean equals(final Object other) { 287 if (this == other) return true; 288 if (!(other instanceof RegionStateNode)) return false; 289 return compareTo((RegionStateNode)other) == 0; 290 } 291 292 @Override 293 public String toString() { 294 return toDescriptiveString(); 295 } 296 297 public String toShortString() { 298 // rit= is the current Region-In-Transition State -- see State enum. 299 return String.format("rit=%s, location=%s", getState(), getRegionLocation()); 300 } 301 302 public String toDescriptiveString() { 303 return String.format("%s, table=%s, region=%s", 304 toShortString(), getTable(), getRegionInfo().getEncodedName()); 305 } 306 } 307 308 // This comparator sorts the RegionStates by time stamp then Region name. 309 // Comparing by timestamp alone can lead us to discard different RegionStates that happen 310 // to share a timestamp. 311 private static class RegionStateStampComparator implements Comparator<RegionState> { 312 @Override 313 public int compare(final RegionState l, final RegionState r) { 314 int stampCmp = Long.compare(l.getStamp(), r.getStamp()); 315 return stampCmp != 0 ? stampCmp : RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion()); 316 } 317 } 318 319 /** 320 * Server State. 321 */ 322 public enum ServerState { 323 /** 324 * Initial state. Available. 325 */ 326 ONLINE, 327 328 /** 329 * Only server which carries meta can have this state. 
We will split wal for meta and then 330 * assign meta first before splitting other wals. 331 */ 332 SPLITTING_META, 333 334 /** 335 * Indicate that the meta splitting is done. We need this state so that the UnassignProcedure 336 * for meta can safely quit. See the comments in UnassignProcedure.remoteCallFailed for more 337 * details. 338 */ 339 SPLITTING_META_DONE, 340 341 /** 342 * Server expired/crashed. Currently undergoing WAL splitting. 343 */ 344 SPLITTING, 345 346 /** 347 * WAL splitting done. This state will be used to tell the UnassignProcedure that it can safely 348 * quit. See the comments in UnassignProcedure.remoteCallFailed for more details. 349 */ 350 OFFLINE 351 } 352 353 /** 354 * State of Server; list of hosted regions, etc. 355 */ 356 public static class ServerStateNode implements Comparable<ServerStateNode> { 357 private final ServerReportEvent reportEvent; 358 359 private final Set<RegionStateNode> regions; 360 private final ServerName serverName; 361 362 private volatile ServerState state = ServerState.ONLINE; 363 364 public ServerStateNode(final ServerName serverName) { 365 this.serverName = serverName; 366 this.regions = ConcurrentHashMap.newKeySet(); 367 this.reportEvent = new ServerReportEvent(serverName); 368 } 369 370 public ServerName getServerName() { 371 return serverName; 372 } 373 374 public ServerState getState() { 375 return state; 376 } 377 378 public ProcedureEvent<?> getReportEvent() { 379 return reportEvent; 380 } 381 382 public boolean isInState(final ServerState... 
expected) { 383 boolean expectedState = false; 384 if (expected != null) { 385 for (int i = 0; i < expected.length; ++i) { 386 expectedState |= (state == expected[i]); 387 } 388 } 389 return expectedState; 390 } 391 392 private void setState(final ServerState state) { 393 this.state = state; 394 } 395 396 public Set<RegionStateNode> getRegions() { 397 return regions; 398 } 399 400 public int getRegionCount() { 401 return regions.size(); 402 } 403 404 public ArrayList<RegionInfo> getRegionInfoList() { 405 ArrayList<RegionInfo> hris = new ArrayList<RegionInfo>(regions.size()); 406 for (RegionStateNode region: regions) { 407 hris.add(region.getRegionInfo()); 408 } 409 return hris; 410 } 411 412 public void addRegion(final RegionStateNode regionNode) { 413 this.regions.add(regionNode); 414 } 415 416 public void removeRegion(final RegionStateNode regionNode) { 417 this.regions.remove(regionNode); 418 } 419 420 @Override 421 public int compareTo(final ServerStateNode other) { 422 return getServerName().compareTo(other.getServerName()); 423 } 424 425 @Override 426 public int hashCode() { 427 return getServerName().hashCode(); 428 } 429 430 @Override 431 public boolean equals(final Object other) { 432 if (this == other) return true; 433 if (!(other instanceof ServerStateNode)) return false; 434 return compareTo((ServerStateNode)other) == 0; 435 } 436 437 @Override 438 public String toString() { 439 return String.format("name=%s, state=%s, regionCount=%d", getServerName(), getState(), 440 getRegionCount()); 441 } 442 } 443 444 public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR = 445 new RegionStateStampComparator(); 446 447 // TODO: Replace the ConcurrentSkipListMaps 448 /** 449 * RegionName -- i.e. 
RegionInfo.getRegionName() -- as bytes to {@link RegionStateNode} 450 */ 451 private final ConcurrentSkipListMap<byte[], RegionStateNode> regionsMap = 452 new ConcurrentSkipListMap<byte[], RegionStateNode>(Bytes.BYTES_COMPARATOR); 453 454 private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionInTransition = 455 new ConcurrentSkipListMap<RegionInfo, RegionStateNode>(RegionInfo.COMPARATOR); 456 457 /** 458 * Regions marked as offline on a read of hbase:meta. Unused or at least, once 459 * offlined, regions have no means of coming on line again. TODO. 460 */ 461 private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionOffline = 462 new ConcurrentSkipListMap<RegionInfo, RegionStateNode>(); 463 464 private final ConcurrentSkipListMap<byte[], RegionFailedOpen> regionFailedOpen = 465 new ConcurrentSkipListMap<byte[], RegionFailedOpen>(Bytes.BYTES_COMPARATOR); 466 467 private final ConcurrentHashMap<ServerName, ServerStateNode> serverMap = 468 new ConcurrentHashMap<ServerName, ServerStateNode>(); 469 470 public RegionStates() { } 471 472 public void clear() { 473 regionsMap.clear(); 474 regionInTransition.clear(); 475 regionOffline.clear(); 476 serverMap.clear(); 477 } 478 479 @VisibleForTesting 480 public boolean isRegionInRegionStates(final RegionInfo hri) { 481 return (regionsMap.containsKey(hri.getRegionName()) || regionInTransition.containsKey(hri) 482 || regionOffline.containsKey(hri)); 483 } 484 485 // ========================================================================== 486 // RegionStateNode helpers 487 // ========================================================================== 488 protected RegionStateNode createRegionStateNode(final RegionInfo regionInfo) { 489 RegionStateNode newNode = new RegionStateNode(regionInfo); 490 RegionStateNode oldNode = regionsMap.putIfAbsent(regionInfo.getRegionName(), newNode); 491 return oldNode != null ? 
oldNode : newNode; 492 } 493 494 protected RegionStateNode getOrCreateRegionStateNode(final RegionInfo regionInfo) { 495 RegionStateNode node = regionsMap.get(regionInfo.getRegionName()); 496 return node != null ? node : createRegionStateNode(regionInfo); 497 } 498 499 RegionStateNode getRegionStateNodeFromName(final byte[] regionName) { 500 return regionsMap.get(regionName); 501 } 502 503 public RegionStateNode getRegionStateNode(final RegionInfo regionInfo) { 504 return getRegionStateNodeFromName(regionInfo.getRegionName()); 505 } 506 507 public void deleteRegion(final RegionInfo regionInfo) { 508 regionsMap.remove(regionInfo.getRegionName()); 509 // See HBASE-20860 510 // After master restarts, merged regions' RIT state may not be cleaned, 511 // making sure they are cleaned here 512 if (regionInTransition.containsKey(regionInfo)) { 513 regionInTransition.remove(regionInfo); 514 } 515 // Remove from the offline regions map too if there. 516 if (this.regionOffline.containsKey(regionInfo)) { 517 if (LOG.isTraceEnabled()) LOG.trace("Removing from regionOffline Map: " + regionInfo); 518 this.regionOffline.remove(regionInfo); 519 } 520 } 521 522 public void deleteRegions(final List<RegionInfo> regionInfos) { 523 regionInfos.forEach(this::deleteRegion); 524 } 525 526 ArrayList<RegionStateNode> getTableRegionStateNodes(final TableName tableName) { 527 final ArrayList<RegionStateNode> regions = new ArrayList<RegionStateNode>(); 528 for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) { 529 if (!node.getTable().equals(tableName)) break; 530 regions.add(node); 531 } 532 return regions; 533 } 534 535 ArrayList<RegionState> getTableRegionStates(final TableName tableName) { 536 final ArrayList<RegionState> regions = new ArrayList<RegionState>(); 537 for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) { 538 if (!node.getTable().equals(tableName)) break; 539 regions.add(node.toRegionState()); 540 } 541 return regions; 542 } 543 
544 ArrayList<RegionInfo> getTableRegionsInfo(final TableName tableName) { 545 final ArrayList<RegionInfo> regions = new ArrayList<RegionInfo>(); 546 for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) { 547 if (!node.getTable().equals(tableName)) break; 548 regions.add(node.getRegionInfo()); 549 } 550 return regions; 551 } 552 553 Collection<RegionStateNode> getRegionStateNodes() { 554 return regionsMap.values(); 555 } 556 557 public ArrayList<RegionState> getRegionStates() { 558 final ArrayList<RegionState> regions = new ArrayList<RegionState>(regionsMap.size()); 559 for (RegionStateNode node: regionsMap.values()) { 560 regions.add(node.toRegionState()); 561 } 562 return regions; 563 } 564 565 // ========================================================================== 566 // RegionState helpers 567 // ========================================================================== 568 public RegionState getRegionState(final RegionInfo regionInfo) { 569 RegionStateNode regionStateNode = getRegionStateNode(regionInfo); 570 return regionStateNode == null ? null : regionStateNode.toRegionState(); 571 } 572 573 public RegionState getRegionState(final String encodedRegionName) { 574 // TODO: Need a map <encodedName, ...> but it is just dispatch merge... 575 for (RegionStateNode node: regionsMap.values()) { 576 if (node.getRegionInfo().getEncodedName().equals(encodedRegionName)) { 577 return node.toRegionState(); 578 } 579 } 580 return null; 581 } 582 583 // ============================================================================================ 584 // TODO: helpers 585 // ============================================================================================ 586 public boolean hasTableRegionStates(final TableName tableName) { 587 // TODO 588 return !getTableRegionStates(tableName).isEmpty(); 589 } 590 591 /** 592 * @return Return online regions of table; does not include OFFLINE or SPLITTING regions. 
593 */ 594 public List<RegionInfo> getRegionsOfTable(final TableName table) { 595 return getRegionsOfTable(table, false); 596 } 597 598 private HRegionLocation createRegionForReopen(RegionStateNode node) { 599 synchronized (node) { 600 if (!include(node, false)) { 601 return null; 602 } 603 if (node.isInState(State.OPEN)) { 604 return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), 605 node.getOpenSeqNum()); 606 } else if (node.isInState(State.OPENING)) { 607 return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), -1); 608 } else { 609 return null; 610 } 611 } 612 } 613 614 /** 615 * Get the regions to be reopened when modifying a table. 616 * <p/> 617 * Notice that the {@code openSeqNum} in the returned HRegionLocation is also used to indicate the 618 * state of this region, positive means the region is in {@link State#OPEN}, -1 means 619 * {@link State#OPENING}. And for regions in other states we do not need reopen them. 620 */ 621 public List<HRegionLocation> getRegionsOfTableForReopen(TableName tableName) { 622 return getTableRegionStateNodes(tableName).stream().map(this::createRegionForReopen) 623 .filter(r -> r != null).collect(Collectors.toList()); 624 } 625 626 /** 627 * Check whether the region has been reopened. The meaning of the {@link HRegionLocation} is the 628 * same with {@link #getRegionsOfTableForReopen(TableName)}. 629 * <p/> 630 * For a region which is in {@link State#OPEN} before, if the region state is changed or the open 631 * seq num is changed, we can confirm that it has been reopened. 632 * <p/> 633 * For a region which is in {@link State#OPENING} before, usually it will be in {@link State#OPEN} 634 * now and we will schedule a MRP to reopen it. 
But there are several exceptions: 635 * <ul> 636 * <li>The region is in state other than {@link State#OPEN} or {@link State#OPENING}.</li> 637 * <li>The location of the region has been changed</li> 638 * </ul> 639 * Of course the region could still be in {@link State#OPENING} state and still on the same 640 * server, then here we will still return a {@link HRegionLocation} for it, just like 641 * {@link #getRegionsOfTableForReopen(TableName)}. 642 * @param oldLoc the previous state/location of this region 643 * @return null if the region has been reopened, otherwise a new {@link HRegionLocation} which 644 * means we still need to reopen the region. 645 * @see #getRegionsOfTableForReopen(TableName) 646 */ 647 public HRegionLocation checkReopened(HRegionLocation oldLoc) { 648 RegionStateNode node = getRegionStateNode(oldLoc.getRegion()); 649 // HBASE-20921 650 // if the oldLoc's state node does not exist, that means the region is 651 // merged or split, no need to check it 652 if (node == null) { 653 return null; 654 } 655 synchronized (node) { 656 if (oldLoc.getSeqNum() >= 0) { 657 // in OPEN state before 658 if (node.isInState(State.OPEN)) { 659 if (node.getOpenSeqNum() > oldLoc.getSeqNum()) { 660 // normal case, the region has been reopened 661 return null; 662 } else { 663 // the open seq num does not change, need to reopen again 664 return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), 665 node.getOpenSeqNum()); 666 } 667 } else { 668 // the state has been changed so we can make sure that the region has been reopened(not 669 // finished maybe, but not a problem). 
670 return null; 671 } 672 } else { 673 // in OPENING state before 674 if (!node.isInState(State.OPEN, State.OPENING)) { 675 // not in OPEN or OPENING state, then we can make sure that the region has been 676 // reopened(not finished maybe, but not a problem) 677 return null; 678 } else { 679 if (!node.getRegionLocation().equals(oldLoc.getServerName())) { 680 // the region has been moved, so we can make sure that the region has been reopened. 681 return null; 682 } 683 // normal case, we are still in OPENING state, or the reopen has been opened and the state 684 // is changed to OPEN. 685 long openSeqNum = node.isInState(State.OPEN) ? node.getOpenSeqNum() : -1; 686 return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), openSeqNum); 687 } 688 } 689 } 690 } 691 692 /** 693 * @return Return online regions of table; does not include OFFLINE or SPLITTING regions. 694 */ 695 public List<RegionInfo> getRegionsOfTable(TableName table, boolean offline) { 696 return getRegionsOfTable(table, state -> include(state, offline)); 697 } 698 699 /** 700 * @return Return the regions of the table; does not include OFFLINE unless you set 701 * <code>offline</code> to true. Does not include regions that are in the 702 * {@link State#SPLIT} state. 703 */ 704 private List<RegionInfo> getRegionsOfTable(TableName table, Predicate<RegionStateNode> filter) { 705 return getTableRegionStateNodes(table).stream().filter(filter).map(n -> n.getRegionInfo()) 706 .collect(Collectors.toList()); 707 } 708 709 /** 710 * Utility. Whether to include region in list of regions. Default is to 711 * weed out split and offline regions. 712 * @return True if we should include the <code>node</code> (do not include 713 * if split or offline unless <code>offline</code> is set to true. 
714 */ 715 boolean include(final RegionStateNode node, final boolean offline) { 716 if (LOG.isTraceEnabled()) { 717 LOG.trace("WORKING ON " + node + " " + node.getRegionInfo()); 718 } 719 if (node.isInState(State.SPLIT)) return false; 720 if (node.isInState(State.OFFLINE) && !offline) return false; 721 final RegionInfo hri = node.getRegionInfo(); 722 return (!hri.isOffline() && !hri.isSplit()) || 723 ((hri.isOffline() || hri.isSplit()) && offline); 724 } 725 726 /** 727 * Returns the set of regions hosted by the specified server 728 * @param serverName the server we are interested in 729 * @return set of RegionInfo hosted by the specified server 730 */ 731 public List<RegionInfo> getServerRegionInfoSet(final ServerName serverName) { 732 final ServerStateNode serverInfo = getServerNode(serverName); 733 if (serverInfo == null) return Collections.emptyList(); 734 735 synchronized (serverInfo) { 736 return serverInfo.getRegionInfoList(); 737 } 738 } 739 740 // ============================================================================================ 741 // Split helpers 742 // These methods will only be called in ServerCrashProcedure, and at the end of SCP we will remove 743 // the ServerStateNode by calling removeServer. 744 // ============================================================================================ 745 746 private void setServerState(ServerName serverName, ServerState state) { 747 ServerStateNode serverNode = getOrCreateServer(serverName); 748 synchronized (serverNode) { 749 serverNode.setState(state); 750 } 751 } 752 753 /** 754 * Call this when we start meta log splitting a crashed Server. 755 * @see #metaLogSplit(ServerName) 756 */ 757 public void metaLogSplitting(ServerName serverName) { 758 setServerState(serverName, ServerState.SPLITTING_META); 759 } 760 761 /** 762 * Called after we've split the meta logs on a crashed Server. 
763 * @see #metaLogSplitting(ServerName) 764 */ 765 public void metaLogSplit(ServerName serverName) { 766 setServerState(serverName, ServerState.SPLITTING_META_DONE); 767 } 768 769 /** 770 * Call this when we start log splitting for a crashed Server. 771 * @see #logSplit(ServerName) 772 */ 773 public void logSplitting(final ServerName serverName) { 774 setServerState(serverName, ServerState.SPLITTING); 775 } 776 777 /** 778 * Called after we've split all logs on a crashed Server. 779 * @see #logSplitting(ServerName) 780 */ 781 public void logSplit(final ServerName serverName) { 782 setServerState(serverName, ServerState.OFFLINE); 783 } 784 785 public void updateRegionState(final RegionInfo regionInfo, final State state) { 786 final RegionStateNode regionNode = getOrCreateRegionStateNode(regionInfo); 787 synchronized (regionNode) { 788 regionNode.setState(state); 789 } 790 } 791 792 // ============================================================================================ 793 // TODO: 794 // ============================================================================================ 795 public List<RegionInfo> getAssignedRegions() { 796 final List<RegionInfo> result = new ArrayList<RegionInfo>(); 797 for (RegionStateNode node: regionsMap.values()) { 798 if (!node.isInTransition()) { 799 result.add(node.getRegionInfo()); 800 } 801 } 802 return result; 803 } 804 805 public boolean isRegionInState(final RegionInfo regionInfo, final State... state) { 806 final RegionStateNode region = getRegionStateNode(regionInfo); 807 if (region != null) { 808 synchronized (region) { 809 return region.isInState(state); 810 } 811 } 812 return false; 813 } 814 815 public boolean isRegionOnline(final RegionInfo regionInfo) { 816 return isRegionInState(regionInfo, State.OPEN); 817 } 818 819 /** 820 * @return True if region is offline (In OFFLINE or CLOSED state). 
*/
  public boolean isRegionOffline(final RegionInfo regionInfo) {
    return isRegionInState(regionInfo, State.OFFLINE, State.CLOSED);
  }

  /**
   * Build a server-to-regions map for the given regions, or for every entry of
   * <code>regionsMap</code> when <code>regions</code> is null. Regions that have no state node,
   * or whose node has no current location, are left out.
   *
   * @param regions the regions to include; <code>null</code> means all known regions.
   * @return a newly created map from server to the regions located on it.
   */
  public Map<ServerName, List<RegionInfo>> getSnapShotOfAssignment(
      final Collection<RegionInfo> regions) {
    final Map<ServerName, List<RegionInfo>> result = new HashMap<>();
    if (regions != null) {
      for (RegionInfo hri : regions) {
        final RegionStateNode node = getRegionStateNode(hri);
        if (node != null) {
          createSnapshot(node, result);
        }
      }
    } else {
      for (RegionStateNode node : regionsMap.values()) {
        if (node != null) {
          createSnapshot(node, result);
        }
      }
    }
    return result;
  }

  /**
   * Append the node's region to the list kept for its current location in <code>result</code>,
   * creating that list on first use. Does nothing when the node has no location.
   */
  private void createSnapshot(RegionStateNode node, Map<ServerName, List<RegionInfo>> result) {
    final ServerName serverName = node.getRegionLocation();
    if (serverName == null) {
      return;
    }
    result.computeIfAbsent(serverName, s -> new ArrayList<>()).add(node.getRegionInfo());
  }

  /**
   * @return a new map from every region in <code>regionsMap</code> to its current location; the
   *         value is whatever {@link RegionStateNode#getRegionLocation()} returns, which may be
   *         null.
   */
  public Map<RegionInfo, ServerName> getRegionAssignments() {
    final HashMap<RegionInfo, ServerName> assignments = new HashMap<>();
    for (RegionStateNode node : regionsMap.values()) {
      assignments.put(node.getRegionInfo(), node.getRegionLocation());
    }
    return assignments;
  }

  /**
   * Group the regions of a table by their current state.
   *
   * @param tableName the table whose regions are wanted.
   * @return a new map holding one (possibly empty) list for every {@link State} value.
   */
  public Map<RegionState.State, List<RegionInfo>> getRegionByStateOfTable(TableName tableName) {
    final State[] states = State.values();
    final Map<RegionState.State, List<RegionInfo>> tableRegions = new HashMap<>(states.length);
    // Pre-populate so that the lookup in the loop below never misses.
    for (State state : states) {
      tableRegions.put(state, new ArrayList<>());
    }

    for (RegionStateNode node : regionsMap.values()) {
      if (node.getTable().equals(tableName)) {
        tableRegions.get(node.getState()).add(node.getRegionInfo());
      }
    }
    return tableRegions;
  }

  /**
   * @return the current location of the region or, when it has none, its last host; null when
   *         there is no state node for the region.
   */
  public ServerName getRegionServerOfRegion(final RegionInfo regionInfo) {
    final RegionStateNode region = getRegionStateNode(regionInfo);
    if (region != null) {
      // Read location and last host under the node's monitor.
      synchronized (region) {
        ServerName server = region.getRegionLocation();
        return server != null ? server : region.getLastHost();
      }
    }
    return null;
  }

  /**
   * This is an EXPENSIVE clone. Cloning though is the safest thing to do.
   * Can't let out original since it can change and at least the load balancer
   * wants to iterate this exported list. No lock is taken in this method; the result is
   * assembled by iterating <code>regionsMap</code> and <code>serverMap</code>.
   *
   * @param isByTable If <code>true</code>, return the assignments by table. If <code>false</code>,
   *          return the assignments which aggregate the server-load to the cluster level.
   * @return A clone of current assignments.
   */
  public Map<TableName, Map<ServerName, List<RegionInfo>>> getAssignmentsForBalancer(
      boolean isByTable) {
    final Map<TableName, Map<ServerName, List<RegionInfo>>> result = new HashMap<>();
    if (isByTable) {
      for (RegionStateNode node : regionsMap.values()) {
        // The per-table map is created before the location check, so a table is listed even
        // when none of its regions currently has a server.
        Map<ServerName, List<RegionInfo>> tableResult =
            result.computeIfAbsent(node.getTable(), t -> new HashMap<>());
        final ServerName serverName = node.getRegionLocation();
        if (serverName == null) {
          LOG.info("Skipping, no server for " + node);
          continue;
        }
        List<RegionInfo> serverResult =
            tableResult.computeIfAbsent(serverName, s -> new ArrayList<>());
        serverResult.add(node.getRegionInfo());
      }
      // Add online servers with no assignment for the table.
      for (Map<ServerName, List<RegionInfo>> table : result.values()) {
        for (ServerName serverName : serverMap.keySet()) {
          // computeIfAbsent only allocates the empty list when the server is really missing.
          table.computeIfAbsent(serverName, s -> new ArrayList<>());
        }
      }
    } else {
      final HashMap<ServerName, List<RegionInfo>> ensemble = new HashMap<>(serverMap.size());
      for (ServerStateNode serverNode : serverMap.values()) {
        ensemble.put(serverNode.getServerName(), serverNode.getRegionInfoList());
      }
      // Use a fake table name to represent the whole cluster's assignments
      result.put(HConstants.ENSEMBLE_TABLE_NAME, ensemble);
    }
    return result;
  }

  // ==========================================================================
  //  Region in transition helpers
  // ==========================================================================
  /**
   * Register the node in <code>regionInTransition</code>. When a procedure is given it is first
   * attached to the node through <code>setProcedure</code>; if that call returns false the node
   * is not registered.
   *
   * @return false if a non-null procedure could not be set on the node, true otherwise.
   */
  protected boolean addRegionInTransition(final RegionStateNode regionNode,
      final RegionTransitionProcedure procedure) {
    if (procedure != null && !regionNode.setProcedure(procedure)) {
      return false;
    }

    regionInTransition.put(regionNode.getRegionInfo(), regionNode);
    return true;
  }

  /**
   * Remove the node from <code>regionInTransition</code> and then unset the given procedure on
   * it.
   */
  protected void removeRegionInTransition(final RegionStateNode regionNode,
      final RegionTransitionProcedure procedure) {
    regionInTransition.remove(regionNode.getRegionInfo());
    regionNode.unsetProcedure(procedure);
  }

  /**
   * @return true if <code>regionInTransition</code> has at least one entry.
   */
  public boolean hasRegionsInTransition() {
    return !regionInTransition.isEmpty();
  }

  /**
   * @return true if the region is registered in <code>regionInTransition</code> and its node
   *         reports itself as in transition.
   */
  public boolean isRegionInTransition(final RegionInfo regionInfo) {
    final RegionStateNode node = regionInTransition.get(regionInfo);
    return node != null && node.isInTransition();
  }

  /**
   * @return If a procedure-in-transition for <code>hri</code>, return it else null.
   */
  public RegionTransitionProcedure getRegionTransitionProcedure(final RegionInfo hri) {
    RegionStateNode node = regionInTransition.get(hri);
    if (node == null) {
      return null;
    }
    return node.getProcedure();
  }

  /**
   * @return the {@link RegionState} of <code>hri</code> if it is registered in
   *         <code>regionInTransition</code> and its node is in transition, else null.
   */
  public RegionState getRegionTransitionState(final RegionInfo hri) {
    RegionStateNode node = regionInTransition.get(hri);
    if (node == null) {
      return null;
    }

    // Check and convert under the node's monitor.
    synchronized (node) {
      return node.isInTransition() ? node.toRegionState() : null;
    }
  }

  /**
   * @return a new list holding the nodes currently in <code>regionInTransition</code>.
   */
  public List<RegionStateNode> getRegionsInTransition() {
    return new ArrayList<>(regionInTransition.values());
  }

  /**
   * Get the number of regions in transition.
   */
  public int getRegionsInTransitionCount() {
    return regionInTransition.size();
  }

  /**
   * @return a new list with the {@link RegionState} of every node in
   *         <code>regionInTransition</code>.
   */
  public List<RegionState> getRegionsStateInTransition() {
    final List<RegionState> rit = new ArrayList<>(regionInTransition.size());
    for (RegionStateNode node : regionInTransition.values()) {
      rit.add(node.toRegionState());
    }
    return rit;
  }

  /**
   * @return a new set with the {@link RegionState} of every node in
   *         <code>regionInTransition</code>, ordered by
   *         <code>REGION_STATE_STAMP_COMPARATOR</code>.
   */
  public SortedSet<RegionState> getRegionsInTransitionOrderedByTimestamp() {
    final SortedSet<RegionState> rit = new TreeSet<>(REGION_STATE_STAMP_COMPARATOR);
    for (RegionStateNode node : regionInTransition.values()) {
      rit.add(node.toRegionState());
    }
    return rit;
  }

  // ==========================================================================
  //  Region offline helpers
  // ==========================================================================
  // TODO: Populated when we read meta but regions never make it out of here.
  public void addToOfflineRegions(final RegionStateNode regionNode) {
    LOG.info("Added to offline, CURRENTLY NEVER CLEARED!!! " + regionNode);
    regionOffline.put(regionNode.getRegionInfo(), regionNode);
  }

  // TODO: Unused.
  public void removeFromOfflineRegions(final RegionInfo regionInfo) {
    regionOffline.remove(regionInfo);
  }

  // ==========================================================================
  //  Region FAIL_OPEN helpers
  // ==========================================================================
  /**
   * Bookkeeping for a region that failed to open: the region's state node, a retry counter and
   * the most recently recorded exception.
   */
  public static final class RegionFailedOpen {
    private final RegionStateNode regionNode;

    private volatile Exception exception = null;
    private final AtomicInteger retries = new AtomicInteger();

    public RegionFailedOpen(final RegionStateNode regionNode) {
      this.regionNode = regionNode;
    }

    public RegionStateNode getRegionStateNode() {
      return regionNode;
    }

    public RegionInfo getRegionInfo() {
      return regionNode.getRegionInfo();
    }

    /**
     * @return the retry count after incrementing it by one.
     */
    public int incrementAndGetRetries() {
      return this.retries.incrementAndGet();
    }

    public int getRetries() {
      return retries.get();
    }

    public void setException(final Exception exception) {
      this.exception = exception;
    }

    public Exception getException() {
      return this.exception;
    }
  }

  /**
   * Get the failed-open record for the node's region, creating and registering one if there is
   * none yet. If another record is registered between the lookup and the insert, that record is
   * returned instead of the new one.
   */
  public RegionFailedOpen addToFailedOpen(final RegionStateNode regionNode) {
    final byte[] key = regionNode.getRegionInfo().getRegionName();
    RegionFailedOpen node = regionFailedOpen.get(key);
    if (node == null) {
      RegionFailedOpen newNode = new RegionFailedOpen(regionNode);
      RegionFailedOpen oldNode = regionFailedOpen.putIfAbsent(key, newNode);
      node = oldNode != null ? oldNode : newNode;
    }
    return node;
  }

  /**
   * @return the failed-open record registered under the region's name, or null if none.
   */
  public RegionFailedOpen getFailedOpen(final RegionInfo regionInfo) {
    return regionFailedOpen.get(regionInfo.getRegionName());
  }

  /**
   * Drop the failed-open record registered under the region's name, if any.
   */
  public void removeFromFailedOpen(final RegionInfo regionInfo) {
    regionFailedOpen.remove(regionInfo.getRegionName());
  }

  /**
   * @return the {@link RegionState} of every region that has a failed-open record; an immutable
   *         empty list when there are none.
   */
  public List<RegionState> getRegionFailedOpen() {
    if (regionFailedOpen.isEmpty()) {
      return Collections.emptyList();
    }

    ArrayList<RegionState> regions = new ArrayList<>(regionFailedOpen.size());
    for (RegionFailedOpen r : regionFailedOpen.values()) {
      regions.add(r.getRegionStateNode().toRegionState());
    }
    return regions;
  }

  // ==========================================================================
  //  Servers
  // ==========================================================================

  /**
   * Be judicious calling this method. Do it on server register ONLY otherwise
   * you could mess up online server accounting. TODO: Review usage and convert
   * to {@link #getServerNode(ServerName)} where we can.
   */
  ServerStateNode getOrCreateServer(final ServerName serverName) {
    ServerStateNode node = serverMap.get(serverName);
    if (node == null) {
      // Only pay for the stack-trace capture when the trace line will actually be written.
      if (LOG.isTraceEnabled()) {
        LOG.trace("CREATING! {}", serverName, new RuntimeException("WHERE AM I?"));
      }
      node = new ServerStateNode(serverName);
      ServerStateNode oldNode = serverMap.putIfAbsent(serverName, node);
      node = oldNode != null ? oldNode : node;
    }
    return node;
  }

  /**
   * Remove the entry for the given server from <code>serverMap</code>.
   */
  public void removeServer(final ServerName serverName) {
    serverMap.remove(serverName);
  }

  /**
   * @return the node registered for the server in <code>serverMap</code>, or null if none.
   */
  public ServerStateNode getServerNode(final ServerName serverName) {
    return serverMap.get(serverName);
  }

  /**
   * @return the sum of the region counts of all server nodes divided by the number of server
   *         nodes, or 0.0 when there are no server nodes.
   */
  public double getAverageLoad() {
    int numServers = 0;
    int totalLoad = 0;
    for (ServerStateNode node : serverMap.values()) {
      totalLoad += node.getRegionCount();
      numServers++;
    }
    return numServers == 0 ? 0.0 : (double) totalLoad / (double) numServers;
  }

  /**
   * Add reference to region to serverstatenode.
   * DOES NOT AUTO-CREATE ServerStateNode instance.
   * @return Return serverstatenode or null if none.
   */
  ServerStateNode addRegionToServer(final RegionStateNode regionNode) {
    ServerStateNode ssn = getServerNode(regionNode.getRegionLocation());
    if (ssn == null) {
      return null;
    }
    ssn.addRegion(regionNode);
    return ssn;
  }

  /**
   * @return true if <code>info</code> is itself a non-default replica, or if a non-default
   *         replica of the same region is found in <code>regionsMap</code>; false otherwise.
   */
  public boolean isReplicaAvailableForRegion(final RegionInfo info) {
    // if the region info itself is a replica return true.
    if (!RegionReplicaUtil.isDefaultReplica(info)) {
      return true;
    }
    // iterate the regionsMap for the given region name. If there are replicas it should
    // list them in order.
    for (RegionStateNode node : regionsMap.tailMap(info.getRegionName()).values()) {
      if (!node.getTable().equals(info.getTable())
          || !ServerRegionReplicaUtil.isReplicasForSameRegion(info, node.getRegionInfo())) {
        break;
      } else if (!RegionReplicaUtil.isDefaultReplica(node.getRegionInfo())) {
        // we have replicas
        return true;
      }
    }
    // we don't have replicas
    return false;
  }

  /**
   * Remove the region from the node of the given server, if that server has a node.
   *
   * @return the server's node, or null if the server has none.
   */
  public ServerStateNode removeRegionFromServer(final ServerName serverName,
      final RegionStateNode regionNode) {
    ServerStateNode serverNode = getServerNode(serverName);
    if (serverNode != null) {
      serverNode.removeRegion(regionNode);
    }
    return serverNode;
  }

  // ==========================================================================
  //  ToString helpers
  // ==========================================================================
  /**
   * Render region names as <code>[name1, name2, ...]</code>, converting each name with
   * {@link Bytes#toStringBinary(byte[])}. An empty collection yields <code>[]</code>.
   */
  public static String regionNamesToString(final Collection<byte[]> regions) {
    final StringBuilder sb = new StringBuilder();
    final Iterator<byte[]> it = regions.iterator();
    sb.append("[");
    if (it.hasNext()) {
      sb.append(Bytes.toStringBinary(it.next()));
      while (it.hasNext()) {
        sb.append(", ");
        sb.append(Bytes.toStringBinary(it.next()));
      }
    }
    sb.append("]");
    return sb.toString();
  }
}