001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.balancer; 019 020import static org.apache.hadoop.hbase.ServerName.NON_STARTCODE; 021import static org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper.FAVORED_NODES_NUM; 022import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY; 023import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY; 024import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY; 025 026import edu.umd.cs.findbugs.annotations.NonNull; 027import java.io.IOException; 028import java.util.ArrayList; 029import java.util.Collection; 030import java.util.HashMap; 031import java.util.List; 032import java.util.Map; 033import java.util.Map.Entry; 034import java.util.Set; 035 036import org.apache.hadoop.hbase.HBaseIOException; 037import org.apache.hadoop.hbase.ServerMetrics; 038import org.apache.hadoop.hbase.ServerName; 039import org.apache.hadoop.hbase.TableName; 040import org.apache.hadoop.hbase.client.RegionInfo; 041import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper; 042import org.apache.hadoop.hbase.favored.FavoredNodesManager; 043import org.apache.hadoop.hbase.favored.FavoredNodesPlan; 044import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position; 045import org.apache.hadoop.hbase.favored.FavoredNodesPromoter; 046import org.apache.hadoop.hbase.master.LoadBalancer; 047import org.apache.hadoop.hbase.master.MasterServices; 048import org.apache.hadoop.hbase.master.RegionPlan; 049import org.apache.hadoop.hbase.util.Pair; 050import org.apache.yetus.audience.InterfaceAudience; 051import org.slf4j.Logger; 052import org.slf4j.LoggerFactory; 053 054import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 055import org.apache.hbase.thirdparty.com.google.common.collect.Maps; 056import org.apache.hbase.thirdparty.com.google.common.collect.Sets; 057 058/** 059 * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that 060 * assigns favored nodes for each region. There is a Primary RegionServer that hosts 061 * the region, and then there is Secondary and Tertiary RegionServers. Currently, the 062 * favored nodes information is used in creating HDFS files - the Primary RegionServer 063 * passes the primary, secondary, tertiary node addresses as hints to the 064 * DistributedFileSystem API for creating files on the filesystem. These nodes are 065 * treated as hints by the HDFS to place the blocks of the file. This alleviates the 066 * problem to do with reading from remote nodes (since we can make the Secondary 067 * RegionServer as the new Primary RegionServer) after a region is recovered. This 068 * should help provide consistent read latencies for the regions even when their 069 * primary region servers die. This provides two 070 * {@link CandidateGenerator} 071 * 072 */ 073@InterfaceAudience.Private 074public class FavoredStochasticBalancer extends StochasticLoadBalancer implements 075 FavoredNodesPromoter { 076 077 private static final Logger LOG = LoggerFactory.getLogger(FavoredStochasticBalancer.class); 078 private FavoredNodesManager fnm; 079 080 @Override 081 public void initialize() throws HBaseIOException { 082 configureGenerators(); 083 super.initialize(); 084 } 085 086 protected void configureGenerators() { 087 List<CandidateGenerator> fnPickers = new ArrayList<>(2); 088 fnPickers.add(new FavoredNodeLoadPicker()); 089 fnPickers.add(new FavoredNodeLocalityPicker()); 090 setCandidateGenerators(fnPickers); 091 } 092 093 @Override 094 public synchronized void setMasterServices(MasterServices masterServices) { 095 super.setMasterServices(masterServices); 096 fnm = masterServices.getFavoredNodesManager(); 097 } 098 099 /* 100 * Round robin assignment: Segregate the regions into two types: 101 * 102 * 1. The regions that have favored node assignment where at least one of the favored node 103 * is still alive. In this case, try to adhere to the current favored nodes assignment as 104 * much as possible - i.e., if the current primary is gone, then make the secondary or 105 * tertiary as the new host for the region (based on their current load). Note that we don't 106 * change the favored node assignments here (even though one or more favored node is 107 * currently down). That will be done by the admin operations. 108 * 109 * 2. The regions that currently don't have favored node assignments. Generate favored nodes 110 * for them and then assign. Generate the primary fn in round robin fashion and generate 111 * secondary and tertiary as per favored nodes constraints. 112 */ 113 @Override 114 @NonNull 115 public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions, 116 List<ServerName> servers) throws HBaseIOException { 117 118 metricsBalancer.incrMiscInvocations(); 119 120 Set<RegionInfo> regionSet = Sets.newHashSet(regions); 121 Map<ServerName, List<RegionInfo>> assignmentMap = assignMasterSystemRegions(regions, servers); 122 if (!assignmentMap.isEmpty()) { 123 servers = new ArrayList<>(servers); 124 // Guarantee not to put other regions on master 125 servers.remove(masterServerName); 126 List<RegionInfo> masterRegions = assignmentMap.get(masterServerName); 127 if (!masterRegions.isEmpty()) { 128 for (RegionInfo region: masterRegions) { 129 regionSet.remove(region); 130 } 131 } 132 } 133 134 if (regionSet.isEmpty()) { 135 return assignmentMap; 136 } 137 138 try { 139 FavoredNodeAssignmentHelper helper = 140 new FavoredNodeAssignmentHelper(servers, fnm.getRackManager()); 141 helper.initialize(); 142 143 Set<RegionInfo> systemRegions = FavoredNodesManager.filterNonFNApplicableRegions(regionSet); 144 regionSet.removeAll(systemRegions); 145 146 // Assign all system regions 147 Map<ServerName, List<RegionInfo>> systemAssignments = 148 super.roundRobinAssignment(Lists.newArrayList(systemRegions), servers); 149 150 // Segregate favored and non-favored nodes regions and assign accordingly. 151 Pair<Map<ServerName,List<RegionInfo>>, List<RegionInfo>> segregatedRegions = 152 segregateRegionsAndAssignRegionsWithFavoredNodes(regionSet, servers); 153 Map<ServerName, List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst(); 154 Map<ServerName, List<RegionInfo>> regionsWithoutFN = 155 generateFNForRegionsWithoutFN(helper, segregatedRegions.getSecond()); 156 157 // merge the assignment maps 158 mergeAssignmentMaps(assignmentMap, systemAssignments); 159 mergeAssignmentMaps(assignmentMap, regionsWithFavoredNodesMap); 160 mergeAssignmentMaps(assignmentMap, regionsWithoutFN); 161 162 } catch (Exception ex) { 163 throw new HBaseIOException("Encountered exception while doing favored-nodes assignment " 164 + ex + " Falling back to regular assignment", ex); 165 } 166 return assignmentMap; 167 } 168 169 private void mergeAssignmentMaps(Map<ServerName, List<RegionInfo>> assignmentMap, 170 Map<ServerName, List<RegionInfo>> otherAssignments) { 171 172 if (otherAssignments == null || otherAssignments.isEmpty()) { 173 return; 174 } 175 176 for (Entry<ServerName, List<RegionInfo>> entry : otherAssignments.entrySet()) { 177 ServerName sn = entry.getKey(); 178 List<RegionInfo> regionsList = entry.getValue(); 179 if (assignmentMap.get(sn) == null) { 180 assignmentMap.put(sn, Lists.newArrayList(regionsList)); 181 } else { 182 assignmentMap.get(sn).addAll(regionsList); 183 } 184 } 185 } 186 187 private Map<ServerName, List<RegionInfo>> generateFNForRegionsWithoutFN( 188 FavoredNodeAssignmentHelper helper, List<RegionInfo> regions) throws IOException { 189 190 Map<ServerName, List<RegionInfo>> assignmentMap = Maps.newHashMap(); 191 Map<RegionInfo, List<ServerName>> regionsNoFNMap; 192 193 if (regions.size() > 0) { 194 regionsNoFNMap = helper.generateFavoredNodesRoundRobin(assignmentMap, regions); 195 fnm.updateFavoredNodes(regionsNoFNMap); 196 } 197 return assignmentMap; 198 } 199 200 /* 201 * Return a pair - one with assignments when favored nodes are present and another with regions 202 * without favored nodes. 203 */ 204 private Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>> 205 segregateRegionsAndAssignRegionsWithFavoredNodes(Collection<RegionInfo> regions, 206 List<ServerName> onlineServers) throws HBaseIOException { 207 208 // Since we expect FN to be present most of the time, lets create map with same size 209 Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes = 210 new HashMap<>(onlineServers.size()); 211 List<RegionInfo> regionsWithNoFavoredNodes = new ArrayList<>(); 212 213 for (RegionInfo region : regions) { 214 List<ServerName> favoredNodes = fnm.getFavoredNodes(region); 215 ServerName primaryHost = null; 216 ServerName secondaryHost = null; 217 ServerName tertiaryHost = null; 218 219 if (favoredNodes != null && !favoredNodes.isEmpty()) { 220 for (ServerName s : favoredNodes) { 221 ServerName serverWithLegitStartCode = getServerFromFavoredNode(onlineServers, s); 222 if (serverWithLegitStartCode != null) { 223 FavoredNodesPlan.Position position = 224 FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s); 225 if (Position.PRIMARY.equals(position)) { 226 primaryHost = serverWithLegitStartCode; 227 } else if (Position.SECONDARY.equals(position)) { 228 secondaryHost = serverWithLegitStartCode; 229 } else if (Position.TERTIARY.equals(position)) { 230 tertiaryHost = serverWithLegitStartCode; 231 } 232 } 233 } 234 assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region, primaryHost, 235 secondaryHost, tertiaryHost); 236 } else { 237 regionsWithNoFavoredNodes.add(region); 238 } 239 } 240 return new Pair<>(assignmentMapForFavoredNodes, regionsWithNoFavoredNodes); 241 } 242 243 private void addRegionToMap(Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes, 244 RegionInfo region, ServerName host) { 245 246 List<RegionInfo> regionsOnServer; 247 if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) { 248 regionsOnServer = Lists.newArrayList(); 249 assignmentMapForFavoredNodes.put(host, regionsOnServer); 250 } 251 regionsOnServer.add(region); 252 } 253 254 /* 255 * Get the ServerName for the FavoredNode. Since FN's startcode is -1, we could want to get the 256 * ServerName with the correct start code from the list of provided servers. 257 */ 258 private ServerName getServerFromFavoredNode(List<ServerName> servers, ServerName fn) { 259 for (ServerName server : servers) { 260 if (ServerName.isSameAddress(fn, server)) { 261 return server; 262 } 263 } 264 return null; 265 } 266 267 /* 268 * Assign the region to primary if its available. If both secondary and tertiary are available, 269 * assign to the host which has less load. Else assign to secondary or tertiary whichever is 270 * available (in that order). 271 */ 272 private void assignRegionToAvailableFavoredNode( 273 Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes, RegionInfo region, 274 ServerName primaryHost, ServerName secondaryHost, ServerName tertiaryHost) { 275 276 if (primaryHost != null) { 277 addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost); 278 279 } else if (secondaryHost != null && tertiaryHost != null) { 280 281 // Assign the region to the one with a lower load (both have the desired hdfs blocks) 282 ServerName s; 283 ServerMetrics tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost); 284 ServerMetrics secondaryLoad = super.services.getServerManager().getLoad(secondaryHost); 285 if (secondaryLoad != null && tertiaryLoad != null) { 286 if (secondaryLoad.getRegionMetrics().size() < tertiaryLoad.getRegionMetrics().size()) { 287 s = secondaryHost; 288 } else { 289 s = tertiaryHost; 290 } 291 } else { 292 // We don't have one/more load, lets just choose a random node 293 s = RANDOM.nextBoolean() ? secondaryHost : tertiaryHost; 294 } 295 addRegionToMap(assignmentMapForFavoredNodes, region, s); 296 } else if (secondaryHost != null) { 297 addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost); 298 } else if (tertiaryHost != null) { 299 addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost); 300 } else { 301 // No favored nodes are online, lets assign to BOGUS server 302 addRegionToMap(assignmentMapForFavoredNodes, region, BOGUS_SERVER_NAME); 303 } 304 } 305 306 /* 307 * If we have favored nodes for a region, we will return one of the FN as destination. If 308 * favored nodes are not present for a region, we will generate and return one of the FN as 309 * destination. If we can't generate anything, lets fallback. 310 */ 311 @Override 312 public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers) 313 throws HBaseIOException { 314 315 if (servers != null && servers.contains(masterServerName)) { 316 if (shouldBeOnMaster(regionInfo)) { 317 metricsBalancer.incrMiscInvocations(); 318 return masterServerName; 319 } 320 if (!LoadBalancer.isTablesOnMaster(getConf())) { 321 // Guarantee we do not put any regions on master 322 servers = new ArrayList<>(servers); 323 servers.remove(masterServerName); 324 } 325 } 326 327 ServerName destination = null; 328 if (!FavoredNodesManager.isFavoredNodeApplicable(regionInfo)) { 329 return super.randomAssignment(regionInfo, servers); 330 } 331 332 metricsBalancer.incrMiscInvocations(); 333 334 List<ServerName> favoredNodes = fnm.getFavoredNodes(regionInfo); 335 if (favoredNodes == null || favoredNodes.isEmpty()) { 336 // Generate new favored nodes and return primary 337 FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, getConf()); 338 helper.initialize(); 339 try { 340 favoredNodes = helper.generateFavoredNodes(regionInfo); 341 updateFavoredNodesForRegion(regionInfo, favoredNodes); 342 343 } catch (IOException e) { 344 LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + e); 345 throw new HBaseIOException(e); 346 } 347 } 348 349 List<ServerName> onlineServers = getOnlineFavoredNodes(servers, favoredNodes); 350 if (onlineServers.size() > 0) { 351 destination = onlineServers.get(RANDOM.nextInt(onlineServers.size())); 352 } 353 354 boolean alwaysAssign = getConf().getBoolean(FAVORED_ALWAYS_ASSIGN_REGIONS, true); 355 if (destination == null && alwaysAssign) { 356 LOG.warn("Can't generate FN for region: " + regionInfo + " falling back"); 357 destination = super.randomAssignment(regionInfo, servers); 358 } 359 return destination; 360 } 361 362 private void updateFavoredNodesForRegion(RegionInfo regionInfo, List<ServerName> newFavoredNodes) 363 throws IOException { 364 Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap(); 365 regionFNMap.put(regionInfo, newFavoredNodes); 366 fnm.updateFavoredNodes(regionFNMap); 367 } 368 369 /* 370 * Reuse BaseLoadBalancer's retainAssignment, but generate favored nodes when its missing. 371 */ 372 @Override 373 @NonNull 374 public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions, 375 List<ServerName> servers) throws HBaseIOException { 376 377 Map<ServerName, List<RegionInfo>> assignmentMap = Maps.newHashMap(); 378 Map<ServerName, List<RegionInfo>> result = super.retainAssignment(regions, servers); 379 if (result.isEmpty()) { 380 LOG.warn("Nothing to assign to, probably no servers or no regions"); 381 return result; 382 } 383 384 // Guarantee not to put other regions on master 385 if (servers != null && servers.contains(masterServerName)) { 386 servers = new ArrayList<>(servers); 387 servers.remove(masterServerName); 388 } 389 390 // Lets check if favored nodes info is in META, if not generate now. 391 FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, getConf()); 392 helper.initialize(); 393 394 LOG.debug("Generating favored nodes for regions missing them."); 395 Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap(); 396 397 try { 398 for (Entry<ServerName, List<RegionInfo>> entry : result.entrySet()) { 399 400 ServerName sn = entry.getKey(); 401 ServerName primary = ServerName.valueOf(sn.getHostname(), sn.getPort(), NON_STARTCODE); 402 403 for (RegionInfo hri : entry.getValue()) { 404 405 if (FavoredNodesManager.isFavoredNodeApplicable(hri)) { 406 List<ServerName> favoredNodes = fnm.getFavoredNodes(hri); 407 if (favoredNodes == null || favoredNodes.size() < FAVORED_NODES_NUM) { 408 409 LOG.debug("Generating favored nodes for: " + hri + " with primary: " + primary); 410 ServerName[] secondaryAndTertiaryNodes = helper.getSecondaryAndTertiary(hri, primary); 411 if (secondaryAndTertiaryNodes != null && secondaryAndTertiaryNodes.length == 2) { 412 List<ServerName> newFavoredNodes = Lists.newArrayList(); 413 newFavoredNodes.add(primary); 414 newFavoredNodes.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(), 415 secondaryAndTertiaryNodes[0].getPort(), NON_STARTCODE)); 416 newFavoredNodes.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(), 417 secondaryAndTertiaryNodes[1].getPort(), NON_STARTCODE)); 418 regionFNMap.put(hri, newFavoredNodes); 419 addRegionToMap(assignmentMap, hri, sn); 420 421 } else { 422 throw new HBaseIOException("Cannot generate secondary/tertiary FN for " + hri 423 + " generated " 424 + (secondaryAndTertiaryNodes != null ? secondaryAndTertiaryNodes : " nothing")); 425 } 426 } else { 427 List<ServerName> onlineFN = getOnlineFavoredNodes(servers, favoredNodes); 428 if (onlineFN.isEmpty()) { 429 // All favored nodes are dead, lets assign it to BOGUS 430 addRegionToMap(assignmentMap, hri, BOGUS_SERVER_NAME); 431 } else { 432 // Is primary not on FN? Less likely, but we can still take care of this. 433 if (FavoredNodesPlan.getFavoredServerPosition(favoredNodes, sn) != null) { 434 addRegionToMap(assignmentMap, hri, sn); 435 } else { 436 ServerName destination = onlineFN.get(RANDOM.nextInt(onlineFN.size())); 437 LOG.warn("Region: " + hri + " not hosted on favored nodes: " + favoredNodes 438 + " current: " + sn + " moving to: " + destination); 439 addRegionToMap(assignmentMap, hri, destination); 440 } 441 } 442 } 443 } else { 444 addRegionToMap(assignmentMap, hri, sn); 445 } 446 } 447 } 448 449 if (!regionFNMap.isEmpty()) { 450 LOG.debug("Updating FN in meta for missing regions, count: " + regionFNMap.size()); 451 fnm.updateFavoredNodes(regionFNMap); 452 } 453 454 } catch (IOException e) { 455 throw new HBaseIOException("Cannot generate/update FN for regions: " + regionFNMap.keySet()); 456 } 457 458 return assignmentMap; 459 } 460 461 /* 462 * Return list of favored nodes that are online. 463 */ 464 private List<ServerName> getOnlineFavoredNodes(List<ServerName> onlineServers, 465 List<ServerName> serversWithoutStartCodes) { 466 if (serversWithoutStartCodes == null) { 467 return null; 468 } else { 469 List<ServerName> result = Lists.newArrayList(); 470 for (ServerName sn : serversWithoutStartCodes) { 471 for (ServerName online : onlineServers) { 472 if (ServerName.isSameAddress(sn, online)) { 473 result.add(online); 474 } 475 } 476 } 477 return result; 478 } 479 } 480 481 public synchronized List<ServerName> getFavoredNodes(RegionInfo regionInfo) { 482 return this.fnm.getFavoredNodes(regionInfo); 483 } 484 485 /* 486 * Generate Favored Nodes for daughters during region split. 487 * 488 * If the parent does not have FN, regenerates them for the daughters. 489 * 490 * If the parent has FN, inherit two FN from parent for each daughter and generate the remaining. 491 * The primary FN for both the daughters should be the same as parent. Inherit the secondary 492 * FN from the parent but keep it different for each daughter. Choose the remaining FN 493 * randomly. This would give us better distribution over a period of time after enough splits. 494 */ 495 @Override 496 public void generateFavoredNodesForDaughter(List<ServerName> servers, RegionInfo parent, 497 RegionInfo regionA, RegionInfo regionB) throws IOException { 498 499 Map<RegionInfo, List<ServerName>> result = new HashMap<>(); 500 FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager); 501 helper.initialize(); 502 503 List<ServerName> parentFavoredNodes = fnm.getFavoredNodes(parent); 504 if (parentFavoredNodes == null) { 505 LOG.debug("Unable to find favored nodes for parent, " + parent 506 + " generating new favored nodes for daughter"); 507 result.put(regionA, helper.generateFavoredNodes(regionA)); 508 result.put(regionB, helper.generateFavoredNodes(regionB)); 509 510 } else { 511 512 // Lets get the primary and secondary from parent for regionA 513 Set<ServerName> regionAFN = 514 getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, SECONDARY); 515 result.put(regionA, Lists.newArrayList(regionAFN)); 516 517 // Lets get the primary and tertiary from parent for regionB 518 Set<ServerName> regionBFN = 519 getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, TERTIARY); 520 result.put(regionB, Lists.newArrayList(regionBFN)); 521 } 522 523 fnm.updateFavoredNodes(result); 524 } 525 526 private Set<ServerName> getInheritedFNForDaughter(FavoredNodeAssignmentHelper helper, 527 List<ServerName> parentFavoredNodes, Position primary, Position secondary) 528 throws IOException { 529 530 Set<ServerName> daughterFN = Sets.newLinkedHashSet(); 531 if (parentFavoredNodes.size() >= primary.ordinal()) { 532 daughterFN.add(parentFavoredNodes.get(primary.ordinal())); 533 } 534 535 if (parentFavoredNodes.size() >= secondary.ordinal()) { 536 daughterFN.add(parentFavoredNodes.get(secondary.ordinal())); 537 } 538 539 while (daughterFN.size() < FAVORED_NODES_NUM) { 540 ServerName newNode = helper.generateMissingFavoredNode(Lists.newArrayList(daughterFN)); 541 daughterFN.add(newNode); 542 } 543 return daughterFN; 544 } 545 546 /* 547 * Generate favored nodes for a region during merge. Choose the FN from one of the sources to 548 * keep it simple. 549 */ 550 @Override 551 public void generateFavoredNodesForMergedRegion(RegionInfo merged, RegionInfo [] mergeParents) 552 throws IOException { 553 updateFavoredNodesForRegion(merged, fnm.getFavoredNodes(mergeParents[0])); 554 } 555 556 /* 557 * Pick favored nodes with the highest locality for a region with lowest locality. 558 */ 559 private class FavoredNodeLocalityPicker extends CandidateGenerator { 560 561 @Override 562 protected Cluster.Action generate(Cluster cluster) { 563 564 int thisServer = pickRandomServer(cluster); 565 int thisRegion; 566 if (thisServer == -1) { 567 LOG.trace("Could not pick lowest local region server"); 568 return Cluster.NullAction; 569 } else { 570 // Pick lowest local region on this server 571 thisRegion = pickLowestLocalRegionOnServer(cluster, thisServer); 572 } 573 if (thisRegion == -1) { 574 if (cluster.regionsPerServer[thisServer].length > 0) { 575 LOG.trace("Could not pick lowest local region even when region server held " 576 + cluster.regionsPerServer[thisServer].length + " regions"); 577 } 578 return Cluster.NullAction; 579 } 580 581 RegionInfo hri = cluster.regions[thisRegion]; 582 List<ServerName> favoredNodes = fnm.getFavoredNodes(hri); 583 int otherServer; 584 if (favoredNodes == null) { 585 if (!FavoredNodesManager.isFavoredNodeApplicable(hri)) { 586 otherServer = pickOtherRandomServer(cluster, thisServer); 587 } else { 588 // No FN, ignore 589 LOG.trace("Ignoring, no favored nodes for region: " + hri); 590 return Cluster.NullAction; 591 } 592 } else { 593 // Pick other favored node with the highest locality 594 otherServer = getDifferentFavoredNode(cluster, favoredNodes, thisServer); 595 } 596 return getAction(thisServer, thisRegion, otherServer, -1); 597 } 598 599 private int getDifferentFavoredNode(Cluster cluster, List<ServerName> favoredNodes, 600 int currentServer) { 601 List<Integer> fnIndex = new ArrayList<>(); 602 for (ServerName sn : favoredNodes) { 603 if (cluster.serversToIndex.containsKey(sn.getHostAndPort())) { 604 fnIndex.add(cluster.serversToIndex.get(sn.getHostAndPort())); 605 } 606 } 607 float locality = 0; 608 int highestLocalRSIndex = -1; 609 for (Integer index : fnIndex) { 610 if (index != currentServer) { 611 float temp = cluster.localityPerServer[index]; 612 if (temp >= locality) { 613 locality = temp; 614 highestLocalRSIndex = index; 615 } 616 } 617 } 618 return highestLocalRSIndex; 619 } 620 621 private int pickLowestLocalRegionOnServer(Cluster cluster, int server) { 622 return cluster.getLowestLocalityRegionOnServer(server); 623 } 624 } 625 626 /* 627 * This is like LoadCandidateGenerator, but we choose appropriate FN for the region on the 628 * most loaded server. 629 */ 630 class FavoredNodeLoadPicker extends CandidateGenerator { 631 632 @Override 633 Cluster.Action generate(Cluster cluster) { 634 cluster.sortServersByRegionCount(); 635 int thisServer = pickMostLoadedServer(cluster); 636 int thisRegion = pickRandomRegion(cluster, thisServer, 0); 637 RegionInfo hri = cluster.regions[thisRegion]; 638 int otherServer; 639 List<ServerName> favoredNodes = fnm.getFavoredNodes(hri); 640 if (favoredNodes == null) { 641 if (!FavoredNodesManager.isFavoredNodeApplicable(hri)) { 642 otherServer = pickLeastLoadedServer(cluster, thisServer); 643 } else { 644 return Cluster.NullAction; 645 } 646 } else { 647 otherServer = pickLeastLoadedFNServer(cluster, favoredNodes, thisServer); 648 } 649 return getAction(thisServer, thisRegion, otherServer, -1); 650 } 651 652 private int pickLeastLoadedServer(final Cluster cluster, int thisServer) { 653 Integer[] servers = cluster.serverIndicesSortedByRegionCount; 654 int index; 655 for (index = 0; index < servers.length ; index++) { 656 if ((servers[index] != null) && servers[index] != thisServer) { 657 break; 658 } 659 } 660 return servers[index]; 661 } 662 663 private int pickLeastLoadedFNServer(final Cluster cluster, List<ServerName> favoredNodes, 664 int currentServerIndex) { 665 List<Integer> fnIndex = new ArrayList<>(); 666 for (ServerName sn : favoredNodes) { 667 if (cluster.serversToIndex.containsKey(sn.getHostAndPort())) { 668 fnIndex.add(cluster.serversToIndex.get(sn.getHostAndPort())); 669 } 670 } 671 int leastLoadedFN = -1; 672 int load = Integer.MAX_VALUE; 673 for (Integer index : fnIndex) { 674 if (index != currentServerIndex) { 675 int temp = cluster.getNumRegions(index); 676 if (temp < load) { 677 load = temp; 678 leastLoadedFN = index; 679 } 680 } 681 } 682 return leastLoadedFN; 683 } 684 685 private int pickMostLoadedServer(final Cluster cluster) { 686 Integer[] servers = cluster.serverIndicesSortedByRegionCount; 687 int index; 688 for (index = servers.length - 1; index > 0 ; index--) { 689 if (servers[index] != null) { 690 break; 691 } 692 } 693 return servers[index]; 694 } 695 } 696 697 /* 698 * For all regions correctly assigned to favored nodes, we just use the stochastic balancer 699 * implementation. For the misplaced regions, we assign a bogus server to it and AM takes care. 700 */ 701 @Override 702 public synchronized List<RegionPlan> balanceTable(TableName tableName, 703 Map<ServerName, List<RegionInfo>> loadOfOneTable) { 704 705 if (this.services != null) { 706 707 List<RegionPlan> regionPlans = Lists.newArrayList(); 708 Map<ServerName, List<RegionInfo>> correctAssignments = new HashMap<>(); 709 int misplacedRegions = 0; 710 711 for (Entry<ServerName, List<RegionInfo>> entry : loadOfOneTable.entrySet()) { 712 ServerName current = entry.getKey(); 713 List<RegionInfo> regions = Lists.newArrayList(); 714 correctAssignments.put(current, regions); 715 716 for (RegionInfo hri : entry.getValue()) { 717 List<ServerName> favoredNodes = fnm.getFavoredNodes(hri); 718 if (FavoredNodesPlan.getFavoredServerPosition(favoredNodes, current) != null || 719 !FavoredNodesManager.isFavoredNodeApplicable(hri)) { 720 regions.add(hri); 721 722 } else { 723 // No favored nodes, lets unassign. 724 LOG.warn("Region not on favored nodes, unassign. Region: " + hri 725 + " current: " + current + " favored nodes: " + favoredNodes); 726 try { 727 this.services.getAssignmentManager().unassign(hri); 728 } catch (IOException e) { 729 LOG.warn("Failed unassign", e); 730 continue; 731 } 732 RegionPlan rp = new RegionPlan(hri, null, null); 733 regionPlans.add(rp); 734 misplacedRegions++; 735 } 736 } 737 } 738 LOG.debug("Found misplaced regions: " + misplacedRegions + ", not on favored nodes."); 739 List<RegionPlan> regionPlansFromBalance = super.balanceTable(tableName, correctAssignments); 740 if (regionPlansFromBalance != null) { 741 regionPlans.addAll(regionPlansFromBalance); 742 } 743 return regionPlans; 744 } else { 745 return super.balanceTable(tableName, loadOfOneTable); 746 } 747 } 748} 749