/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.balancer;

import static org.apache.hadoop.hbase.ServerName.NON_STARTCODE;
import static org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper.FAVORED_NODES_NUM;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position;
import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

/**
 * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that
 * assigns favored nodes for each region. There is a Primary RegionServer that hosts
 * the region, and then there is Secondary and Tertiary RegionServers. Currently, the
 * favored nodes information is used in creating HDFS files - the Primary RegionServer
 * passes the primary, secondary, tertiary node addresses as hints to the
 * DistributedFileSystem API for creating files on the filesystem. These nodes are
 * treated as hints by the HDFS to place the blocks of the file. This alleviates the
 * problem to do with reading from remote nodes (since we can make the Secondary
 * RegionServer as the new Primary RegionServer) after a region is recovered. This
 * should help provide consistent read latencies for the regions even when their
 * primary region servers die.
 * <p>
 * This balancer installs two {@link CandidateGenerator}s (see
 * {@link #configureGenerators()}): {@code FavoredNodeLoadPicker} and
 * {@code FavoredNodeLocalityPicker}.
 */
@InterfaceAudience.Private
public class FavoredStochasticBalancer extends StochasticLoadBalancer implements
    FavoredNodesPromoter {

  private static final Logger LOG = LoggerFactory.getLogger(FavoredStochasticBalancer.class);

  /* Favored nodes manager, obtained from the master services in setMasterServices(). */
  private FavoredNodesManager fnm;

  /*
   * Installs the favored-node candidate generators and then runs the parent initialization.
   */
  @Override
  public void initialize() throws HBaseIOException {
    configureGenerators();
    super.initialize();
  }

  /*
   * Registers the two favored-node aware candidate generators (load picker and locality picker)
   * as the generators used by this balancer.
   */
  protected void configureGenerators() {
    List<CandidateGenerator> fnPickers = new ArrayList<>(2);
    fnPickers.add(new FavoredNodeLoadPicker());
    fnPickers.add(new FavoredNodeLocalityPicker());
    setCandidateGenerators(fnPickers);
  }

  /*
   * Stores the master services in the parent and caches its FavoredNodesManager.
   */
  @Override
  public synchronized void setMasterServices(MasterServices masterServices) {
    super.setMasterServices(masterServices);
    fnm = masterServices.getFavoredNodesManager();
  }

  /*
   * Round robin assignment: Segregate the regions into two types:
   *
   * 1. The regions that have favored node assignment where at least one of the favored node
   * is still alive. In this case, try to adhere to the current favored nodes assignment as
   * much as possible - i.e., if the current primary is gone, then make the secondary or
   * tertiary as the new host for the region (based on their current load). Note that we don't
   * change the favored node assignments here (even though one or more favored node is
   * currently down). That will be done by the admin operations.
   *
   * 2. The regions that currently don't have favored node assignments. Generate favored nodes
   * for them and then assign. Generate the primary fn in round robin fashion and generate
   * secondary and tertiary as per favored nodes constraints.
   *
   * Regions that are not favored-node applicable are assigned through the parent's round robin
   * assignment. Any exception raised while computing the favored-node assignment is wrapped in
   * an HBaseIOException and propagated to the caller.
   */
  @Override
  public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
      List<ServerName> servers) throws HBaseIOException {

    metricsBalancer.incrMiscInvocations();

    Set<RegionInfo> regionSet = Sets.newHashSet(regions);
    Map<ServerName, List<RegionInfo>> assignmentMap = assignMasterSystemRegions(regions, servers);
    if (assignmentMap != null && !assignmentMap.isEmpty()) {
      servers = new ArrayList<>(servers);
      // Guarantee not to put other regions on master
      servers.remove(masterServerName);
      List<RegionInfo> masterRegions = assignmentMap.get(masterServerName);
      // The map may be non-empty without an entry for the master; do not dereference blindly.
      if (masterRegions != null) {
        for (RegionInfo region : masterRegions) {
          regionSet.remove(region);
        }
      }
    }

    if (regionSet.isEmpty()) {
      return assignmentMap;
    }

    // The merges below write into assignmentMap, so it must exist from here on.
    if (assignmentMap == null) {
      assignmentMap = new HashMap<>();
    }

    try {
      FavoredNodeAssignmentHelper helper =
          new FavoredNodeAssignmentHelper(servers, fnm.getRackManager());
      helper.initialize();

      Set<RegionInfo> systemRegions = FavoredNodesManager.filterNonFNApplicableRegions(regionSet);
      regionSet.removeAll(systemRegions);

      // Assign all system regions
      Map<ServerName, List<RegionInfo>> systemAssignments =
          super.roundRobinAssignment(Lists.newArrayList(systemRegions), servers);

      // Segregate favored and non-favored nodes regions and assign accordingly.
      Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>> segregatedRegions =
          segregateRegionsAndAssignRegionsWithFavoredNodes(regionSet, servers);
      Map<ServerName, List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
      Map<ServerName, List<RegionInfo>> regionsWithoutFN =
          generateFNForRegionsWithoutFN(helper, segregatedRegions.getSecond());

      // merge the assignment maps
      mergeAssignmentMaps(assignmentMap, systemAssignments);
      mergeAssignmentMaps(assignmentMap, regionsWithFavoredNodesMap);
      mergeAssignmentMaps(assignmentMap, regionsWithoutFN);

    } catch (Exception ex) {
      // Note: despite the message text, no fallback happens here; the failure is propagated.
      throw new HBaseIOException("Encountered exception while doing favored-nodes assignment "
          + ex + " Falling back to regular assignment", ex);
    }
    return assignmentMap;
  }

  /*
   * Adds every server -> regions entry of otherAssignments into assignmentMap. Regions are
   * appended to an existing list for the server, or a copy of the list is inserted when the
   * server has no list yet. A null or empty otherAssignments is a no-op.
   */
  private void mergeAssignmentMaps(Map<ServerName, List<RegionInfo>> assignmentMap,
      Map<ServerName, List<RegionInfo>> otherAssignments) {

    if (otherAssignments == null || otherAssignments.isEmpty()) {
      return;
    }

    for (Entry<ServerName, List<RegionInfo>> entry : otherAssignments.entrySet()) {
      ServerName sn = entry.getKey();
      List<RegionInfo> regionsList = entry.getValue();
      List<RegionInfo> existing = assignmentMap.get(sn);
      if (existing == null) {
        // Copy so that later merges never mutate the caller's list.
        assignmentMap.put(sn, Lists.newArrayList(regionsList));
      } else {
        existing.addAll(regionsList);
      }
    }
  }

  /*
   * Generates favored nodes (round robin, via the helper) for regions that have none, records
   * them in the FavoredNodesManager and returns the resulting server -> regions assignment.
   * Returns an empty map when there are no regions to process.
   */
  private Map<ServerName, List<RegionInfo>> generateFNForRegionsWithoutFN(
      FavoredNodeAssignmentHelper helper, List<RegionInfo> regions) throws IOException {

    Map<ServerName, List<RegionInfo>> assignmentMap = Maps.newHashMap();

    if (!regions.isEmpty()) {
      // The helper fills assignmentMap and returns the generated favored nodes per region.
      Map<RegionInfo, List<ServerName>> regionsNoFNMap =
          helper.generateFavoredNodesRoundRobin(assignmentMap, regions);
      fnm.updateFavoredNodes(regionsNoFNMap);
    }
    return assignmentMap;
  }

  /*
   * Return a pair - one with assignments when favored nodes are present and another with regions
   * without favored nodes.
   */
  private Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>>
      segregateRegionsAndAssignRegionsWithFavoredNodes(Collection<RegionInfo> regions,
      List<ServerName> onlineServers) throws HBaseIOException {

    // Since we expect FN to be present most of the time, lets create map with same size
    Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes =
        new HashMap<>(onlineServers.size());
    List<RegionInfo> regionsWithNoFavoredNodes = new ArrayList<>();

    for (RegionInfo region : regions) {
      List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
      ServerName primaryHost = null;
      ServerName secondaryHost = null;
      ServerName tertiaryHost = null;

      if (favoredNodes != null && !favoredNodes.isEmpty()) {
        for (ServerName s : favoredNodes) {
          // Only favored nodes that map to an online server are candidates.
          ServerName serverWithLegitStartCode = getServerFromFavoredNode(onlineServers, s);
          if (serverWithLegitStartCode != null) {
            FavoredNodesPlan.Position position =
                FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s);
            if (Position.PRIMARY.equals(position)) {
              primaryHost = serverWithLegitStartCode;
            } else if (Position.SECONDARY.equals(position)) {
              secondaryHost = serverWithLegitStartCode;
            } else if (Position.TERTIARY.equals(position)) {
              tertiaryHost = serverWithLegitStartCode;
            }
          }
        }
        assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region, primaryHost,
            secondaryHost, tertiaryHost);
      } else {
        regionsWithNoFavoredNodes.add(region);
      }
    }
    return new Pair<>(assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
  }

  /*
   * Appends the region to the list kept for host, creating that list on first use.
   */
  private void addRegionToMap(Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes,
      RegionInfo region, ServerName host) {

    List<RegionInfo> regionsOnServer = assignmentMapForFavoredNodes.get(host);
    if (regionsOnServer == null) {
      regionsOnServer = Lists.newArrayList();
      assignmentMapForFavoredNodes.put(host, regionsOnServer);
    }
    regionsOnServer.add(region);
  }

  /*
   * Get the ServerName for the FavoredNode. Since FN's startcode is -1, we could want to get the
   * ServerName with the correct start code from the list of provided servers. Returns null when
   * no provided server has the same address.
   */
  private ServerName getServerFromFavoredNode(List<ServerName> servers, ServerName fn) {
    for (ServerName server : servers) {
      if (ServerName.isSameAddress(fn, server)) {
        return server;
      }
    }
    return null;
  }

  /*
   * Assign the region to primary if its available. If both secondary and tertiary are available,
   * assign to the host which has less load. Else assign to secondary or tertiary whichever is
   * available (in that order). If none is available the region is mapped to BOGUS_SERVER_NAME.
   */
  private void assignRegionToAvailableFavoredNode(
      Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes, RegionInfo region,
      ServerName primaryHost, ServerName secondaryHost, ServerName tertiaryHost) {

    if (primaryHost != null) {
      addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);

    } else if (secondaryHost != null && tertiaryHost != null) {

      // Assign the region to the one with a lower load (both have the desired hdfs blocks)
      ServerName s;
      ServerMetrics tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost);
      ServerMetrics secondaryLoad = super.services.getServerManager().getLoad(secondaryHost);
      if (secondaryLoad != null && tertiaryLoad != null) {
        // Load is compared by the number of region metrics each server reports.
        if (secondaryLoad.getRegionMetrics().size() < tertiaryLoad.getRegionMetrics().size()) {
          s = secondaryHost;
        } else {
          s = tertiaryHost;
        }
      } else {
        // We don't have one/more load, lets just choose a random node
        s = RANDOM.nextBoolean() ? secondaryHost : tertiaryHost;
      }
      addRegionToMap(assignmentMapForFavoredNodes, region, s);
    } else if (secondaryHost != null) {
      addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost);
    } else if (tertiaryHost != null) {
      addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost);
    } else {
      // No favored nodes are online, lets assign to BOGUS server
      addRegionToMap(assignmentMapForFavoredNodes, region, BOGUS_SERVER_NAME);
    }
  }

  /*
   * If we have favored nodes for a region, we will return one of the FN as destination. If
   * favored nodes are not present for a region, we will generate and return one of the FN as
   * destination. If we can't generate anything, lets fallback.
   *
   * The fallback to the parent's random assignment only happens when the
   * FAVORED_ALWAYS_ASSIGN_REGIONS setting (default true) is enabled; otherwise null may be
   * returned when none of the favored nodes is online.
   */
  @Override
  public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers)
      throws HBaseIOException {

    if (servers != null && servers.contains(masterServerName)) {
      if (shouldBeOnMaster(regionInfo)) {
        metricsBalancer.incrMiscInvocations();
        return masterServerName;
      }
      if (!LoadBalancer.isTablesOnMaster(getConf())) {
        // Guarantee we do not put any regions on master
        servers = new ArrayList<>(servers);
        servers.remove(masterServerName);
      }
    }

    ServerName destination = null;
    if (!FavoredNodesManager.isFavoredNodeApplicable(regionInfo)) {
      return super.randomAssignment(regionInfo, servers);
    }

    metricsBalancer.incrMiscInvocations();

    List<ServerName> favoredNodes = fnm.getFavoredNodes(regionInfo);
    if (favoredNodes == null || favoredNodes.isEmpty()) {
      // Generate new favored nodes and return primary
      FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, getConf());
      helper.initialize();
      try {
        favoredNodes = helper.generateFavoredNodes(regionInfo);
        updateFavoredNodesForRegion(regionInfo, favoredNodes);

      } catch (IOException e) {
        LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + e);
        throw new HBaseIOException(e);
      }
    }

    List<ServerName> onlineServers = getOnlineFavoredNodes(servers, favoredNodes);
    if (!onlineServers.isEmpty()) {
      destination = onlineServers.get(RANDOM.nextInt(onlineServers.size()));
    }

    boolean alwaysAssign = getConf().getBoolean(FAVORED_ALWAYS_ASSIGN_REGIONS, true);
    if (destination == null && alwaysAssign) {
      LOG.warn("Can't generate FN for region: " + regionInfo + " falling back");
      destination = super.randomAssignment(regionInfo, servers);
    }
    return destination;
  }

  /*
   * Records newFavoredNodes as the favored nodes of a single region in the FavoredNodesManager.
   */
  private void updateFavoredNodesForRegion(RegionInfo regionInfo, List<ServerName> newFavoredNodes)
      throws IOException {
    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
    regionFNMap.put(regionInfo, newFavoredNodes);
    fnm.updateFavoredNodes(regionFNMap);
  }

  /*
   * Reuse BaseLoadBalancer's retainAssignment, but generate favored nodes when its missing.
   *
   * For each region of the parent's result:
   * - not favored-node applicable: keep the parent's choice;
   * - favored nodes missing or incomplete: keep the parent's server as primary and generate
   *   secondary/tertiary;
   * - favored nodes present: keep the server if it is one of the favored nodes, otherwise move
   *   to a random online favored node, or to BOGUS_SERVER_NAME when none is online.
   *
   * Returns null when the parent produced no assignment.
   */
  @Override
  public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions,
      List<ServerName> servers) throws HBaseIOException {

    Map<ServerName, List<RegionInfo>> assignmentMap = Maps.newHashMap();
    Map<ServerName, List<RegionInfo>> result = super.retainAssignment(regions, servers);
    if (result == null || result.isEmpty()) {
      LOG.warn("Nothing to assign to, probably no servers or no regions");
      return null;
    }

    // Guarantee not to put other regions on master
    if (servers != null && servers.contains(masterServerName)) {
      servers = new ArrayList<>(servers);
      servers.remove(masterServerName);
    }

    // Lets check if favored nodes info is in META, if not generate now.
    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, getConf());
    helper.initialize();

    LOG.debug("Generating favored nodes for regions missing them.");
    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();

    try {
      for (Entry<ServerName, List<RegionInfo>> entry : result.entrySet()) {

        ServerName sn = entry.getKey();
        // Favored nodes are stored without a start code.
        ServerName primary = ServerName.valueOf(sn.getHostname(), sn.getPort(), NON_STARTCODE);

        for (RegionInfo hri : entry.getValue()) {

          if (FavoredNodesManager.isFavoredNodeApplicable(hri)) {
            List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
            if (favoredNodes == null || favoredNodes.size() < FAVORED_NODES_NUM) {

              LOG.debug("Generating favored nodes for: " + hri + " with primary: " + primary);
              ServerName[] secondaryAndTertiaryNodes = helper.getSecondaryAndTertiary(hri, primary);
              if (secondaryAndTertiaryNodes != null && secondaryAndTertiaryNodes.length == 2) {
                List<ServerName> newFavoredNodes = Lists.newArrayList();
                newFavoredNodes.add(primary);
                newFavoredNodes.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(),
                    secondaryAndTertiaryNodes[0].getPort(), NON_STARTCODE));
                newFavoredNodes.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(),
                    secondaryAndTertiaryNodes[1].getPort(), NON_STARTCODE));
                regionFNMap.put(hri, newFavoredNodes);
                addRegionToMap(assignmentMap, hri, sn);

              } else {
                // Arrays.toString: concatenating the array itself would only print its identity.
                throw new HBaseIOException("Cannot generate secondary/tertiary FN for " + hri
                    + " generated "
                    + (secondaryAndTertiaryNodes != null
                        ? Arrays.toString(secondaryAndTertiaryNodes) : " nothing"));
              }
            } else {
              List<ServerName> onlineFN = getOnlineFavoredNodes(servers, favoredNodes);
              if (onlineFN.isEmpty()) {
                // All favored nodes are dead, lets assign it to BOGUS
                addRegionToMap(assignmentMap, hri, BOGUS_SERVER_NAME);
              } else {
                // Is primary not on FN? Less likely, but we can still take care of this.
                if (FavoredNodesPlan.getFavoredServerPosition(favoredNodes, sn) != null) {
                  addRegionToMap(assignmentMap, hri, sn);
                } else {
                  ServerName destination = onlineFN.get(RANDOM.nextInt(onlineFN.size()));
                  LOG.warn("Region: " + hri + " not hosted on favored nodes: " + favoredNodes
                      + " current: " + sn + " moving to: " + destination);
                  addRegionToMap(assignmentMap, hri, destination);
                }
              }
            }
          } else {
            addRegionToMap(assignmentMap, hri, sn);
          }
        }
      }

      if (!regionFNMap.isEmpty()) {
        LOG.debug("Updating FN in meta for missing regions, count: " + regionFNMap.size());
        fnm.updateFavoredNodes(regionFNMap);
      }

    } catch (IOException e) {
      // Keep the cause: this handler also catches the more specific HBaseIOException above.
      throw new HBaseIOException(
          "Cannot generate/update FN for regions: " + regionFNMap.keySet(), e);
    }

    return assignmentMap;
  }

  /*
   * Return list of favored nodes that are online. Each favored node is matched by address
   * against the online servers and the matching online ServerName is returned. Never returns
   * null: a null argument yields an empty list, since callers use the result directly.
   */
  private List<ServerName> getOnlineFavoredNodes(List<ServerName> onlineServers,
      List<ServerName> serversWithoutStartCodes) {
    List<ServerName> result = Lists.newArrayList();
    if (serversWithoutStartCodes == null || onlineServers == null) {
      return result;
    }
    for (ServerName sn : serversWithoutStartCodes) {
      for (ServerName online : onlineServers) {
        if (ServerName.isSameAddress(sn, online)) {
          result.add(online);
        }
      }
    }
    return result;
  }

  /*
   * Returns the favored nodes the FavoredNodesManager holds for the region.
   */
  public synchronized List<ServerName> getFavoredNodes(RegionInfo regionInfo) {
    return this.fnm.getFavoredNodes(regionInfo);
  }

  /*
   * Generate Favored Nodes for daughters during region split.
   *
   * If the parent does not have FN, regenerates them for the daughters.
   *
   * If the parent has FN, inherit two FN from parent for each daughter and generate the remaining.
   * The primary FN for both the daughters should be the same as parent. Inherit the secondary
   * FN from the parent but keep it different for each daughter. Choose the remaining FN
   * randomly. This would give us better distribution over a period of time after enough splits.
   */
  @Override
  public void generateFavoredNodesForDaughter(List<ServerName> servers, RegionInfo parent,
      RegionInfo regionA, RegionInfo regionB) throws IOException {

    Map<RegionInfo, List<ServerName>> result = new HashMap<>();
    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
    helper.initialize();

    List<ServerName> parentFavoredNodes = fnm.getFavoredNodes(parent);
    // An empty list carries nothing to inherit, so treat it like a missing one.
    if (parentFavoredNodes == null || parentFavoredNodes.isEmpty()) {
      LOG.debug("Unable to find favored nodes for parent, " + parent
          + " generating new favored nodes for daughter");
      result.put(regionA, helper.generateFavoredNodes(regionA));
      result.put(regionB, helper.generateFavoredNodes(regionB));

    } else {

      // Lets get the primary and secondary from parent for regionA
      Set<ServerName> regionAFN =
          getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, SECONDARY);
      result.put(regionA, Lists.newArrayList(regionAFN));

      // Lets get the primary and tertiary from parent for regionB
      Set<ServerName> regionBFN =
          getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, TERTIARY);
      result.put(regionB, Lists.newArrayList(regionBFN));
    }

    fnm.updateFavoredNodes(result);
  }

  /*
   * Builds the favored nodes of a daughter: inherits from the parent the nodes at the two given
   * positions (when the parent list is long enough to hold them) and asks the helper for missing
   * nodes until FAVORED_NODES_NUM distinct nodes are collected. Insertion order is preserved.
   */
  private Set<ServerName> getInheritedFNForDaughter(FavoredNodeAssignmentHelper helper,
      List<ServerName> parentFavoredNodes, Position primary, Position secondary)
      throws IOException {

    Set<ServerName> daughterFN = Sets.newLinkedHashSet();
    // Strictly greater: index 'ordinal' exists only when size > ordinal.
    if (parentFavoredNodes.size() > primary.ordinal()) {
      daughterFN.add(parentFavoredNodes.get(primary.ordinal()));
    }

    if (parentFavoredNodes.size() > secondary.ordinal()) {
      daughterFN.add(parentFavoredNodes.get(secondary.ordinal()));
    }

    while (daughterFN.size() < FAVORED_NODES_NUM) {
      ServerName newNode = helper.generateMissingFavoredNode(Lists.newArrayList(daughterFN));
      daughterFN.add(newNode);
    }
    return daughterFN;
  }

  /*
   * Generate favored nodes for a region during merge. Choose the FN from one of the sources to
   * keep it simple. The first merge parent's favored nodes are used.
   */
  @Override
  public void generateFavoredNodesForMergedRegion(RegionInfo merged, RegionInfo [] mergeParents)
      throws IOException {
    updateFavoredNodesForRegion(merged, fnm.getFavoredNodes(mergeParents[0]));
  }

  /*
   * Pick favored nodes with the highest locality for a region with lowest locality.
   */
  private class FavoredNodeLocalityPicker extends CandidateGenerator {

    /*
     * Picks a random server, takes its lowest-locality region and proposes moving it to the
     * other favored node with the highest locality. Regions that are not favored-node
     * applicable are moved to another random server. Returns Cluster.NullAction when no server
     * or region could be picked, or when a favored-node applicable region has no favored nodes.
     */
    @Override
    protected Cluster.Action generate(Cluster cluster) {

      int thisServer = pickRandomServer(cluster);
      int thisRegion;
      if (thisServer == -1) {
        LOG.trace("Could not pick lowest local region server");
        return Cluster.NullAction;
      } else {
        // Pick lowest local region on this server
        thisRegion = pickLowestLocalRegionOnServer(cluster, thisServer);
      }
      if (thisRegion == -1) {
        if (cluster.regionsPerServer[thisServer].length > 0) {
          LOG.trace("Could not pick lowest local region even when region server held "
              + cluster.regionsPerServer[thisServer].length + " regions");
        }
        return Cluster.NullAction;
      }

      RegionInfo hri = cluster.regions[thisRegion];
      List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
      int otherServer;
      if (favoredNodes == null) {
        if (!FavoredNodesManager.isFavoredNodeApplicable(hri)) {
          otherServer = pickOtherRandomServer(cluster, thisServer);
        } else {
          // No FN, ignore
          LOG.trace("Ignoring, no favored nodes for region: " + hri);
          return Cluster.NullAction;
        }
      } else {
        // Pick other favored node with the highest locality
        otherServer = getDifferentFavoredNode(cluster, favoredNodes, thisServer);
      }
      return getAction(thisServer, thisRegion, otherServer, -1);
    }

    /*
     * Returns the cluster index of the favored node, other than currentServer, with the highest
     * value in cluster.localityPerServer, or -1 when no other favored node is known to the
     * cluster.
     */
    private int getDifferentFavoredNode(Cluster cluster, List<ServerName> favoredNodes,
        int currentServer) {
      List<Integer> fnIndex = new ArrayList<>();
      for (ServerName sn : favoredNodes) {
        Integer serverIndex = cluster.serversToIndex.get(sn.getHostAndPort());
        if (serverIndex != null) {
          fnIndex.add(serverIndex);
        }
      }
      float locality = 0;
      int highestLocalRSIndex = -1;
      for (Integer index : fnIndex) {
        if (index != currentServer) {
          float temp = cluster.localityPerServer[index];
          // '>=' so that a favored node with zero locality is still eligible.
          if (temp >= locality) {
            locality = temp;
            highestLocalRSIndex = index;
          }
        }
      }
      return highestLocalRSIndex;
    }

    /*
     * Delegates to the cluster to find the lowest-locality region on the given server.
     */
    private int pickLowestLocalRegionOnServer(Cluster cluster, int server) {
      return cluster.getLowestLocalityRegionOnServer(server);
    }
  }

  /*
   * This is like LoadCandidateGenerator, but we choose appropriate FN for the region on the
   * most loaded server.
   */
  class FavoredNodeLoadPicker extends CandidateGenerator {

    /*
     * Picks a random region on the most loaded server and proposes moving it to its least loaded
     * other favored node. Regions that are not favored-node applicable go to the least loaded
     * other server. Returns Cluster.NullAction when no server or region can be picked, or when a
     * favored-node applicable region has no favored nodes.
     */
    @Override
    Cluster.Action generate(Cluster cluster) {
      cluster.sortServersByRegionCount();
      int thisServer = pickMostLoadedServer(cluster);
      if (thisServer < 0) {
        // No server available, nothing to move.
        return Cluster.NullAction;
      }
      int thisRegion = pickRandomRegion(cluster, thisServer, 0);
      if (thisRegion < 0) {
        // pickRandomRegion is defined outside this file; never index regions with a negative
        // value (the locality picker in this file guards its region index the same way).
        return Cluster.NullAction;
      }
      RegionInfo hri = cluster.regions[thisRegion];
      int otherServer;
      List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
      if (favoredNodes == null) {
        if (!FavoredNodesManager.isFavoredNodeApplicable(hri)) {
          otherServer = pickLeastLoadedServer(cluster, thisServer);
        } else {
          return Cluster.NullAction;
        }
      } else {
        otherServer = pickLeastLoadedFNServer(cluster, favoredNodes, thisServer);
      }
      return getAction(thisServer, thisRegion, otherServer, -1);
    }

    /*
     * Returns the first non-null entry of cluster.serverIndicesSortedByRegionCount that differs
     * from thisServer, or -1 when there is none.
     */
    private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
      Integer[] servers = cluster.serverIndicesSortedByRegionCount;
      for (int index = 0; index < servers.length; index++) {
        if (servers[index] != null && servers[index] != thisServer) {
          return servers[index];
        }
      }
      return -1;
    }

    /*
     * Returns the cluster index of the favored node, other than currentServerIndex, holding the
     * fewest regions, or -1 when no other favored node is known to the cluster.
     */
    private int pickLeastLoadedFNServer(final Cluster cluster, List<ServerName> favoredNodes,
        int currentServerIndex) {
      List<Integer> fnIndex = new ArrayList<>();
      for (ServerName sn : favoredNodes) {
        Integer serverIndex = cluster.serversToIndex.get(sn.getHostAndPort());
        if (serverIndex != null) {
          fnIndex.add(serverIndex);
        }
      }
      int leastLoadedFN = -1;
      int load = Integer.MAX_VALUE;
      for (Integer index : fnIndex) {
        if (index != currentServerIndex) {
          int temp = cluster.getNumRegions(index);
          if (temp < load) {
            load = temp;
            leastLoadedFN = index;
          }
        }
      }
      return leastLoadedFN;
    }

    /*
     * Returns the last non-null entry of cluster.serverIndicesSortedByRegionCount, or -1 when
     * the array is empty or holds only nulls.
     */
    private int pickMostLoadedServer(final Cluster cluster) {
      Integer[] servers = cluster.serverIndicesSortedByRegionCount;
      for (int index = servers.length - 1; index >= 0; index--) {
        if (servers[index] != null) {
          return servers[index];
        }
      }
      return -1;
    }
  }

  /*
   * For all regions correctly assigned to favored nodes, we just use the stochastic balancer
   * implementation. A region that is favored-node applicable but not hosted on one of its
   * favored nodes is unassigned through the AssignmentManager and reported with a RegionPlan
   * that has neither source nor destination. If the unassign fails the region is skipped.
   *
   * When no master services are set, this simply delegates to the parent implementation.
   */
  @Override
  public synchronized List<RegionPlan> balanceTable(TableName tableName,
      Map<ServerName, List<RegionInfo>> loadOfOneTable) {

    if (this.services != null) {

      List<RegionPlan> regionPlans = Lists.newArrayList();
      Map<ServerName, List<RegionInfo>> correctAssignments = new HashMap<>();
      int misplacedRegions = 0;

      for (Entry<ServerName, List<RegionInfo>> entry : loadOfOneTable.entrySet()) {
        ServerName current = entry.getKey();
        List<RegionInfo> regions = Lists.newArrayList();
        correctAssignments.put(current, regions);

        for (RegionInfo hri : entry.getValue()) {
          List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
          if (FavoredNodesPlan.getFavoredServerPosition(favoredNodes, current) != null ||
              !FavoredNodesManager.isFavoredNodeApplicable(hri)) {
            regions.add(hri);

          } else {
            // No favored nodes, lets unassign.
            LOG.warn("Region not on favored nodes, unassign. Region: " + hri
                + " current: " + current + " favored nodes: " + favoredNodes);
            try {
              this.services.getAssignmentManager().unassign(hri);
            } catch (IOException e) {
              LOG.warn("Failed unassign", e);
              continue;
            }
            RegionPlan rp = new RegionPlan(hri, null, null);
            regionPlans.add(rp);
            misplacedRegions++;
          }
        }
      }
      LOG.debug("Found misplaced regions: " + misplacedRegions + ", not on favored nodes.");
      List<RegionPlan> regionPlansFromBalance = super.balanceTable(tableName, correctAssignments);
      if (regionPlansFromBalance != null) {
        regionPlans.addAll(regionPlansFromBalance);
      }
      return regionPlans;
    } else {
      return super.balanceTable(tableName, loadOfOneTable);
    }
  }
}