001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.balancer; 019 020import static org.apache.hadoop.hbase.ServerName.NON_STARTCODE; 021import static org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper.FAVORED_NODES_NUM; 022import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY; 023import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY; 024import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY; 025 026import edu.umd.cs.findbugs.annotations.NonNull; 027import java.io.IOException; 028import java.util.ArrayList; 029import java.util.Collection; 030import java.util.HashMap; 031import java.util.List; 032import java.util.Map; 033import java.util.Map.Entry; 034import java.util.Set; 035import java.util.concurrent.ThreadLocalRandom; 036import org.apache.hadoop.conf.Configuration; 037import org.apache.hadoop.hbase.HBaseIOException; 038import org.apache.hadoop.hbase.ServerMetrics; 039import org.apache.hadoop.hbase.ServerName; 040import org.apache.hadoop.hbase.TableName; 041import org.apache.hadoop.hbase.client.RegionInfo; 042import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper; 043import org.apache.hadoop.hbase.favored.FavoredNodesManager; 044import org.apache.hadoop.hbase.favored.FavoredNodesPlan; 045import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position; 046import org.apache.hadoop.hbase.favored.FavoredNodesPromoter; 047import org.apache.hadoop.hbase.master.LoadBalancer; 048import org.apache.hadoop.hbase.master.MasterServices; 049import org.apache.hadoop.hbase.master.RegionPlan; 050import org.apache.hadoop.hbase.util.Pair; 051import org.apache.yetus.audience.InterfaceAudience; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 056import org.apache.hbase.thirdparty.com.google.common.collect.Maps; 057import org.apache.hbase.thirdparty.com.google.common.collect.Sets; 058 059/** 060 * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that assigns favored 061 * nodes for each region. There is a Primary RegionServer that hosts the region, and then there is 062 * Secondary and Tertiary RegionServers. Currently, the favored nodes information is used in 063 * creating HDFS files - the Primary RegionServer passes the primary, secondary, tertiary node 064 * addresses as hints to the DistributedFileSystem API for creating files on the filesystem. These 065 * nodes are treated as hints by the HDFS to place the blocks of the file. This alleviates the 066 * problem to do with reading from remote nodes (since we can make the Secondary RegionServer as the 067 * new Primary RegionServer) after a region is recovered. This should help provide consistent read 068 * latencies for the regions even when their primary region servers die. This provides two 069 * {@link CandidateGenerator} 070 */ 071@InterfaceAudience.Private 072public class FavoredStochasticBalancer extends StochasticLoadBalancer 073 implements FavoredNodesPromoter { 074 075 private static final Logger LOG = LoggerFactory.getLogger(FavoredStochasticBalancer.class); 076 private FavoredNodesManager fnm; 077 078 @Override 079 protected List<CandidateGenerator> createCandidateGenerators() { 080 List<CandidateGenerator> fnPickers = new ArrayList<>(2); 081 fnPickers.add(new FavoredNodeLoadPicker()); 082 fnPickers.add(new FavoredNodeLocalityPicker()); 083 return fnPickers; 084 } 085 086 /** Returns any candidate generator in random */ 087 @Override 088 protected CandidateGenerator getRandomGenerator() { 089 return candidateGenerators.get(ThreadLocalRandom.current().nextInt(candidateGenerators.size())); 090 } 091 092 @Override 093 public synchronized void setMasterServices(MasterServices masterServices) { 094 super.setMasterServices(masterServices); 095 fnm = masterServices.getFavoredNodesManager(); 096 } 097 098 /* 099 * Round robin assignment: Segregate the regions into two types: 1. The regions that have favored 100 * node assignment where at least one of the favored node is still alive. In this case, try to 101 * adhere to the current favored nodes assignment as much as possible - i.e., if the current 102 * primary is gone, then make the secondary or tertiary as the new host for the region (based on 103 * their current load). Note that we don't change the favored node assignments here (even though 104 * one or more favored node is currently down). That will be done by the admin operations. 2. The 105 * regions that currently don't have favored node assignments. Generate favored nodes for them and 106 * then assign. Generate the primary fn in round robin fashion and generate secondary and tertiary 107 * as per favored nodes constraints. 108 */ 109 @Override 110 @NonNull 111 public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions, 112 List<ServerName> servers) throws HBaseIOException { 113 114 metricsBalancer.incrMiscInvocations(); 115 116 Set<RegionInfo> regionSet = Sets.newHashSet(regions); 117 Map<ServerName, List<RegionInfo>> assignmentMap = assignMasterSystemRegions(regions, servers); 118 if (!assignmentMap.isEmpty()) { 119 servers = new ArrayList<>(servers); 120 // Guarantee not to put other regions on master 121 servers.remove(masterServerName); 122 List<RegionInfo> masterRegions = assignmentMap.get(masterServerName); 123 if (!masterRegions.isEmpty()) { 124 for (RegionInfo region : masterRegions) { 125 regionSet.remove(region); 126 } 127 } 128 } 129 130 if (regionSet.isEmpty()) { 131 return assignmentMap; 132 } 133 134 try { 135 FavoredNodeAssignmentHelper helper = 136 new FavoredNodeAssignmentHelper(servers, fnm.getRackManager()); 137 helper.initialize(); 138 139 Set<RegionInfo> systemRegions = FavoredNodesManager.filterNonFNApplicableRegions(regionSet); 140 regionSet.removeAll(systemRegions); 141 142 // Assign all system regions 143 Map<ServerName, List<RegionInfo>> systemAssignments = 144 super.roundRobinAssignment(Lists.newArrayList(systemRegions), servers); 145 146 // Segregate favored and non-favored nodes regions and assign accordingly. 147 Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>> segregatedRegions = 148 segregateRegionsAndAssignRegionsWithFavoredNodes(regionSet, servers); 149 Map<ServerName, List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst(); 150 Map<ServerName, List<RegionInfo>> regionsWithoutFN = 151 generateFNForRegionsWithoutFN(helper, segregatedRegions.getSecond()); 152 153 // merge the assignment maps 154 mergeAssignmentMaps(assignmentMap, systemAssignments); 155 mergeAssignmentMaps(assignmentMap, regionsWithFavoredNodesMap); 156 mergeAssignmentMaps(assignmentMap, regionsWithoutFN); 157 158 } catch (Exception ex) { 159 throw new HBaseIOException("Encountered exception while doing favored-nodes assignment " + ex 160 + " Falling back to regular assignment", ex); 161 } 162 return assignmentMap; 163 } 164 165 private void mergeAssignmentMaps(Map<ServerName, List<RegionInfo>> assignmentMap, 166 Map<ServerName, List<RegionInfo>> otherAssignments) { 167 168 if (otherAssignments == null || otherAssignments.isEmpty()) { 169 return; 170 } 171 172 for (Entry<ServerName, List<RegionInfo>> entry : otherAssignments.entrySet()) { 173 ServerName sn = entry.getKey(); 174 List<RegionInfo> regionsList = entry.getValue(); 175 if (assignmentMap.get(sn) == null) { 176 assignmentMap.put(sn, Lists.newArrayList(regionsList)); 177 } else { 178 assignmentMap.get(sn).addAll(regionsList); 179 } 180 } 181 } 182 183 private Map<ServerName, List<RegionInfo>> generateFNForRegionsWithoutFN( 184 FavoredNodeAssignmentHelper helper, List<RegionInfo> regions) throws IOException { 185 186 Map<ServerName, List<RegionInfo>> assignmentMap = Maps.newHashMap(); 187 Map<RegionInfo, List<ServerName>> regionsNoFNMap; 188 189 if (regions.size() > 0) { 190 regionsNoFNMap = helper.generateFavoredNodesRoundRobin(assignmentMap, regions); 191 fnm.updateFavoredNodes(regionsNoFNMap); 192 } 193 return assignmentMap; 194 } 195 196 /* 197 * Return a pair - one with assignments when favored nodes are present and another with regions 198 * without favored nodes. 199 */ 200 private Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>> 201 segregateRegionsAndAssignRegionsWithFavoredNodes(Collection<RegionInfo> regions, 202 List<ServerName> onlineServers) throws HBaseIOException { 203 204 // Since we expect FN to be present most of the time, lets create map with same size 205 Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes = 206 new HashMap<>(onlineServers.size()); 207 List<RegionInfo> regionsWithNoFavoredNodes = new ArrayList<>(); 208 209 for (RegionInfo region : regions) { 210 List<ServerName> favoredNodes = fnm.getFavoredNodes(region); 211 ServerName primaryHost = null; 212 ServerName secondaryHost = null; 213 ServerName tertiaryHost = null; 214 215 if (favoredNodes != null && !favoredNodes.isEmpty()) { 216 for (ServerName s : favoredNodes) { 217 ServerName serverWithLegitStartCode = getServerFromFavoredNode(onlineServers, s); 218 if (serverWithLegitStartCode != null) { 219 FavoredNodesPlan.Position position = 220 FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s); 221 if (Position.PRIMARY.equals(position)) { 222 primaryHost = serverWithLegitStartCode; 223 } else if (Position.SECONDARY.equals(position)) { 224 secondaryHost = serverWithLegitStartCode; 225 } else if (Position.TERTIARY.equals(position)) { 226 tertiaryHost = serverWithLegitStartCode; 227 } 228 } 229 } 230 assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region, primaryHost, 231 secondaryHost, tertiaryHost); 232 } else { 233 regionsWithNoFavoredNodes.add(region); 234 } 235 } 236 return new Pair<>(assignmentMapForFavoredNodes, regionsWithNoFavoredNodes); 237 } 238 239 private void addRegionToMap(Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes, 240 RegionInfo region, ServerName host) { 241 242 List<RegionInfo> regionsOnServer; 243 if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) { 244 regionsOnServer = Lists.newArrayList(); 245 assignmentMapForFavoredNodes.put(host, regionsOnServer); 246 } 247 regionsOnServer.add(region); 248 } 249 250 /* 251 * Get the ServerName for the FavoredNode. Since FN's startcode is -1, we could want to get the 252 * ServerName with the correct start code from the list of provided servers. 253 */ 254 private ServerName getServerFromFavoredNode(List<ServerName> servers, ServerName fn) { 255 for (ServerName server : servers) { 256 if (ServerName.isSameAddress(fn, server)) { 257 return server; 258 } 259 } 260 return null; 261 } 262 263 /* 264 * Assign the region to primary if its available. If both secondary and tertiary are available, 265 * assign to the host which has less load. Else assign to secondary or tertiary whichever is 266 * available (in that order). 267 */ 268 private void assignRegionToAvailableFavoredNode( 269 Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes, RegionInfo region, 270 ServerName primaryHost, ServerName secondaryHost, ServerName tertiaryHost) { 271 272 if (primaryHost != null) { 273 addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost); 274 275 } else if (secondaryHost != null && tertiaryHost != null) { 276 277 // Assign the region to the one with a lower load (both have the desired hdfs blocks) 278 ServerName s; 279 ServerMetrics tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost); 280 ServerMetrics secondaryLoad = super.services.getServerManager().getLoad(secondaryHost); 281 if (secondaryLoad != null && tertiaryLoad != null) { 282 if (secondaryLoad.getRegionMetrics().size() < tertiaryLoad.getRegionMetrics().size()) { 283 s = secondaryHost; 284 } else { 285 s = tertiaryHost; 286 } 287 } else { 288 // We don't have one/more load, lets just choose a random node 289 s = ThreadLocalRandom.current().nextBoolean() ? secondaryHost : tertiaryHost; 290 } 291 addRegionToMap(assignmentMapForFavoredNodes, region, s); 292 } else if (secondaryHost != null) { 293 addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost); 294 } else if (tertiaryHost != null) { 295 addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost); 296 } else { 297 // No favored nodes are online, lets assign to BOGUS server 298 addRegionToMap(assignmentMapForFavoredNodes, region, BOGUS_SERVER_NAME); 299 } 300 } 301 302 /* 303 * If we have favored nodes for a region, we will return one of the FN as destination. If favored 304 * nodes are not present for a region, we will generate and return one of the FN as destination. 305 * If we can't generate anything, lets fallback. 306 */ 307 @Override 308 public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers) 309 throws HBaseIOException { 310 311 if (servers != null && servers.contains(masterServerName)) { 312 if (shouldBeOnMaster(regionInfo)) { 313 metricsBalancer.incrMiscInvocations(); 314 return masterServerName; 315 } 316 if (!LoadBalancer.isTablesOnMaster(getConf())) { 317 // Guarantee we do not put any regions on master 318 servers = new ArrayList<>(servers); 319 servers.remove(masterServerName); 320 } 321 } 322 323 ServerName destination = null; 324 if (!FavoredNodesManager.isFavoredNodeApplicable(regionInfo)) { 325 return super.randomAssignment(regionInfo, servers); 326 } 327 328 metricsBalancer.incrMiscInvocations(); 329 330 Configuration conf = getConf(); 331 List<ServerName> favoredNodes = fnm.getFavoredNodes(regionInfo); 332 if (favoredNodes == null || favoredNodes.isEmpty()) { 333 // Generate new favored nodes and return primary 334 FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, conf); 335 helper.initialize(); 336 try { 337 favoredNodes = helper.generateFavoredNodes(regionInfo); 338 updateFavoredNodesForRegion(regionInfo, favoredNodes); 339 340 } catch (IOException e) { 341 LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + e); 342 throw new HBaseIOException(e); 343 } 344 } 345 346 List<ServerName> onlineServers = getOnlineFavoredNodes(servers, favoredNodes); 347 if (onlineServers.size() > 0) { 348 destination = onlineServers.get(ThreadLocalRandom.current().nextInt(onlineServers.size())); 349 } 350 351 boolean alwaysAssign = conf.getBoolean(FAVORED_ALWAYS_ASSIGN_REGIONS, true); 352 if (destination == null && alwaysAssign) { 353 LOG.warn("Can't generate FN for region: " + regionInfo + " falling back"); 354 destination = super.randomAssignment(regionInfo, servers); 355 } 356 return destination; 357 } 358 359 private void updateFavoredNodesForRegion(RegionInfo regionInfo, List<ServerName> newFavoredNodes) 360 throws IOException { 361 Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap(); 362 regionFNMap.put(regionInfo, newFavoredNodes); 363 fnm.updateFavoredNodes(regionFNMap); 364 } 365 366 /* 367 * Reuse BaseLoadBalancer's retainAssignment, but generate favored nodes when its missing. 368 */ 369 @Override 370 @NonNull 371 public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions, 372 List<ServerName> servers) throws HBaseIOException { 373 374 Map<ServerName, List<RegionInfo>> assignmentMap = Maps.newHashMap(); 375 Map<ServerName, List<RegionInfo>> result = super.retainAssignment(regions, servers); 376 if (result.isEmpty()) { 377 LOG.warn("Nothing to assign to, probably no servers or no regions"); 378 return result; 379 } 380 381 // Guarantee not to put other regions on master 382 if (servers != null && servers.contains(masterServerName)) { 383 servers = new ArrayList<>(servers); 384 servers.remove(masterServerName); 385 } 386 387 // Lets check if favored nodes info is in META, if not generate now. 388 FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, getConf()); 389 helper.initialize(); 390 391 LOG.debug("Generating favored nodes for regions missing them."); 392 Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap(); 393 394 try { 395 for (Entry<ServerName, List<RegionInfo>> entry : result.entrySet()) { 396 397 ServerName sn = entry.getKey(); 398 ServerName primary = ServerName.valueOf(sn.getHostname(), sn.getPort(), NON_STARTCODE); 399 400 for (RegionInfo hri : entry.getValue()) { 401 402 if (FavoredNodesManager.isFavoredNodeApplicable(hri)) { 403 List<ServerName> favoredNodes = fnm.getFavoredNodes(hri); 404 if (favoredNodes == null || favoredNodes.size() < FAVORED_NODES_NUM) { 405 406 LOG.debug("Generating favored nodes for: " + hri + " with primary: " + primary); 407 ServerName[] secondaryAndTertiaryNodes = helper.getSecondaryAndTertiary(hri, primary); 408 if (secondaryAndTertiaryNodes != null && secondaryAndTertiaryNodes.length == 2) { 409 List<ServerName> newFavoredNodes = Lists.newArrayList(); 410 newFavoredNodes.add(primary); 411 newFavoredNodes.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(), 412 secondaryAndTertiaryNodes[0].getPort(), NON_STARTCODE)); 413 newFavoredNodes.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(), 414 secondaryAndTertiaryNodes[1].getPort(), NON_STARTCODE)); 415 regionFNMap.put(hri, newFavoredNodes); 416 addRegionToMap(assignmentMap, hri, sn); 417 418 } else { 419 throw new HBaseIOException( 420 "Cannot generate secondary/tertiary FN for " + hri + " generated " 421 + (secondaryAndTertiaryNodes != null ? secondaryAndTertiaryNodes : " nothing")); 422 } 423 } else { 424 List<ServerName> onlineFN = getOnlineFavoredNodes(servers, favoredNodes); 425 if (onlineFN.isEmpty()) { 426 // All favored nodes are dead, lets assign it to BOGUS 427 addRegionToMap(assignmentMap, hri, BOGUS_SERVER_NAME); 428 } else { 429 // Is primary not on FN? Less likely, but we can still take care of this. 430 if (FavoredNodesPlan.getFavoredServerPosition(favoredNodes, sn) != null) { 431 addRegionToMap(assignmentMap, hri, sn); 432 } else { 433 ServerName destination = 434 onlineFN.get(ThreadLocalRandom.current().nextInt(onlineFN.size())); 435 LOG.warn("Region: " + hri + " not hosted on favored nodes: " + favoredNodes 436 + " current: " + sn + " moving to: " + destination); 437 addRegionToMap(assignmentMap, hri, destination); 438 } 439 } 440 } 441 } else { 442 addRegionToMap(assignmentMap, hri, sn); 443 } 444 } 445 } 446 447 if (!regionFNMap.isEmpty()) { 448 LOG.debug("Updating FN in meta for missing regions, count: " + regionFNMap.size()); 449 fnm.updateFavoredNodes(regionFNMap); 450 } 451 452 } catch (IOException e) { 453 throw new HBaseIOException("Cannot generate/update FN for regions: " + regionFNMap.keySet()); 454 } 455 456 return assignmentMap; 457 } 458 459 /* 460 * Return list of favored nodes that are online. 461 */ 462 private List<ServerName> getOnlineFavoredNodes(List<ServerName> onlineServers, 463 List<ServerName> serversWithoutStartCodes) { 464 if (serversWithoutStartCodes == null) { 465 return null; 466 } else { 467 List<ServerName> result = Lists.newArrayList(); 468 for (ServerName sn : serversWithoutStartCodes) { 469 for (ServerName online : onlineServers) { 470 if (ServerName.isSameAddress(sn, online)) { 471 result.add(online); 472 } 473 } 474 } 475 return result; 476 } 477 } 478 479 public synchronized List<ServerName> getFavoredNodes(RegionInfo regionInfo) { 480 return this.fnm.getFavoredNodes(regionInfo); 481 } 482 483 /* 484 * Generate Favored Nodes for daughters during region split. If the parent does not have FN, 485 * regenerates them for the daughters. If the parent has FN, inherit two FN from parent for each 486 * daughter and generate the remaining. The primary FN for both the daughters should be the same 487 * as parent. Inherit the secondary FN from the parent but keep it different for each daughter. 488 * Choose the remaining FN randomly. This would give us better distribution over a period of time 489 * after enough splits. 490 */ 491 @Override 492 public void generateFavoredNodesForDaughter(List<ServerName> servers, RegionInfo parent, 493 RegionInfo regionA, RegionInfo regionB) throws IOException { 494 495 Map<RegionInfo, List<ServerName>> result = new HashMap<>(); 496 FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager); 497 helper.initialize(); 498 499 List<ServerName> parentFavoredNodes = fnm.getFavoredNodes(parent); 500 if (parentFavoredNodes == null) { 501 LOG.debug("Unable to find favored nodes for parent, " + parent 502 + " generating new favored nodes for daughter"); 503 result.put(regionA, helper.generateFavoredNodes(regionA)); 504 result.put(regionB, helper.generateFavoredNodes(regionB)); 505 506 } else { 507 508 // Lets get the primary and secondary from parent for regionA 509 Set<ServerName> regionAFN = 510 getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, SECONDARY); 511 result.put(regionA, Lists.newArrayList(regionAFN)); 512 513 // Lets get the primary and tertiary from parent for regionB 514 Set<ServerName> regionBFN = 515 getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, TERTIARY); 516 result.put(regionB, Lists.newArrayList(regionBFN)); 517 } 518 519 fnm.updateFavoredNodes(result); 520 } 521 522 private Set<ServerName> getInheritedFNForDaughter(FavoredNodeAssignmentHelper helper, 523 List<ServerName> parentFavoredNodes, Position primary, Position secondary) throws IOException { 524 525 Set<ServerName> daughterFN = Sets.newLinkedHashSet(); 526 if (parentFavoredNodes.size() >= primary.ordinal()) { 527 daughterFN.add(parentFavoredNodes.get(primary.ordinal())); 528 } 529 530 if (parentFavoredNodes.size() >= secondary.ordinal()) { 531 daughterFN.add(parentFavoredNodes.get(secondary.ordinal())); 532 } 533 534 while (daughterFN.size() < FAVORED_NODES_NUM) { 535 ServerName newNode = helper.generateMissingFavoredNode(Lists.newArrayList(daughterFN)); 536 daughterFN.add(newNode); 537 } 538 return daughterFN; 539 } 540 541 /* 542 * Generate favored nodes for a region during merge. Choose the FN from one of the sources to keep 543 * it simple. 544 */ 545 @Override 546 public void generateFavoredNodesForMergedRegion(RegionInfo merged, RegionInfo[] mergeParents) 547 throws IOException { 548 updateFavoredNodesForRegion(merged, fnm.getFavoredNodes(mergeParents[0])); 549 } 550 551 /* 552 * Pick favored nodes with the highest locality for a region with lowest locality. 553 */ 554 private class FavoredNodeLocalityPicker extends CandidateGenerator { 555 556 @Override 557 protected BalanceAction generate(BalancerClusterState cluster) { 558 559 int thisServer = pickRandomServer(cluster); 560 int thisRegion; 561 if (thisServer == -1) { 562 LOG.trace("Could not pick lowest local region server"); 563 return BalanceAction.NULL_ACTION; 564 } else { 565 // Pick lowest local region on this server 566 thisRegion = pickLowestLocalRegionOnServer(cluster, thisServer); 567 } 568 if (thisRegion == -1) { 569 if (cluster.regionsPerServer[thisServer].length > 0) { 570 LOG.trace("Could not pick lowest local region even when region server held " 571 + cluster.regionsPerServer[thisServer].length + " regions"); 572 } 573 return BalanceAction.NULL_ACTION; 574 } 575 576 RegionInfo hri = cluster.regions[thisRegion]; 577 List<ServerName> favoredNodes = fnm.getFavoredNodes(hri); 578 int otherServer; 579 if (favoredNodes == null) { 580 if (!FavoredNodesManager.isFavoredNodeApplicable(hri)) { 581 otherServer = pickOtherRandomServer(cluster, thisServer); 582 } else { 583 // No FN, ignore 584 LOG.trace("Ignoring, no favored nodes for region: " + hri); 585 return BalanceAction.NULL_ACTION; 586 } 587 } else { 588 // Pick other favored node with the highest locality 589 otherServer = getDifferentFavoredNode(cluster, favoredNodes, thisServer); 590 } 591 return getAction(thisServer, thisRegion, otherServer, -1); 592 } 593 594 private int getDifferentFavoredNode(BalancerClusterState cluster, List<ServerName> favoredNodes, 595 int currentServer) { 596 List<Integer> fnIndex = new ArrayList<>(); 597 for (ServerName sn : favoredNodes) { 598 if (cluster.serversToIndex.containsKey(sn.getAddress())) { 599 fnIndex.add(cluster.serversToIndex.get(sn.getAddress())); 600 } 601 } 602 float locality = 0; 603 int highestLocalRSIndex = -1; 604 for (Integer index : fnIndex) { 605 if (index != currentServer) { 606 float temp = cluster.localityPerServer[index]; 607 if (temp >= locality) { 608 locality = temp; 609 highestLocalRSIndex = index; 610 } 611 } 612 } 613 return highestLocalRSIndex; 614 } 615 616 private int pickLowestLocalRegionOnServer(BalancerClusterState cluster, int server) { 617 return cluster.getLowestLocalityRegionOnServer(server); 618 } 619 } 620 621 /* 622 * This is like LoadCandidateGenerator, but we choose appropriate FN for the region on the most 623 * loaded server. 624 */ 625 class FavoredNodeLoadPicker extends CandidateGenerator { 626 627 @Override 628 BalanceAction generate(BalancerClusterState cluster) { 629 cluster.sortServersByRegionCount(); 630 int thisServer = pickMostLoadedServer(cluster); 631 int thisRegion = pickRandomRegion(cluster, thisServer, 0); 632 RegionInfo hri = cluster.regions[thisRegion]; 633 int otherServer; 634 List<ServerName> favoredNodes = fnm.getFavoredNodes(hri); 635 if (favoredNodes == null) { 636 if (!FavoredNodesManager.isFavoredNodeApplicable(hri)) { 637 otherServer = pickLeastLoadedServer(cluster, thisServer); 638 } else { 639 return BalanceAction.NULL_ACTION; 640 } 641 } else { 642 otherServer = pickLeastLoadedFNServer(cluster, favoredNodes, thisServer); 643 } 644 return getAction(thisServer, thisRegion, otherServer, -1); 645 } 646 647 private int pickLeastLoadedServer(final BalancerClusterState cluster, int thisServer) { 648 Integer[] servers = cluster.serverIndicesSortedByRegionCount; 649 int index; 650 for (index = 0; index < servers.length; index++) { 651 if ((servers[index] != null) && servers[index] != thisServer) { 652 break; 653 } 654 } 655 return servers[index]; 656 } 657 658 private int pickLeastLoadedFNServer(final BalancerClusterState cluster, 659 List<ServerName> favoredNodes, int currentServerIndex) { 660 List<Integer> fnIndex = new ArrayList<>(); 661 for (ServerName sn : favoredNodes) { 662 if (cluster.serversToIndex.containsKey(sn.getAddress())) { 663 fnIndex.add(cluster.serversToIndex.get(sn.getAddress())); 664 } 665 } 666 int leastLoadedFN = -1; 667 int load = Integer.MAX_VALUE; 668 for (Integer index : fnIndex) { 669 if (index != currentServerIndex) { 670 int temp = cluster.getNumRegions(index); 671 if (temp < load) { 672 load = temp; 673 leastLoadedFN = index; 674 } 675 } 676 } 677 return leastLoadedFN; 678 } 679 680 private int pickMostLoadedServer(final BalancerClusterState cluster) { 681 Integer[] servers = cluster.serverIndicesSortedByRegionCount; 682 int index; 683 for (index = servers.length - 1; index > 0; index--) { 684 if (servers[index] != null) { 685 break; 686 } 687 } 688 return servers[index]; 689 } 690 } 691 692 /* 693 * For all regions correctly assigned to favored nodes, we just use the stochastic balancer 694 * implementation. For the misplaced regions, we assign a bogus server to it and AM takes care. 695 */ 696 @Override 697 protected List<RegionPlan> balanceTable(TableName tableName, 698 Map<ServerName, List<RegionInfo>> loadOfOneTable) { 699 700 if (this.services != null) { 701 702 List<RegionPlan> regionPlans = Lists.newArrayList(); 703 Map<ServerName, List<RegionInfo>> correctAssignments = new HashMap<>(); 704 int misplacedRegions = 0; 705 706 for (Entry<ServerName, List<RegionInfo>> entry : loadOfOneTable.entrySet()) { 707 ServerName current = entry.getKey(); 708 List<RegionInfo> regions = Lists.newArrayList(); 709 correctAssignments.put(current, regions); 710 711 for (RegionInfo hri : entry.getValue()) { 712 List<ServerName> favoredNodes = fnm.getFavoredNodes(hri); 713 if ( 714 FavoredNodesPlan.getFavoredServerPosition(favoredNodes, current) != null 715 || !FavoredNodesManager.isFavoredNodeApplicable(hri) 716 ) { 717 regions.add(hri); 718 719 } else { 720 // No favored nodes, lets unassign. 721 LOG.warn("Region not on favored nodes, unassign. Region: " + hri + " current: " 722 + current + " favored nodes: " + favoredNodes); 723 try { 724 this.services.getAssignmentManager().unassign(hri); 725 } catch (IOException e) { 726 LOG.warn("Failed unassign", e); 727 continue; 728 } 729 RegionPlan rp = new RegionPlan(hri, null, null); 730 regionPlans.add(rp); 731 misplacedRegions++; 732 } 733 } 734 } 735 LOG.debug("Found misplaced regions: " + misplacedRegions + ", not on favored nodes."); 736 List<RegionPlan> regionPlansFromBalance = super.balanceTable(tableName, correctAssignments); 737 if (regionPlansFromBalance != null) { 738 regionPlans.addAll(regionPlansFromBalance); 739 } 740 return regionPlans; 741 } else { 742 return super.balanceTable(tableName, loadOfOneTable); 743 } 744 } 745}