001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import static org.apache.hadoop.hbase.ServerName.NON_STARTCODE;
021import static org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper.FAVORED_NODES_NUM;
022import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY;
023import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY;
024import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY;
025
026import java.io.IOException;
027import java.util.ArrayList;
028import java.util.Collection;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032import java.util.Map.Entry;
033import java.util.Set;
034import org.apache.hadoop.hbase.HBaseIOException;
035import org.apache.hadoop.hbase.ServerMetrics;
036import org.apache.hadoop.hbase.ServerName;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.client.RegionInfo;
039import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
040import org.apache.hadoop.hbase.favored.FavoredNodesManager;
041import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
042import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position;
043import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;
044import org.apache.hadoop.hbase.master.LoadBalancer;
045import org.apache.hadoop.hbase.master.MasterServices;
046import org.apache.hadoop.hbase.master.RegionPlan;
047import org.apache.hadoop.hbase.util.Pair;
048import org.apache.yetus.audience.InterfaceAudience;
049import org.slf4j.Logger;
050import org.slf4j.LoggerFactory;
051
052import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
053import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
054import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
055
056/**
057 * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that
058 * assigns favored nodes for each region. There is a Primary RegionServer that hosts
059 * the region, and then there is Secondary and Tertiary RegionServers. Currently, the
060 * favored nodes information is used in creating HDFS files - the Primary RegionServer
061 * passes the primary, secondary, tertiary node addresses as hints to the
062 * DistributedFileSystem API for creating files on the filesystem. These nodes are
063 * treated as hints by the HDFS to place the blocks of the file. This alleviates the
064 * problem to do with reading from remote nodes (since we can make the Secondary
065 * RegionServer as the new Primary RegionServer) after a region is recovered. This
066 * should help provide consistent read latencies for the regions even when their
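 * primary region servers die. This balancer provides two
 * {@link CandidateGenerator} implementations: one that picks favored nodes based on
 * load and one that picks favored nodes based on locality.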
069 *
070 */
071@InterfaceAudience.Private
072public class FavoredStochasticBalancer extends StochasticLoadBalancer implements
073    FavoredNodesPromoter {
074
075  private static final Logger LOG = LoggerFactory.getLogger(FavoredStochasticBalancer.class);
076  private FavoredNodesManager fnm;
077
078  @Override
079  public void initialize() throws HBaseIOException {
080    configureGenerators();
081    super.initialize();
082  }
083
084  protected void configureGenerators() {
085    List<CandidateGenerator> fnPickers = new ArrayList<>(2);
086    fnPickers.add(new FavoredNodeLoadPicker());
087    fnPickers.add(new FavoredNodeLocalityPicker());
088    setCandidateGenerators(fnPickers);
089  }
090
091  @Override
092  public synchronized void setMasterServices(MasterServices masterServices) {
093    super.setMasterServices(masterServices);
094    fnm = masterServices.getFavoredNodesManager();
095  }
096
097  /*
098   * Round robin assignment: Segregate the regions into two types:
099   *
100   * 1. The regions that have favored node assignment where at least one of the favored node
101   * is still alive. In this case, try to adhere to the current favored nodes assignment as
102   * much as possible - i.e., if the current primary is gone, then make the secondary or
103   * tertiary as the new host for the region (based on their current load). Note that we don't
104   * change the favored node assignments here (even though one or more favored node is
105   * currently down). That will be done by the admin operations.
106   *
107   * 2. The regions that currently don't have favored node assignments. Generate favored nodes
108   * for them and then assign. Generate the primary fn in round robin fashion and generate
109   * secondary and tertiary as per favored nodes constraints.
110   */
111  @Override
112  public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
113      List<ServerName> servers) throws HBaseIOException {
114
115    metricsBalancer.incrMiscInvocations();
116
117    Set<RegionInfo> regionSet = Sets.newHashSet(regions);
118    Map<ServerName, List<RegionInfo>> assignmentMap = assignMasterSystemRegions(regions, servers);
119    if (assignmentMap != null && !assignmentMap.isEmpty()) {
120      servers = new ArrayList<>(servers);
121      // Guarantee not to put other regions on master
122      servers.remove(masterServerName);
123      List<RegionInfo> masterRegions = assignmentMap.get(masterServerName);
124      if (!masterRegions.isEmpty()) {
125        for (RegionInfo region: masterRegions) {
126          regionSet.remove(region);
127        }
128      }
129    }
130
131    if (regionSet.isEmpty()) {
132      return assignmentMap;
133    }
134
135    try {
136      FavoredNodeAssignmentHelper helper =
137          new FavoredNodeAssignmentHelper(servers, fnm.getRackManager());
138      helper.initialize();
139
140      Set<RegionInfo> systemRegions = FavoredNodesManager.filterNonFNApplicableRegions(regionSet);
141      regionSet.removeAll(systemRegions);
142
143      // Assign all system regions
144      Map<ServerName, List<RegionInfo>> systemAssignments =
145        super.roundRobinAssignment(Lists.newArrayList(systemRegions), servers);
146
147      // Segregate favored and non-favored nodes regions and assign accordingly.
148      Pair<Map<ServerName,List<RegionInfo>>, List<RegionInfo>> segregatedRegions =
149        segregateRegionsAndAssignRegionsWithFavoredNodes(regionSet, servers);
150      Map<ServerName, List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
151      Map<ServerName, List<RegionInfo>> regionsWithoutFN =
152        generateFNForRegionsWithoutFN(helper, segregatedRegions.getSecond());
153
154      // merge the assignment maps
155      mergeAssignmentMaps(assignmentMap, systemAssignments);
156      mergeAssignmentMaps(assignmentMap, regionsWithFavoredNodesMap);
157      mergeAssignmentMaps(assignmentMap, regionsWithoutFN);
158
159    } catch (Exception ex) {
160      throw new HBaseIOException("Encountered exception while doing favored-nodes assignment "
161        + ex + " Falling back to regular assignment", ex);
162    }
163    return assignmentMap;
164  }
165
166  private void mergeAssignmentMaps(Map<ServerName, List<RegionInfo>> assignmentMap,
167      Map<ServerName, List<RegionInfo>> otherAssignments) {
168
169    if (otherAssignments == null || otherAssignments.isEmpty()) {
170      return;
171    }
172
173    for (Entry<ServerName, List<RegionInfo>> entry : otherAssignments.entrySet()) {
174      ServerName sn = entry.getKey();
175      List<RegionInfo> regionsList = entry.getValue();
176      if (assignmentMap.get(sn) == null) {
177        assignmentMap.put(sn, Lists.newArrayList(regionsList));
178      } else {
179        assignmentMap.get(sn).addAll(regionsList);
180      }
181    }
182  }
183
184  private Map<ServerName, List<RegionInfo>> generateFNForRegionsWithoutFN(
185      FavoredNodeAssignmentHelper helper, List<RegionInfo> regions) throws IOException {
186
187    Map<ServerName, List<RegionInfo>> assignmentMap = Maps.newHashMap();
188    Map<RegionInfo, List<ServerName>> regionsNoFNMap;
189
190    if (regions.size() > 0) {
191      regionsNoFNMap = helper.generateFavoredNodesRoundRobin(assignmentMap, regions);
192      fnm.updateFavoredNodes(regionsNoFNMap);
193    }
194    return assignmentMap;
195  }
196
197  /*
198   * Return a pair - one with assignments when favored nodes are present and another with regions
199   * without favored nodes.
200   */
201  private Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>>
202  segregateRegionsAndAssignRegionsWithFavoredNodes(Collection<RegionInfo> regions,
203      List<ServerName> onlineServers) throws HBaseIOException {
204
205    // Since we expect FN to be present most of the time, lets create map with same size
206    Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes =
207        new HashMap<>(onlineServers.size());
208    List<RegionInfo> regionsWithNoFavoredNodes = new ArrayList<>();
209
210    for (RegionInfo region : regions) {
211      List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
212      ServerName primaryHost = null;
213      ServerName secondaryHost = null;
214      ServerName tertiaryHost = null;
215
216      if (favoredNodes != null && !favoredNodes.isEmpty()) {
217        for (ServerName s : favoredNodes) {
218          ServerName serverWithLegitStartCode = getServerFromFavoredNode(onlineServers, s);
219          if (serverWithLegitStartCode != null) {
220            FavoredNodesPlan.Position position =
221                FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s);
222            if (Position.PRIMARY.equals(position)) {
223              primaryHost = serverWithLegitStartCode;
224            } else if (Position.SECONDARY.equals(position)) {
225              secondaryHost = serverWithLegitStartCode;
226            } else if (Position.TERTIARY.equals(position)) {
227              tertiaryHost = serverWithLegitStartCode;
228            }
229          }
230        }
231        assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region, primaryHost,
232            secondaryHost, tertiaryHost);
233      } else {
234        regionsWithNoFavoredNodes.add(region);
235      }
236    }
237    return new Pair<>(assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
238  }
239
240  private void addRegionToMap(Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes,
241      RegionInfo region, ServerName host) {
242
243    List<RegionInfo> regionsOnServer;
244    if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) {
245      regionsOnServer = Lists.newArrayList();
246      assignmentMapForFavoredNodes.put(host, regionsOnServer);
247    }
248    regionsOnServer.add(region);
249  }
250
251  /*
252   * Get the ServerName for the FavoredNode. Since FN's startcode is -1, we could want to get the
253   * ServerName with the correct start code from the list of provided servers.
254   */
255  private ServerName getServerFromFavoredNode(List<ServerName> servers, ServerName fn) {
256    for (ServerName server : servers) {
257      if (ServerName.isSameAddress(fn, server)) {
258        return server;
259      }
260    }
261    return null;
262  }
263
264  /*
265   * Assign the region to primary if its available. If both secondary and tertiary are available,
266   * assign to the host which has less load. Else assign to secondary or tertiary whichever is
267   * available (in that order).
268   */
269  private void assignRegionToAvailableFavoredNode(
270      Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes, RegionInfo region,
271      ServerName primaryHost, ServerName secondaryHost, ServerName tertiaryHost) {
272
273    if (primaryHost != null) {
274      addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);
275
276    } else if (secondaryHost != null && tertiaryHost != null) {
277
278      // Assign the region to the one with a lower load (both have the desired hdfs blocks)
279      ServerName s;
280      ServerMetrics tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost);
281      ServerMetrics secondaryLoad = super.services.getServerManager().getLoad(secondaryHost);
282      if (secondaryLoad != null && tertiaryLoad != null) {
283        if (secondaryLoad.getRegionMetrics().size() < tertiaryLoad.getRegionMetrics().size()) {
284          s = secondaryHost;
285        } else {
286          s = tertiaryHost;
287        }
288      } else {
289        // We don't have one/more load, lets just choose a random node
290        s = RANDOM.nextBoolean() ? secondaryHost : tertiaryHost;
291      }
292      addRegionToMap(assignmentMapForFavoredNodes, region, s);
293    } else if (secondaryHost != null) {
294      addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost);
295    } else if (tertiaryHost != null) {
296      addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost);
297    } else {
298      // No favored nodes are online, lets assign to BOGUS server
299      addRegionToMap(assignmentMapForFavoredNodes, region, BOGUS_SERVER_NAME);
300    }
301  }
302
303  /*
304   * If we have favored nodes for a region, we will return one of the FN as destination. If
305   * favored nodes are not present for a region, we will generate and return one of the FN as
306   * destination. If we can't generate anything, lets fallback.
307   */
308  @Override
309  public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers)
310      throws HBaseIOException {
311
312    if (servers != null && servers.contains(masterServerName)) {
313      if (shouldBeOnMaster(regionInfo)) {
314        metricsBalancer.incrMiscInvocations();
315        return masterServerName;
316      }
317      if (!LoadBalancer.isTablesOnMaster(getConf())) {
318        // Guarantee we do not put any regions on master
319        servers = new ArrayList<>(servers);
320        servers.remove(masterServerName);
321      }
322    }
323
324    ServerName destination = null;
325    if (!FavoredNodesManager.isFavoredNodeApplicable(regionInfo)) {
326      return super.randomAssignment(regionInfo, servers);
327    }
328
329    metricsBalancer.incrMiscInvocations();
330
331    List<ServerName> favoredNodes = fnm.getFavoredNodes(regionInfo);
332    if (favoredNodes == null || favoredNodes.isEmpty()) {
333      // Generate new favored nodes and return primary
334      FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, getConf());
335      helper.initialize();
336      try {
337        favoredNodes = helper.generateFavoredNodes(regionInfo);
338        updateFavoredNodesForRegion(regionInfo, favoredNodes);
339
340      } catch (IOException e) {
341        LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + e);
342        throw new HBaseIOException(e);
343      }
344    }
345
346    List<ServerName> onlineServers = getOnlineFavoredNodes(servers, favoredNodes);
347    if (onlineServers.size() > 0) {
348      destination = onlineServers.get(RANDOM.nextInt(onlineServers.size()));
349    }
350
351    boolean alwaysAssign = getConf().getBoolean(FAVORED_ALWAYS_ASSIGN_REGIONS, true);
352    if (destination == null && alwaysAssign) {
353      LOG.warn("Can't generate FN for region: " + regionInfo + " falling back");
354      destination = super.randomAssignment(regionInfo, servers);
355    }
356    return destination;
357  }
358
359  private void updateFavoredNodesForRegion(RegionInfo regionInfo, List<ServerName> newFavoredNodes)
360      throws IOException {
361    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
362    regionFNMap.put(regionInfo, newFavoredNodes);
363    fnm.updateFavoredNodes(regionFNMap);
364  }
365
366  /*
367   * Reuse BaseLoadBalancer's retainAssignment, but generate favored nodes when its missing.
368   */
369  @Override
370  public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions,
371      List<ServerName> servers) throws HBaseIOException {
372
373    Map<ServerName, List<RegionInfo>> assignmentMap = Maps.newHashMap();
374    Map<ServerName, List<RegionInfo>> result = super.retainAssignment(regions, servers);
375    if (result == null || result.isEmpty()) {
376      LOG.warn("Nothing to assign to, probably no servers or no regions");
377      return null;
378    }
379
380    // Guarantee not to put other regions on master
381    if (servers != null && servers.contains(masterServerName)) {
382      servers = new ArrayList<>(servers);
383      servers.remove(masterServerName);
384    }
385
386    // Lets check if favored nodes info is in META, if not generate now.
387    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, getConf());
388    helper.initialize();
389
390    LOG.debug("Generating favored nodes for regions missing them.");
391    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
392
393    try {
394      for (Entry<ServerName, List<RegionInfo>> entry : result.entrySet()) {
395
396        ServerName sn = entry.getKey();
397        ServerName primary = ServerName.valueOf(sn.getHostname(), sn.getPort(), NON_STARTCODE);
398
399        for (RegionInfo hri : entry.getValue()) {
400
401          if (FavoredNodesManager.isFavoredNodeApplicable(hri)) {
402            List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
403            if (favoredNodes == null || favoredNodes.size() < FAVORED_NODES_NUM) {
404
405              LOG.debug("Generating favored nodes for: " + hri + " with primary: " + primary);
406              ServerName[] secondaryAndTertiaryNodes = helper.getSecondaryAndTertiary(hri, primary);
407              if (secondaryAndTertiaryNodes != null && secondaryAndTertiaryNodes.length == 2) {
408                List<ServerName> newFavoredNodes = Lists.newArrayList();
409                newFavoredNodes.add(primary);
410                newFavoredNodes.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(),
411                    secondaryAndTertiaryNodes[0].getPort(), NON_STARTCODE));
412                newFavoredNodes.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(),
413                    secondaryAndTertiaryNodes[1].getPort(), NON_STARTCODE));
414                regionFNMap.put(hri, newFavoredNodes);
415                addRegionToMap(assignmentMap, hri, sn);
416
417              } else {
418                throw new HBaseIOException("Cannot generate secondary/tertiary FN for " + hri
419                  + " generated "
420                  + (secondaryAndTertiaryNodes != null ? secondaryAndTertiaryNodes : " nothing"));
421              }
422            } else {
423              List<ServerName> onlineFN = getOnlineFavoredNodes(servers, favoredNodes);
424              if (onlineFN.isEmpty()) {
425                // All favored nodes are dead, lets assign it to BOGUS
426                addRegionToMap(assignmentMap, hri, BOGUS_SERVER_NAME);
427              } else {
428                // Is primary not on FN? Less likely, but we can still take care of this.
429                if (FavoredNodesPlan.getFavoredServerPosition(favoredNodes, sn) != null) {
430                  addRegionToMap(assignmentMap, hri, sn);
431                } else {
432                  ServerName destination = onlineFN.get(RANDOM.nextInt(onlineFN.size()));
433                  LOG.warn("Region: " + hri + " not hosted on favored nodes: " + favoredNodes
434                    + " current: " + sn + " moving to: " + destination);
435                  addRegionToMap(assignmentMap, hri, destination);
436                }
437              }
438            }
439          } else {
440            addRegionToMap(assignmentMap, hri, sn);
441          }
442        }
443      }
444
445      if (!regionFNMap.isEmpty()) {
446        LOG.debug("Updating FN in meta for missing regions, count: " + regionFNMap.size());
447        fnm.updateFavoredNodes(regionFNMap);
448      }
449
450    } catch (IOException e) {
451      throw new HBaseIOException("Cannot generate/update FN for regions: " + regionFNMap.keySet());
452    }
453
454    return assignmentMap;
455  }
456
457  /*
458   * Return list of favored nodes that are online.
459   */
460  private List<ServerName> getOnlineFavoredNodes(List<ServerName> onlineServers,
461      List<ServerName> serversWithoutStartCodes) {
462    if (serversWithoutStartCodes == null) {
463      return null;
464    } else {
465      List<ServerName> result = Lists.newArrayList();
466      for (ServerName sn : serversWithoutStartCodes) {
467        for (ServerName online : onlineServers) {
468          if (ServerName.isSameAddress(sn, online)) {
469            result.add(online);
470          }
471        }
472      }
473      return result;
474    }
475  }
476
477  public synchronized List<ServerName> getFavoredNodes(RegionInfo regionInfo) {
478    return this.fnm.getFavoredNodes(regionInfo);
479  }
480
481  /*
482   * Generate Favored Nodes for daughters during region split.
483   *
484   * If the parent does not have FN, regenerates them for the daughters.
485   *
486   * If the parent has FN, inherit two FN from parent for each daughter and generate the remaining.
487   * The primary FN for both the daughters should be the same as parent. Inherit the secondary
488   * FN from the parent but keep it different for each daughter. Choose the remaining FN
489   * randomly. This would give us better distribution over a period of time after enough splits.
490   */
491  @Override
492  public void generateFavoredNodesForDaughter(List<ServerName> servers, RegionInfo parent,
493      RegionInfo regionA, RegionInfo regionB) throws IOException {
494
495    Map<RegionInfo, List<ServerName>> result = new HashMap<>();
496    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
497    helper.initialize();
498
499    List<ServerName> parentFavoredNodes = fnm.getFavoredNodes(parent);
500    if (parentFavoredNodes == null) {
501      LOG.debug("Unable to find favored nodes for parent, " + parent
502          + " generating new favored nodes for daughter");
503      result.put(regionA, helper.generateFavoredNodes(regionA));
504      result.put(regionB, helper.generateFavoredNodes(regionB));
505
506    } else {
507
508      // Lets get the primary and secondary from parent for regionA
509      Set<ServerName> regionAFN =
510          getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, SECONDARY);
511      result.put(regionA, Lists.newArrayList(regionAFN));
512
513      // Lets get the primary and tertiary from parent for regionB
514      Set<ServerName> regionBFN =
515          getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, TERTIARY);
516      result.put(regionB, Lists.newArrayList(regionBFN));
517    }
518
519    fnm.updateFavoredNodes(result);
520  }
521
522  private Set<ServerName> getInheritedFNForDaughter(FavoredNodeAssignmentHelper helper,
523      List<ServerName> parentFavoredNodes, Position primary, Position secondary)
524      throws IOException {
525
526    Set<ServerName> daughterFN = Sets.newLinkedHashSet();
527    if (parentFavoredNodes.size() >= primary.ordinal()) {
528      daughterFN.add(parentFavoredNodes.get(primary.ordinal()));
529    }
530
531    if (parentFavoredNodes.size() >= secondary.ordinal()) {
532      daughterFN.add(parentFavoredNodes.get(secondary.ordinal()));
533    }
534
535    while (daughterFN.size() < FAVORED_NODES_NUM) {
536      ServerName newNode = helper.generateMissingFavoredNode(Lists.newArrayList(daughterFN));
537      daughterFN.add(newNode);
538    }
539    return daughterFN;
540  }
541
542  /*
543   * Generate favored nodes for a region during merge. Choose the FN from one of the sources to
544   * keep it simple.
545   */
546  @Override
547  public void generateFavoredNodesForMergedRegion(RegionInfo merged, RegionInfo [] mergeParents)
548      throws IOException {
549    updateFavoredNodesForRegion(merged, fnm.getFavoredNodes(mergeParents[0]));
550  }
551
552  /*
553   * Pick favored nodes with the highest locality for a region with lowest locality.
554   */
555  private class FavoredNodeLocalityPicker extends CandidateGenerator {
556
557    @Override
558    protected Cluster.Action generate(Cluster cluster) {
559
560      int thisServer = pickRandomServer(cluster);
561      int thisRegion;
562      if (thisServer == -1) {
563        LOG.trace("Could not pick lowest local region server");
564        return Cluster.NullAction;
565      } else {
566        // Pick lowest local region on this server
567        thisRegion = pickLowestLocalRegionOnServer(cluster, thisServer);
568      }
569      if (thisRegion == -1) {
570        if (cluster.regionsPerServer[thisServer].length > 0) {
571          LOG.trace("Could not pick lowest local region even when region server held "
572            + cluster.regionsPerServer[thisServer].length + " regions");
573        }
574        return Cluster.NullAction;
575      }
576
577      RegionInfo hri = cluster.regions[thisRegion];
578      List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
579      int otherServer;
580      if (favoredNodes == null) {
581        if (!FavoredNodesManager.isFavoredNodeApplicable(hri)) {
582          otherServer = pickOtherRandomServer(cluster, thisServer);
583        } else {
584          // No FN, ignore
585          LOG.trace("Ignoring, no favored nodes for region: " + hri);
586          return Cluster.NullAction;
587        }
588      } else {
589        // Pick other favored node with the highest locality
590        otherServer = getDifferentFavoredNode(cluster, favoredNodes, thisServer);
591      }
592      return getAction(thisServer, thisRegion, otherServer, -1);
593    }
594
595    private int getDifferentFavoredNode(Cluster cluster, List<ServerName> favoredNodes,
596        int currentServer) {
597      List<Integer> fnIndex = new ArrayList<>();
598      for (ServerName sn : favoredNodes) {
599        if (cluster.serversToIndex.containsKey(sn.getHostAndPort())) {
600          fnIndex.add(cluster.serversToIndex.get(sn.getHostAndPort()));
601        }
602      }
603      float locality = 0;
604      int highestLocalRSIndex = -1;
605      for (Integer index : fnIndex) {
606        if (index != currentServer) {
607          float temp = cluster.localityPerServer[index];
608          if (temp >= locality) {
609            locality = temp;
610            highestLocalRSIndex = index;
611          }
612        }
613      }
614      return highestLocalRSIndex;
615    }
616
617    private int pickLowestLocalRegionOnServer(Cluster cluster, int server) {
618      return cluster.getLowestLocalityRegionOnServer(server);
619    }
620  }
621
622  /*
623   * This is like LoadCandidateGenerator, but we choose appropriate FN for the region on the
624   * most loaded server.
625   */
626  class FavoredNodeLoadPicker extends CandidateGenerator {
627
628    @Override
629    Cluster.Action generate(Cluster cluster) {
630      cluster.sortServersByRegionCount();
631      int thisServer = pickMostLoadedServer(cluster);
632      int thisRegion = pickRandomRegion(cluster, thisServer, 0);
633      RegionInfo hri = cluster.regions[thisRegion];
634      int otherServer;
635      List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
636      if (favoredNodes == null) {
637        if (!FavoredNodesManager.isFavoredNodeApplicable(hri)) {
638          otherServer = pickLeastLoadedServer(cluster, thisServer);
639        } else {
640          return Cluster.NullAction;
641        }
642      } else {
643        otherServer = pickLeastLoadedFNServer(cluster, favoredNodes, thisServer);
644      }
645      return getAction(thisServer, thisRegion, otherServer, -1);
646    }
647
648    private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
649      Integer[] servers = cluster.serverIndicesSortedByRegionCount;
650      int index;
651      for (index = 0; index < servers.length ; index++) {
652        if ((servers[index] != null) && servers[index] != thisServer) {
653          break;
654        }
655      }
656      return servers[index];
657    }
658
659    private int pickLeastLoadedFNServer(final Cluster cluster, List<ServerName> favoredNodes,
660        int currentServerIndex) {
661      List<Integer> fnIndex = new ArrayList<>();
662      for (ServerName sn : favoredNodes) {
663        if (cluster.serversToIndex.containsKey(sn.getHostAndPort())) {
664          fnIndex.add(cluster.serversToIndex.get(sn.getHostAndPort()));
665        }
666      }
667      int leastLoadedFN = -1;
668      int load = Integer.MAX_VALUE;
669      for (Integer index : fnIndex) {
670        if (index != currentServerIndex) {
671          int temp = cluster.getNumRegions(index);
672          if (temp < load) {
673            load = temp;
674            leastLoadedFN = index;
675          }
676        }
677      }
678      return leastLoadedFN;
679    }
680
681    private int pickMostLoadedServer(final Cluster cluster) {
682      Integer[] servers = cluster.serverIndicesSortedByRegionCount;
683      int index;
684      for (index = servers.length - 1; index > 0 ; index--) {
685        if (servers[index] != null) {
686          break;
687        }
688      }
689      return servers[index];
690    }
691  }
692
693  /*
694   * For all regions correctly assigned to favored nodes, we just use the stochastic balancer
695   * implementation. For the misplaced regions, we assign a bogus server to it and AM takes care.
696   */
697  @Override
698  public synchronized List<RegionPlan> balanceTable(TableName tableName,
699      Map<ServerName, List<RegionInfo>> loadOfOneTable) {
700
701    if (this.services != null) {
702
703      List<RegionPlan> regionPlans = Lists.newArrayList();
704      Map<ServerName, List<RegionInfo>> correctAssignments = new HashMap<>();
705      int misplacedRegions = 0;
706
707      for (Entry<ServerName, List<RegionInfo>> entry : loadOfOneTable.entrySet()) {
708        ServerName current = entry.getKey();
709        List<RegionInfo> regions = Lists.newArrayList();
710        correctAssignments.put(current, regions);
711
712        for (RegionInfo hri : entry.getValue()) {
713          List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
714          if (FavoredNodesPlan.getFavoredServerPosition(favoredNodes, current) != null ||
715              !FavoredNodesManager.isFavoredNodeApplicable(hri)) {
716            regions.add(hri);
717
718          } else {
719            // No favored nodes, lets unassign.
720            LOG.warn("Region not on favored nodes, unassign. Region: " + hri
721              + " current: " + current + " favored nodes: " + favoredNodes);
722            try {
723              this.services.getAssignmentManager().unassign(hri);
724            } catch (IOException e) {
725              LOG.warn("Failed unassign", e);
726              continue;
727            }
728            RegionPlan rp = new RegionPlan(hri, null, null);
729            regionPlans.add(rp);
730            misplacedRegions++;
731          }
732        }
733      }
734      LOG.debug("Found misplaced regions: " + misplacedRegions + ", not on favored nodes.");
735      List<RegionPlan> regionPlansFromBalance = super.balanceTable(tableName, correctAssignments);
736      if (regionPlansFromBalance != null) {
737        regionPlans.addAll(regionPlansFromBalance);
738      }
739      return regionPlans;
740    } else {
741      return super.balanceTable(tableName, loadOfOneTable);
742    }
743  }
744}
745