001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import static org.apache.hadoop.hbase.ServerName.NON_STARTCODE;
021import static org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper.FAVORED_NODES_NUM;
022import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY;
023import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY;
024import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY;
025
026import edu.umd.cs.findbugs.annotations.NonNull;
027import java.io.IOException;
028import java.util.ArrayList;
029import java.util.Collection;
030import java.util.HashMap;
031import java.util.List;
032import java.util.Map;
033import java.util.Map.Entry;
034import java.util.Set;
035
036import org.apache.hadoop.hbase.HBaseIOException;
037import org.apache.hadoop.hbase.ServerMetrics;
038import org.apache.hadoop.hbase.ServerName;
039import org.apache.hadoop.hbase.TableName;
040import org.apache.hadoop.hbase.client.RegionInfo;
041import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
042import org.apache.hadoop.hbase.favored.FavoredNodesManager;
043import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
044import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position;
045import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;
046import org.apache.hadoop.hbase.master.LoadBalancer;
047import org.apache.hadoop.hbase.master.MasterServices;
048import org.apache.hadoop.hbase.master.RegionPlan;
049import org.apache.hadoop.hbase.util.Pair;
050import org.apache.yetus.audience.InterfaceAudience;
051import org.slf4j.Logger;
052import org.slf4j.LoggerFactory;
053
054import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
055import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
056import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
057
058/**
059 * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that
060 * assigns favored nodes for each region. There is a Primary RegionServer that hosts
061 * the region, and then there is Secondary and Tertiary RegionServers. Currently, the
062 * favored nodes information is used in creating HDFS files - the Primary RegionServer
063 * passes the primary, secondary, tertiary node addresses as hints to the
064 * DistributedFileSystem API for creating files on the filesystem. These nodes are
065 * treated as hints by the HDFS to place the blocks of the file. This alleviates the
066 * problem to do with reading from remote nodes (since we can make the Secondary
067 * RegionServer as the new Primary RegionServer) after a region is recovered. This
068 * should help provide consistent read latencies for the regions even when their
069 * primary region servers die. This provides two
070 * {@link CandidateGenerator}
071 *
072 */
073@InterfaceAudience.Private
074public class FavoredStochasticBalancer extends StochasticLoadBalancer implements
075    FavoredNodesPromoter {
076
077  private static final Logger LOG = LoggerFactory.getLogger(FavoredStochasticBalancer.class);
078  private FavoredNodesManager fnm;
079
080  @Override
081  public void initialize() throws HBaseIOException {
082    configureGenerators();
083    super.initialize();
084  }
085
086  protected void configureGenerators() {
087    List<CandidateGenerator> fnPickers = new ArrayList<>(2);
088    fnPickers.add(new FavoredNodeLoadPicker());
089    fnPickers.add(new FavoredNodeLocalityPicker());
090    setCandidateGenerators(fnPickers);
091  }
092
093  @Override
094  public synchronized void setMasterServices(MasterServices masterServices) {
095    super.setMasterServices(masterServices);
096    fnm = masterServices.getFavoredNodesManager();
097  }
098
099  /*
100   * Round robin assignment: Segregate the regions into two types:
101   *
102   * 1. The regions that have favored node assignment where at least one of the favored node
103   * is still alive. In this case, try to adhere to the current favored nodes assignment as
104   * much as possible - i.e., if the current primary is gone, then make the secondary or
105   * tertiary as the new host for the region (based on their current load). Note that we don't
106   * change the favored node assignments here (even though one or more favored node is
107   * currently down). That will be done by the admin operations.
108   *
109   * 2. The regions that currently don't have favored node assignments. Generate favored nodes
110   * for them and then assign. Generate the primary fn in round robin fashion and generate
111   * secondary and tertiary as per favored nodes constraints.
112   */
113  @Override
114  @NonNull
115  public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
116      List<ServerName> servers) throws HBaseIOException {
117
118    metricsBalancer.incrMiscInvocations();
119
120    Set<RegionInfo> regionSet = Sets.newHashSet(regions);
121    Map<ServerName, List<RegionInfo>> assignmentMap = assignMasterSystemRegions(regions, servers);
122    if (!assignmentMap.isEmpty()) {
123      servers = new ArrayList<>(servers);
124      // Guarantee not to put other regions on master
125      servers.remove(masterServerName);
126      List<RegionInfo> masterRegions = assignmentMap.get(masterServerName);
127      if (!masterRegions.isEmpty()) {
128        for (RegionInfo region: masterRegions) {
129          regionSet.remove(region);
130        }
131      }
132    }
133
134    if (regionSet.isEmpty()) {
135      return assignmentMap;
136    }
137
138    try {
139      FavoredNodeAssignmentHelper helper =
140          new FavoredNodeAssignmentHelper(servers, fnm.getRackManager());
141      helper.initialize();
142
143      Set<RegionInfo> systemRegions = FavoredNodesManager.filterNonFNApplicableRegions(regionSet);
144      regionSet.removeAll(systemRegions);
145
146      // Assign all system regions
147      Map<ServerName, List<RegionInfo>> systemAssignments =
148        super.roundRobinAssignment(Lists.newArrayList(systemRegions), servers);
149
150      // Segregate favored and non-favored nodes regions and assign accordingly.
151      Pair<Map<ServerName,List<RegionInfo>>, List<RegionInfo>> segregatedRegions =
152        segregateRegionsAndAssignRegionsWithFavoredNodes(regionSet, servers);
153      Map<ServerName, List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
154      Map<ServerName, List<RegionInfo>> regionsWithoutFN =
155        generateFNForRegionsWithoutFN(helper, segregatedRegions.getSecond());
156
157      // merge the assignment maps
158      mergeAssignmentMaps(assignmentMap, systemAssignments);
159      mergeAssignmentMaps(assignmentMap, regionsWithFavoredNodesMap);
160      mergeAssignmentMaps(assignmentMap, regionsWithoutFN);
161
162    } catch (Exception ex) {
163      throw new HBaseIOException("Encountered exception while doing favored-nodes assignment "
164        + ex + " Falling back to regular assignment", ex);
165    }
166    return assignmentMap;
167  }
168
169  private void mergeAssignmentMaps(Map<ServerName, List<RegionInfo>> assignmentMap,
170      Map<ServerName, List<RegionInfo>> otherAssignments) {
171
172    if (otherAssignments == null || otherAssignments.isEmpty()) {
173      return;
174    }
175
176    for (Entry<ServerName, List<RegionInfo>> entry : otherAssignments.entrySet()) {
177      ServerName sn = entry.getKey();
178      List<RegionInfo> regionsList = entry.getValue();
179      if (assignmentMap.get(sn) == null) {
180        assignmentMap.put(sn, Lists.newArrayList(regionsList));
181      } else {
182        assignmentMap.get(sn).addAll(regionsList);
183      }
184    }
185  }
186
187  private Map<ServerName, List<RegionInfo>> generateFNForRegionsWithoutFN(
188      FavoredNodeAssignmentHelper helper, List<RegionInfo> regions) throws IOException {
189
190    Map<ServerName, List<RegionInfo>> assignmentMap = Maps.newHashMap();
191    Map<RegionInfo, List<ServerName>> regionsNoFNMap;
192
193    if (regions.size() > 0) {
194      regionsNoFNMap = helper.generateFavoredNodesRoundRobin(assignmentMap, regions);
195      fnm.updateFavoredNodes(regionsNoFNMap);
196    }
197    return assignmentMap;
198  }
199
200  /*
201   * Return a pair - one with assignments when favored nodes are present and another with regions
202   * without favored nodes.
203   */
204  private Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>>
205  segregateRegionsAndAssignRegionsWithFavoredNodes(Collection<RegionInfo> regions,
206      List<ServerName> onlineServers) throws HBaseIOException {
207
208    // Since we expect FN to be present most of the time, lets create map with same size
209    Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes =
210        new HashMap<>(onlineServers.size());
211    List<RegionInfo> regionsWithNoFavoredNodes = new ArrayList<>();
212
213    for (RegionInfo region : regions) {
214      List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
215      ServerName primaryHost = null;
216      ServerName secondaryHost = null;
217      ServerName tertiaryHost = null;
218
219      if (favoredNodes != null && !favoredNodes.isEmpty()) {
220        for (ServerName s : favoredNodes) {
221          ServerName serverWithLegitStartCode = getServerFromFavoredNode(onlineServers, s);
222          if (serverWithLegitStartCode != null) {
223            FavoredNodesPlan.Position position =
224                FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s);
225            if (Position.PRIMARY.equals(position)) {
226              primaryHost = serverWithLegitStartCode;
227            } else if (Position.SECONDARY.equals(position)) {
228              secondaryHost = serverWithLegitStartCode;
229            } else if (Position.TERTIARY.equals(position)) {
230              tertiaryHost = serverWithLegitStartCode;
231            }
232          }
233        }
234        assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region, primaryHost,
235            secondaryHost, tertiaryHost);
236      } else {
237        regionsWithNoFavoredNodes.add(region);
238      }
239    }
240    return new Pair<>(assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
241  }
242
243  private void addRegionToMap(Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes,
244      RegionInfo region, ServerName host) {
245
246    List<RegionInfo> regionsOnServer;
247    if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) {
248      regionsOnServer = Lists.newArrayList();
249      assignmentMapForFavoredNodes.put(host, regionsOnServer);
250    }
251    regionsOnServer.add(region);
252  }
253
254  /*
255   * Get the ServerName for the FavoredNode. Since FN's startcode is -1, we could want to get the
256   * ServerName with the correct start code from the list of provided servers.
257   */
258  private ServerName getServerFromFavoredNode(List<ServerName> servers, ServerName fn) {
259    for (ServerName server : servers) {
260      if (ServerName.isSameAddress(fn, server)) {
261        return server;
262      }
263    }
264    return null;
265  }
266
267  /*
268   * Assign the region to primary if its available. If both secondary and tertiary are available,
269   * assign to the host which has less load. Else assign to secondary or tertiary whichever is
270   * available (in that order).
271   */
272  private void assignRegionToAvailableFavoredNode(
273      Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes, RegionInfo region,
274      ServerName primaryHost, ServerName secondaryHost, ServerName tertiaryHost) {
275
276    if (primaryHost != null) {
277      addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);
278
279    } else if (secondaryHost != null && tertiaryHost != null) {
280
281      // Assign the region to the one with a lower load (both have the desired hdfs blocks)
282      ServerName s;
283      ServerMetrics tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost);
284      ServerMetrics secondaryLoad = super.services.getServerManager().getLoad(secondaryHost);
285      if (secondaryLoad != null && tertiaryLoad != null) {
286        if (secondaryLoad.getRegionMetrics().size() < tertiaryLoad.getRegionMetrics().size()) {
287          s = secondaryHost;
288        } else {
289          s = tertiaryHost;
290        }
291      } else {
292        // We don't have one/more load, lets just choose a random node
293        s = RANDOM.nextBoolean() ? secondaryHost : tertiaryHost;
294      }
295      addRegionToMap(assignmentMapForFavoredNodes, region, s);
296    } else if (secondaryHost != null) {
297      addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost);
298    } else if (tertiaryHost != null) {
299      addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost);
300    } else {
301      // No favored nodes are online, lets assign to BOGUS server
302      addRegionToMap(assignmentMapForFavoredNodes, region, BOGUS_SERVER_NAME);
303    }
304  }
305
306  /*
307   * If we have favored nodes for a region, we will return one of the FN as destination. If
308   * favored nodes are not present for a region, we will generate and return one of the FN as
309   * destination. If we can't generate anything, lets fallback.
310   */
311  @Override
312  public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers)
313      throws HBaseIOException {
314
315    if (servers != null && servers.contains(masterServerName)) {
316      if (shouldBeOnMaster(regionInfo)) {
317        metricsBalancer.incrMiscInvocations();
318        return masterServerName;
319      }
320      if (!LoadBalancer.isTablesOnMaster(getConf())) {
321        // Guarantee we do not put any regions on master
322        servers = new ArrayList<>(servers);
323        servers.remove(masterServerName);
324      }
325    }
326
327    ServerName destination = null;
328    if (!FavoredNodesManager.isFavoredNodeApplicable(regionInfo)) {
329      return super.randomAssignment(regionInfo, servers);
330    }
331
332    metricsBalancer.incrMiscInvocations();
333
334    List<ServerName> favoredNodes = fnm.getFavoredNodes(regionInfo);
335    if (favoredNodes == null || favoredNodes.isEmpty()) {
336      // Generate new favored nodes and return primary
337      FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, getConf());
338      helper.initialize();
339      try {
340        favoredNodes = helper.generateFavoredNodes(regionInfo);
341        updateFavoredNodesForRegion(regionInfo, favoredNodes);
342
343      } catch (IOException e) {
344        LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + e);
345        throw new HBaseIOException(e);
346      }
347    }
348
349    List<ServerName> onlineServers = getOnlineFavoredNodes(servers, favoredNodes);
350    if (onlineServers.size() > 0) {
351      destination = onlineServers.get(RANDOM.nextInt(onlineServers.size()));
352    }
353
354    boolean alwaysAssign = getConf().getBoolean(FAVORED_ALWAYS_ASSIGN_REGIONS, true);
355    if (destination == null && alwaysAssign) {
356      LOG.warn("Can't generate FN for region: " + regionInfo + " falling back");
357      destination = super.randomAssignment(regionInfo, servers);
358    }
359    return destination;
360  }
361
362  private void updateFavoredNodesForRegion(RegionInfo regionInfo, List<ServerName> newFavoredNodes)
363      throws IOException {
364    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
365    regionFNMap.put(regionInfo, newFavoredNodes);
366    fnm.updateFavoredNodes(regionFNMap);
367  }
368
369  /*
370   * Reuse BaseLoadBalancer's retainAssignment, but generate favored nodes when its missing.
371   */
372  @Override
373  @NonNull
374  public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions,
375      List<ServerName> servers) throws HBaseIOException {
376
377    Map<ServerName, List<RegionInfo>> assignmentMap = Maps.newHashMap();
378    Map<ServerName, List<RegionInfo>> result = super.retainAssignment(regions, servers);
379    if (result.isEmpty()) {
380      LOG.warn("Nothing to assign to, probably no servers or no regions");
381      return result;
382    }
383
384    // Guarantee not to put other regions on master
385    if (servers != null && servers.contains(masterServerName)) {
386      servers = new ArrayList<>(servers);
387      servers.remove(masterServerName);
388    }
389
390    // Lets check if favored nodes info is in META, if not generate now.
391    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, getConf());
392    helper.initialize();
393
394    LOG.debug("Generating favored nodes for regions missing them.");
395    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
396
397    try {
398      for (Entry<ServerName, List<RegionInfo>> entry : result.entrySet()) {
399
400        ServerName sn = entry.getKey();
401        ServerName primary = ServerName.valueOf(sn.getHostname(), sn.getPort(), NON_STARTCODE);
402
403        for (RegionInfo hri : entry.getValue()) {
404
405          if (FavoredNodesManager.isFavoredNodeApplicable(hri)) {
406            List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
407            if (favoredNodes == null || favoredNodes.size() < FAVORED_NODES_NUM) {
408
409              LOG.debug("Generating favored nodes for: " + hri + " with primary: " + primary);
410              ServerName[] secondaryAndTertiaryNodes = helper.getSecondaryAndTertiary(hri, primary);
411              if (secondaryAndTertiaryNodes != null && secondaryAndTertiaryNodes.length == 2) {
412                List<ServerName> newFavoredNodes = Lists.newArrayList();
413                newFavoredNodes.add(primary);
414                newFavoredNodes.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(),
415                    secondaryAndTertiaryNodes[0].getPort(), NON_STARTCODE));
416                newFavoredNodes.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(),
417                    secondaryAndTertiaryNodes[1].getPort(), NON_STARTCODE));
418                regionFNMap.put(hri, newFavoredNodes);
419                addRegionToMap(assignmentMap, hri, sn);
420
421              } else {
422                throw new HBaseIOException("Cannot generate secondary/tertiary FN for " + hri
423                  + " generated "
424                  + (secondaryAndTertiaryNodes != null ? secondaryAndTertiaryNodes : " nothing"));
425              }
426            } else {
427              List<ServerName> onlineFN = getOnlineFavoredNodes(servers, favoredNodes);
428              if (onlineFN.isEmpty()) {
429                // All favored nodes are dead, lets assign it to BOGUS
430                addRegionToMap(assignmentMap, hri, BOGUS_SERVER_NAME);
431              } else {
432                // Is primary not on FN? Less likely, but we can still take care of this.
433                if (FavoredNodesPlan.getFavoredServerPosition(favoredNodes, sn) != null) {
434                  addRegionToMap(assignmentMap, hri, sn);
435                } else {
436                  ServerName destination = onlineFN.get(RANDOM.nextInt(onlineFN.size()));
437                  LOG.warn("Region: " + hri + " not hosted on favored nodes: " + favoredNodes
438                    + " current: " + sn + " moving to: " + destination);
439                  addRegionToMap(assignmentMap, hri, destination);
440                }
441              }
442            }
443          } else {
444            addRegionToMap(assignmentMap, hri, sn);
445          }
446        }
447      }
448
449      if (!regionFNMap.isEmpty()) {
450        LOG.debug("Updating FN in meta for missing regions, count: " + regionFNMap.size());
451        fnm.updateFavoredNodes(regionFNMap);
452      }
453
454    } catch (IOException e) {
455      throw new HBaseIOException("Cannot generate/update FN for regions: " + regionFNMap.keySet());
456    }
457
458    return assignmentMap;
459  }
460
461  /*
462   * Return list of favored nodes that are online.
463   */
464  private List<ServerName> getOnlineFavoredNodes(List<ServerName> onlineServers,
465      List<ServerName> serversWithoutStartCodes) {
466    if (serversWithoutStartCodes == null) {
467      return null;
468    } else {
469      List<ServerName> result = Lists.newArrayList();
470      for (ServerName sn : serversWithoutStartCodes) {
471        for (ServerName online : onlineServers) {
472          if (ServerName.isSameAddress(sn, online)) {
473            result.add(online);
474          }
475        }
476      }
477      return result;
478    }
479  }
480
481  public synchronized List<ServerName> getFavoredNodes(RegionInfo regionInfo) {
482    return this.fnm.getFavoredNodes(regionInfo);
483  }
484
485  /*
486   * Generate Favored Nodes for daughters during region split.
487   *
488   * If the parent does not have FN, regenerates them for the daughters.
489   *
490   * If the parent has FN, inherit two FN from parent for each daughter and generate the remaining.
491   * The primary FN for both the daughters should be the same as parent. Inherit the secondary
492   * FN from the parent but keep it different for each daughter. Choose the remaining FN
493   * randomly. This would give us better distribution over a period of time after enough splits.
494   */
495  @Override
496  public void generateFavoredNodesForDaughter(List<ServerName> servers, RegionInfo parent,
497      RegionInfo regionA, RegionInfo regionB) throws IOException {
498
499    Map<RegionInfo, List<ServerName>> result = new HashMap<>();
500    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
501    helper.initialize();
502
503    List<ServerName> parentFavoredNodes = fnm.getFavoredNodes(parent);
504    if (parentFavoredNodes == null) {
505      LOG.debug("Unable to find favored nodes for parent, " + parent
506          + " generating new favored nodes for daughter");
507      result.put(regionA, helper.generateFavoredNodes(regionA));
508      result.put(regionB, helper.generateFavoredNodes(regionB));
509
510    } else {
511
512      // Lets get the primary and secondary from parent for regionA
513      Set<ServerName> regionAFN =
514          getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, SECONDARY);
515      result.put(regionA, Lists.newArrayList(regionAFN));
516
517      // Lets get the primary and tertiary from parent for regionB
518      Set<ServerName> regionBFN =
519          getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, TERTIARY);
520      result.put(regionB, Lists.newArrayList(regionBFN));
521    }
522
523    fnm.updateFavoredNodes(result);
524  }
525
526  private Set<ServerName> getInheritedFNForDaughter(FavoredNodeAssignmentHelper helper,
527      List<ServerName> parentFavoredNodes, Position primary, Position secondary)
528      throws IOException {
529
530    Set<ServerName> daughterFN = Sets.newLinkedHashSet();
531    if (parentFavoredNodes.size() >= primary.ordinal()) {
532      daughterFN.add(parentFavoredNodes.get(primary.ordinal()));
533    }
534
535    if (parentFavoredNodes.size() >= secondary.ordinal()) {
536      daughterFN.add(parentFavoredNodes.get(secondary.ordinal()));
537    }
538
539    while (daughterFN.size() < FAVORED_NODES_NUM) {
540      ServerName newNode = helper.generateMissingFavoredNode(Lists.newArrayList(daughterFN));
541      daughterFN.add(newNode);
542    }
543    return daughterFN;
544  }
545
546  /*
547   * Generate favored nodes for a region during merge. Choose the FN from one of the sources to
548   * keep it simple.
549   */
550  @Override
551  public void generateFavoredNodesForMergedRegion(RegionInfo merged, RegionInfo [] mergeParents)
552      throws IOException {
553    updateFavoredNodesForRegion(merged, fnm.getFavoredNodes(mergeParents[0]));
554  }
555
556  /*
557   * Pick favored nodes with the highest locality for a region with lowest locality.
558   */
559  private class FavoredNodeLocalityPicker extends CandidateGenerator {
560
561    @Override
562    protected Cluster.Action generate(Cluster cluster) {
563
564      int thisServer = pickRandomServer(cluster);
565      int thisRegion;
566      if (thisServer == -1) {
567        LOG.trace("Could not pick lowest local region server");
568        return Cluster.NullAction;
569      } else {
570        // Pick lowest local region on this server
571        thisRegion = pickLowestLocalRegionOnServer(cluster, thisServer);
572      }
573      if (thisRegion == -1) {
574        if (cluster.regionsPerServer[thisServer].length > 0) {
575          LOG.trace("Could not pick lowest local region even when region server held "
576            + cluster.regionsPerServer[thisServer].length + " regions");
577        }
578        return Cluster.NullAction;
579      }
580
581      RegionInfo hri = cluster.regions[thisRegion];
582      List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
583      int otherServer;
584      if (favoredNodes == null) {
585        if (!FavoredNodesManager.isFavoredNodeApplicable(hri)) {
586          otherServer = pickOtherRandomServer(cluster, thisServer);
587        } else {
588          // No FN, ignore
589          LOG.trace("Ignoring, no favored nodes for region: " + hri);
590          return Cluster.NullAction;
591        }
592      } else {
593        // Pick other favored node with the highest locality
594        otherServer = getDifferentFavoredNode(cluster, favoredNodes, thisServer);
595      }
596      return getAction(thisServer, thisRegion, otherServer, -1);
597    }
598
599    private int getDifferentFavoredNode(Cluster cluster, List<ServerName> favoredNodes,
600        int currentServer) {
601      List<Integer> fnIndex = new ArrayList<>();
602      for (ServerName sn : favoredNodes) {
603        if (cluster.serversToIndex.containsKey(sn.getHostAndPort())) {
604          fnIndex.add(cluster.serversToIndex.get(sn.getHostAndPort()));
605        }
606      }
607      float locality = 0;
608      int highestLocalRSIndex = -1;
609      for (Integer index : fnIndex) {
610        if (index != currentServer) {
611          float temp = cluster.localityPerServer[index];
612          if (temp >= locality) {
613            locality = temp;
614            highestLocalRSIndex = index;
615          }
616        }
617      }
618      return highestLocalRSIndex;
619    }
620
621    private int pickLowestLocalRegionOnServer(Cluster cluster, int server) {
622      return cluster.getLowestLocalityRegionOnServer(server);
623    }
624  }
625
626  /*
627   * This is like LoadCandidateGenerator, but we choose appropriate FN for the region on the
628   * most loaded server.
629   */
630  class FavoredNodeLoadPicker extends CandidateGenerator {
631
632    @Override
633    Cluster.Action generate(Cluster cluster) {
634      cluster.sortServersByRegionCount();
635      int thisServer = pickMostLoadedServer(cluster);
636      int thisRegion = pickRandomRegion(cluster, thisServer, 0);
637      RegionInfo hri = cluster.regions[thisRegion];
638      int otherServer;
639      List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
640      if (favoredNodes == null) {
641        if (!FavoredNodesManager.isFavoredNodeApplicable(hri)) {
642          otherServer = pickLeastLoadedServer(cluster, thisServer);
643        } else {
644          return Cluster.NullAction;
645        }
646      } else {
647        otherServer = pickLeastLoadedFNServer(cluster, favoredNodes, thisServer);
648      }
649      return getAction(thisServer, thisRegion, otherServer, -1);
650    }
651
652    private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
653      Integer[] servers = cluster.serverIndicesSortedByRegionCount;
654      int index;
655      for (index = 0; index < servers.length ; index++) {
656        if ((servers[index] != null) && servers[index] != thisServer) {
657          break;
658        }
659      }
660      return servers[index];
661    }
662
663    private int pickLeastLoadedFNServer(final Cluster cluster, List<ServerName> favoredNodes,
664        int currentServerIndex) {
665      List<Integer> fnIndex = new ArrayList<>();
666      for (ServerName sn : favoredNodes) {
667        if (cluster.serversToIndex.containsKey(sn.getHostAndPort())) {
668          fnIndex.add(cluster.serversToIndex.get(sn.getHostAndPort()));
669        }
670      }
671      int leastLoadedFN = -1;
672      int load = Integer.MAX_VALUE;
673      for (Integer index : fnIndex) {
674        if (index != currentServerIndex) {
675          int temp = cluster.getNumRegions(index);
676          if (temp < load) {
677            load = temp;
678            leastLoadedFN = index;
679          }
680        }
681      }
682      return leastLoadedFN;
683    }
684
685    private int pickMostLoadedServer(final Cluster cluster) {
686      Integer[] servers = cluster.serverIndicesSortedByRegionCount;
687      int index;
688      for (index = servers.length - 1; index > 0 ; index--) {
689        if (servers[index] != null) {
690          break;
691        }
692      }
693      return servers[index];
694    }
695  }
696
697  /*
698   * For all regions correctly assigned to favored nodes, we just use the stochastic balancer
699   * implementation. For the misplaced regions, we assign a bogus server to it and AM takes care.
700   */
701  @Override
702  public synchronized List<RegionPlan> balanceTable(TableName tableName,
703      Map<ServerName, List<RegionInfo>> loadOfOneTable) {
704
705    if (this.services != null) {
706
707      List<RegionPlan> regionPlans = Lists.newArrayList();
708      Map<ServerName, List<RegionInfo>> correctAssignments = new HashMap<>();
709      int misplacedRegions = 0;
710
711      for (Entry<ServerName, List<RegionInfo>> entry : loadOfOneTable.entrySet()) {
712        ServerName current = entry.getKey();
713        List<RegionInfo> regions = Lists.newArrayList();
714        correctAssignments.put(current, regions);
715
716        for (RegionInfo hri : entry.getValue()) {
717          List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
718          if (FavoredNodesPlan.getFavoredServerPosition(favoredNodes, current) != null ||
719              !FavoredNodesManager.isFavoredNodeApplicable(hri)) {
720            regions.add(hri);
721
722          } else {
723            // No favored nodes, lets unassign.
724            LOG.warn("Region not on favored nodes, unassign. Region: " + hri
725              + " current: " + current + " favored nodes: " + favoredNodes);
726            try {
727              this.services.getAssignmentManager().unassign(hri);
728            } catch (IOException e) {
729              LOG.warn("Failed unassign", e);
730              continue;
731            }
732            RegionPlan rp = new RegionPlan(hri, null, null);
733            regionPlans.add(rp);
734            misplacedRegions++;
735          }
736        }
737      }
738      LOG.debug("Found misplaced regions: " + misplacedRegions + ", not on favored nodes.");
739      List<RegionPlan> regionPlansFromBalance = super.balanceTable(tableName, correctAssignments);
740      if (regionPlansFromBalance != null) {
741        regionPlans.addAll(regionPlansFromBalance);
742      }
743      return regionPlans;
744    } else {
745      return super.balanceTable(tableName, loadOfOneTable);
746    }
747  }
748}
749