001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.favored;
020
021import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY;
022import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY;
023import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY;
024
025import java.io.IOException;
026import java.util.ArrayList;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.hbase.HBaseIOException;
033import org.apache.hadoop.hbase.HBaseInterfaceAudience;
034import org.apache.hadoop.hbase.ServerMetrics;
035import org.apache.hadoop.hbase.ServerName;
036import org.apache.hadoop.hbase.TableName;
037import org.apache.hadoop.hbase.client.RegionInfo;
038import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position;
039import org.apache.hadoop.hbase.master.RackManager;
040import org.apache.hadoop.hbase.master.RegionPlan;
041import org.apache.hadoop.hbase.master.ServerManager;
042import org.apache.hadoop.hbase.master.SnapshotOfRegionAssignmentFromMeta;
043import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
044import org.apache.hadoop.hbase.util.Pair;
045import org.apache.yetus.audience.InterfaceAudience;
046import org.slf4j.Logger;
047import org.slf4j.LoggerFactory;
048
049import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
050import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
051import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
052
/**
 * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that
 * assigns favored nodes for each region. There is a Primary RegionServer that hosts
 * the region, and then there are Secondary and Tertiary RegionServers. Currently, the
 * favored nodes information is used in creating HDFS files - the Primary RegionServer
 * passes the primary, secondary and tertiary node addresses as hints to the
 * DistributedFileSystem API for creating files on the filesystem. These nodes are
 * treated as hints by HDFS when it places the blocks of the file. This alleviates the
 * problem of reading from remote nodes after a region is recovered (since we can make
 * the Secondary RegionServer the new Primary RegionServer). This should help provide
 * consistent read latencies for the regions even when their primary region servers die.
 */
067@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
068public class FavoredNodeLoadBalancer extends BaseLoadBalancer implements FavoredNodesPromoter {
069  private static final Logger LOG = LoggerFactory.getLogger(FavoredNodeLoadBalancer.class);
070
071  private RackManager rackManager;
072  private Configuration conf;
073  private FavoredNodesManager fnm;
074
075  @Override
076  public void setConf(Configuration conf) {
077    this.conf = conf;
078  }
079
080  @Override
081  public synchronized void initialize() throws HBaseIOException {
082    super.initialize();
083    super.setConf(conf);
084    this.fnm = services.getFavoredNodesManager();
085    this.rackManager = new RackManager(conf);
086    super.setConf(conf);
087  }
088
089  @Override
090  public List<RegionPlan> balanceCluster(Map<ServerName, List<RegionInfo>> clusterState)  {
091    //TODO. Look at is whether Stochastic loadbalancer can be integrated with this
092    List<RegionPlan> plans = new ArrayList<>();
093    //perform a scan of the meta to get the latest updates (if any)
094    SnapshotOfRegionAssignmentFromMeta snaphotOfRegionAssignment =
095        new SnapshotOfRegionAssignmentFromMeta(super.services.getConnection());
096    try {
097      snaphotOfRegionAssignment.initialize();
098    } catch (IOException ie) {
099      LOG.warn("Not running balancer since exception was thrown " + ie);
100      return plans;
101    }
102    // This is not used? Findbugs says so: Map<ServerName, ServerName> serverNameToServerNameWithoutCode = new HashMap<>();
103    Map<ServerName, ServerName> serverNameWithoutCodeToServerName = new HashMap<>();
104    ServerManager serverMgr = super.services.getServerManager();
105    for (ServerName sn: serverMgr.getOnlineServersList()) {
106      ServerName s = ServerName.valueOf(sn.getHostname(), sn.getPort(), ServerName.NON_STARTCODE);
107      // FindBugs complains about useless store! serverNameToServerNameWithoutCode.put(sn, s);
108      serverNameWithoutCodeToServerName.put(s, sn);
109    }
110    for (Map.Entry<ServerName, List<RegionInfo>> entry : clusterState.entrySet()) {
111      ServerName currentServer = entry.getKey();
112      //get a server without the startcode for the currentServer
113      ServerName currentServerWithoutStartCode = ServerName.valueOf(currentServer.getHostname(),
114          currentServer.getPort(), ServerName.NON_STARTCODE);
115      List<RegionInfo> list = entry.getValue();
116      for (RegionInfo region : list) {
117        if(!FavoredNodesManager.isFavoredNodeApplicable(region)) {
118          continue;
119        }
120        List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
121        if (favoredNodes == null || favoredNodes.get(0).equals(currentServerWithoutStartCode)) {
122          continue; //either favorednodes does not exist or we are already on the primary node
123        }
124        ServerName destination = null;
125        //check whether the primary is available
126        destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(0));
127        if (destination == null) {
128          //check whether the region is on secondary/tertiary
129          if (currentServerWithoutStartCode.equals(favoredNodes.get(1)) ||
130              currentServerWithoutStartCode.equals(favoredNodes.get(2))) {
131            continue;
132          }
133          //the region is currently on none of the favored nodes
134          //get it on one of them if possible
135          ServerMetrics l1 = super.services.getServerManager().getLoad(
136              serverNameWithoutCodeToServerName.get(favoredNodes.get(1)));
137          ServerMetrics l2 = super.services.getServerManager().getLoad(
138              serverNameWithoutCodeToServerName.get(favoredNodes.get(2)));
139          if (l1 != null && l2 != null) {
140            if (l1.getRegionMetrics().size() > l2.getRegionMetrics().size()) {
141              destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2));
142            } else {
143              destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1));
144            }
145          } else if (l1 != null) {
146            destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1));
147          } else if (l2 != null) {
148            destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2));
149          }
150        }
151
152        if (destination != null) {
153          RegionPlan plan = new RegionPlan(region, currentServer, destination);
154          plans.add(plan);
155        }
156      }
157    }
158    return plans;
159  }
160
161  @Override
162  public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
163      List<ServerName> servers) throws HBaseIOException {
164    Map<ServerName, List<RegionInfo>> assignmentMap;
165    try {
166      FavoredNodeAssignmentHelper assignmentHelper =
167          new FavoredNodeAssignmentHelper(servers, rackManager);
168      assignmentHelper.initialize();
169      if (!assignmentHelper.canPlaceFavoredNodes()) {
170        return super.roundRobinAssignment(regions, servers);
171      }
172      // Segregate the regions into two types:
173      // 1. The regions that have favored node assignment, and where at least
174      //    one of the favored node is still alive. In this case, try to adhere
175      //    to the current favored nodes assignment as much as possible - i.e.,
176      //    if the current primary is gone, then make the secondary or tertiary
177      //    as the new host for the region (based on their current load).
178      //    Note that we don't change the favored
179      //    node assignments here (even though one or more favored node is currently
180      //    down). It is up to the balanceCluster to do this hard work. The HDFS
181      //    can handle the fact that some nodes in the favored nodes hint is down
182      //    It'd allocate some other DNs. In combination with stale settings for HDFS,
183      //    we should be just fine.
184      // 2. The regions that currently don't have favored node assignment. We will
185      //    need to come up with favored nodes assignments for them. The corner case
186      //    in (1) above is that all the nodes are unavailable and in that case, we
187      //    will note that this region doesn't have favored nodes.
188      Pair<Map<ServerName,List<RegionInfo>>, List<RegionInfo>> segregatedRegions =
189          segregateRegionsAndAssignRegionsWithFavoredNodes(regions, servers);
190      Map<ServerName,List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
191      List<RegionInfo> regionsWithNoFavoredNodes = segregatedRegions.getSecond();
192      assignmentMap = new HashMap<>();
193      roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regionsWithNoFavoredNodes,
194          servers);
195      // merge the assignment maps
196      assignmentMap.putAll(regionsWithFavoredNodesMap);
197    } catch (Exception ex) {
198      LOG.warn("Encountered exception while doing favored-nodes assignment " + ex +
199          " Falling back to regular assignment");
200      assignmentMap = super.roundRobinAssignment(regions, servers);
201    }
202    return assignmentMap;
203  }
204
205  @Override
206  public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers)
207      throws HBaseIOException {
208    try {
209      FavoredNodeAssignmentHelper assignmentHelper =
210          new FavoredNodeAssignmentHelper(servers, rackManager);
211      assignmentHelper.initialize();
212      ServerName primary = super.randomAssignment(regionInfo, servers);
213      if (!FavoredNodesManager.isFavoredNodeApplicable(regionInfo)
214          || !assignmentHelper.canPlaceFavoredNodes()) {
215        return primary;
216      }
217      List<ServerName> favoredNodes = fnm.getFavoredNodes(regionInfo);
218      // check if we have a favored nodes mapping for this region and if so, return
219      // a server from the favored nodes list if the passed 'servers' contains this
220      // server as well (available servers, that is)
221      if (favoredNodes != null) {
222        for (ServerName s : favoredNodes) {
223          ServerName serverWithLegitStartCode = availableServersContains(servers, s);
224          if (serverWithLegitStartCode != null) {
225            return serverWithLegitStartCode;
226          }
227        }
228      }
229      List<RegionInfo> regions = new ArrayList<>(1);
230      regions.add(regionInfo);
231      Map<RegionInfo, ServerName> primaryRSMap = new HashMap<>(1);
232      primaryRSMap.put(regionInfo, primary);
233      assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
234      return primary;
235    } catch (Exception ex) {
236      LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + ex +
237          " Falling back to regular assignment");
238      return super.randomAssignment(regionInfo, servers);
239    }
240  }
241
242  private Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>>
243  segregateRegionsAndAssignRegionsWithFavoredNodes(List<RegionInfo> regions,
244      List<ServerName> availableServers) {
245    Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes = new HashMap<>(regions.size() / 2);
246    List<RegionInfo> regionsWithNoFavoredNodes = new ArrayList<>(regions.size()/2);
247    for (RegionInfo region : regions) {
248      List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
249      ServerName primaryHost = null;
250      ServerName secondaryHost = null;
251      ServerName tertiaryHost = null;
252      if (favoredNodes != null) {
253        for (ServerName s : favoredNodes) {
254          ServerName serverWithLegitStartCode = availableServersContains(availableServers, s);
255          if (serverWithLegitStartCode != null) {
256            FavoredNodesPlan.Position position =
257                FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s);
258            if (Position.PRIMARY.equals(position)) {
259              primaryHost = serverWithLegitStartCode;
260            } else if (Position.SECONDARY.equals(position)) {
261              secondaryHost = serverWithLegitStartCode;
262            } else if (Position.TERTIARY.equals(position)) {
263              tertiaryHost = serverWithLegitStartCode;
264            }
265          }
266        }
267        assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region,
268              primaryHost, secondaryHost, tertiaryHost);
269      }
270      if (primaryHost == null && secondaryHost == null && tertiaryHost == null) {
271        //all favored nodes unavailable
272        regionsWithNoFavoredNodes.add(region);
273      }
274    }
275    return new Pair<>(assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
276  }
277
278  // Do a check of the hostname and port and return the servername from the servers list
279  // that matched (the favoredNode will have a startcode of -1 but we want the real
280  // server with the legit startcode
281  private ServerName availableServersContains(List<ServerName> servers, ServerName favoredNode) {
282    for (ServerName server : servers) {
283      if (ServerName.isSameAddress(favoredNode, server)) {
284        return server;
285      }
286    }
287    return null;
288  }
289
290  private void assignRegionToAvailableFavoredNode(Map<ServerName,
291      List<RegionInfo>> assignmentMapForFavoredNodes, RegionInfo region, ServerName primaryHost,
292      ServerName secondaryHost, ServerName tertiaryHost) {
293    if (primaryHost != null) {
294      addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);
295    } else if (secondaryHost != null && tertiaryHost != null) {
296      // assign the region to the one with a lower load
297      // (both have the desired hdfs blocks)
298      ServerName s;
299      ServerMetrics tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost);
300      ServerMetrics secondaryLoad = super.services.getServerManager().getLoad(secondaryHost);
301      if (secondaryLoad.getRegionMetrics().size() < tertiaryLoad.getRegionMetrics().size()) {
302        s = secondaryHost;
303      } else {
304        s = tertiaryHost;
305      }
306      addRegionToMap(assignmentMapForFavoredNodes, region, s);
307    } else if (secondaryHost != null) {
308      addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost);
309    } else if (tertiaryHost != null) {
310      addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost);
311    }
312  }
313
314  private void addRegionToMap(Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes,
315      RegionInfo region, ServerName host) {
316    List<RegionInfo> regionsOnServer = null;
317    if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) {
318      regionsOnServer = new ArrayList<>();
319      assignmentMapForFavoredNodes.put(host, regionsOnServer);
320    }
321    regionsOnServer.add(region);
322  }
323
324  public synchronized List<ServerName> getFavoredNodes(RegionInfo regionInfo) {
325    return this.fnm.getFavoredNodes(regionInfo);
326  }
327
328  private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper,
329      Map<ServerName, List<RegionInfo>> assignmentMap,
330      List<RegionInfo> regions, List<ServerName> servers) throws IOException {
331    Map<RegionInfo, ServerName> primaryRSMap = new HashMap<>();
332    // figure the primary RSs
333    assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
334    assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
335  }
336
337  private void assignSecondaryAndTertiaryNodesForRegion(
338      FavoredNodeAssignmentHelper assignmentHelper,
339      List<RegionInfo> regions, Map<RegionInfo, ServerName> primaryRSMap) throws IOException {
340    // figure the secondary and tertiary RSs
341    Map<RegionInfo, ServerName[]> secondaryAndTertiaryRSMap =
342        assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap);
343
344    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
345    // now record all the assignments so that we can serve queries later
346    for (RegionInfo region : regions) {
347      // Store the favored nodes without startCode for the ServerName objects
348      // We don't care about the startcode; but only the hostname really
349      List<ServerName> favoredNodesForRegion = new ArrayList<>(3);
350      ServerName sn = primaryRSMap.get(region);
351      favoredNodesForRegion.add(ServerName.valueOf(sn.getHostname(), sn.getPort(),
352          ServerName.NON_STARTCODE));
353      ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(region);
354      if (secondaryAndTertiaryNodes != null) {
355        favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(),
356            secondaryAndTertiaryNodes[0].getPort(), ServerName.NON_STARTCODE));
357        favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(),
358            secondaryAndTertiaryNodes[1].getPort(), ServerName.NON_STARTCODE));
359      }
360      regionFNMap.put(region, favoredNodesForRegion);
361    }
362    fnm.updateFavoredNodes(regionFNMap);
363  }
364
365  /*
366   * Generate Favored Nodes for daughters during region split.
367   *
368   * If the parent does not have FN, regenerates them for the daughters.
369   *
370   * If the parent has FN, inherit two FN from parent for each daughter and generate the remaining.
371   * The primary FN for both the daughters should be the same as parent. Inherit the secondary
372   * FN from the parent but keep it different for each daughter. Choose the remaining FN
373   * randomly. This would give us better distribution over a period of time after enough splits.
374   */
375  @Override
376  public void generateFavoredNodesForDaughter(List<ServerName> servers, RegionInfo parent,
377      RegionInfo regionA, RegionInfo regionB) throws IOException {
378
379    Map<RegionInfo, List<ServerName>> result = new HashMap<>();
380    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
381    helper.initialize();
382
383    List<ServerName> parentFavoredNodes = getFavoredNodes(parent);
384    if (parentFavoredNodes == null) {
385      LOG.debug("Unable to find favored nodes for parent, " + parent
386          + " generating new favored nodes for daughter");
387      result.put(regionA, helper.generateFavoredNodes(regionA));
388      result.put(regionB, helper.generateFavoredNodes(regionB));
389
390    } else {
391
392      // Lets get the primary and secondary from parent for regionA
393      Set<ServerName> regionAFN =
394          getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, SECONDARY);
395      result.put(regionA, Lists.newArrayList(regionAFN));
396
397      // Lets get the primary and tertiary from parent for regionB
398      Set<ServerName> regionBFN =
399          getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, TERTIARY);
400      result.put(regionB, Lists.newArrayList(regionBFN));
401    }
402
403    fnm.updateFavoredNodes(result);
404  }
405
406  private Set<ServerName> getInheritedFNForDaughter(FavoredNodeAssignmentHelper helper,
407      List<ServerName> parentFavoredNodes, Position primary, Position secondary)
408      throws IOException {
409
410    Set<ServerName> daughterFN = Sets.newLinkedHashSet();
411    if (parentFavoredNodes.size() >= primary.ordinal()) {
412      daughterFN.add(parentFavoredNodes.get(primary.ordinal()));
413    }
414
415    if (parentFavoredNodes.size() >= secondary.ordinal()) {
416      daughterFN.add(parentFavoredNodes.get(secondary.ordinal()));
417    }
418
419    while (daughterFN.size() < FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) {
420      ServerName newNode = helper.generateMissingFavoredNode(Lists.newArrayList(daughterFN));
421      daughterFN.add(newNode);
422    }
423    return daughterFN;
424  }
425
426  /*
427   * Generate favored nodes for a region during merge. Choose the FN from one of the sources to
428   * keep it simple.
429   */
430  @Override
431  public void generateFavoredNodesForMergedRegion(RegionInfo merged, RegionInfo regionA,
432      RegionInfo regionB) throws IOException {
433    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
434    regionFNMap.put(merged, getFavoredNodes(regionA));
435    fnm.updateFavoredNodes(regionFNMap);
436  }
437
438  @Override
439  public List<RegionPlan> balanceCluster(TableName tableName,
440      Map<ServerName, List<RegionInfo>> clusterState) throws HBaseIOException {
441    return balanceCluster(clusterState);
442  }
443}