001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.favored;
019
020import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY;
021import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY;
022import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY;
023
024import edu.umd.cs.findbugs.annotations.NonNull;
025import java.io.IOException;
026import java.util.ArrayList;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import org.apache.hadoop.hbase.HBaseIOException;
032import org.apache.hadoop.hbase.HBaseInterfaceAudience;
033import org.apache.hadoop.hbase.ServerMetrics;
034import org.apache.hadoop.hbase.ServerName;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position;
038import org.apache.hadoop.hbase.master.RegionPlan;
039import org.apache.hadoop.hbase.master.ServerManager;
040import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
041import org.apache.hadoop.hbase.util.Pair;
042import org.apache.yetus.audience.InterfaceAudience;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045
046import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
047import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
048import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
049
050/**
051 * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that assigns favored
052 * nodes for each region. There is a Primary RegionServer that hosts the region, and then there is
053 * Secondary and Tertiary RegionServers. Currently, the favored nodes information is used in
054 * creating HDFS files - the Primary RegionServer passes the primary, secondary, tertiary node
055 * addresses as hints to the DistributedFileSystem API for creating files on the filesystem. These
056 * nodes are treated as hints by the HDFS to place the blocks of the file. This alleviates the
057 * problem to do with reading from remote nodes (since we can make the Secondary RegionServer as the
058 * new Primary RegionServer) after a region is recovered. This should help provide consistent read
059 * latencies for the regions even when their primary region servers die.
060 */
061@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
062public class FavoredNodeLoadBalancer extends BaseLoadBalancer implements FavoredNodesPromoter {
063  private static final Logger LOG = LoggerFactory.getLogger(FavoredNodeLoadBalancer.class);
064
065  private FavoredNodesManager fnm;
066
067  @Override
068  public void initialize() {
069    super.initialize();
070    this.fnm = services.getFavoredNodesManager();
071  }
072
073  @Override
074  protected List<RegionPlan> balanceTable(TableName tableName,
075    Map<ServerName, List<RegionInfo>> loadOfOneTable) {
076    // TODO. Look at is whether Stochastic loadbalancer can be integrated with this
077    List<RegionPlan> plans = new ArrayList<>();
078    Map<ServerName, ServerName> serverNameWithoutCodeToServerName = new HashMap<>();
079    ServerManager serverMgr = super.services.getServerManager();
080    for (ServerName sn : serverMgr.getOnlineServersList()) {
081      ServerName s = ServerName.valueOf(sn.getHostname(), sn.getPort(), ServerName.NON_STARTCODE);
082      // FindBugs complains about useless store! serverNameToServerNameWithoutCode.put(sn, s);
083      serverNameWithoutCodeToServerName.put(s, sn);
084    }
085    for (Map.Entry<ServerName, List<RegionInfo>> entry : loadOfOneTable.entrySet()) {
086      ServerName currentServer = entry.getKey();
087      // get a server without the startcode for the currentServer
088      ServerName currentServerWithoutStartCode = ServerName.valueOf(currentServer.getHostname(),
089        currentServer.getPort(), ServerName.NON_STARTCODE);
090      List<RegionInfo> list = entry.getValue();
091      for (RegionInfo region : list) {
092        if (!FavoredNodesManager.isFavoredNodeApplicable(region)) {
093          continue;
094        }
095        List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
096        if (favoredNodes == null || favoredNodes.get(0).equals(currentServerWithoutStartCode)) {
097          continue; // either favorednodes does not exist or we are already on the primary node
098        }
099        ServerName destination = null;
100        // check whether the primary is available
101        destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(0));
102        if (destination == null) {
103          // check whether the region is on secondary/tertiary
104          if (
105            currentServerWithoutStartCode.equals(favoredNodes.get(1))
106              || currentServerWithoutStartCode.equals(favoredNodes.get(2))
107          ) {
108            continue;
109          }
110          // the region is currently on none of the favored nodes
111          // get it on one of them if possible
112          ServerMetrics l1 = super.services.getServerManager()
113            .getLoad(serverNameWithoutCodeToServerName.get(favoredNodes.get(1)));
114          ServerMetrics l2 = super.services.getServerManager()
115            .getLoad(serverNameWithoutCodeToServerName.get(favoredNodes.get(2)));
116          if (l1 != null && l2 != null) {
117            if (l1.getRegionMetrics().size() > l2.getRegionMetrics().size()) {
118              destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2));
119            } else {
120              destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1));
121            }
122          } else if (l1 != null) {
123            destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1));
124          } else if (l2 != null) {
125            destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2));
126          }
127        }
128
129        if (destination != null) {
130          RegionPlan plan = new RegionPlan(region, currentServer, destination);
131          plans.add(plan);
132        }
133      }
134    }
135    return plans;
136  }
137
138  @Override
139  @NonNull
140  public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
141    List<ServerName> servers) throws HBaseIOException {
142    Map<ServerName, List<RegionInfo>> assignmentMap;
143    try {
144      FavoredNodeAssignmentHelper assignmentHelper =
145        new FavoredNodeAssignmentHelper(servers, rackManager);
146      assignmentHelper.initialize();
147      if (!assignmentHelper.canPlaceFavoredNodes()) {
148        return super.roundRobinAssignment(regions, servers);
149      }
150      // Segregate the regions into two types:
151      // 1. The regions that have favored node assignment, and where at least
152      // one of the favored node is still alive. In this case, try to adhere
153      // to the current favored nodes assignment as much as possible - i.e.,
154      // if the current primary is gone, then make the secondary or tertiary
155      // as the new host for the region (based on their current load).
156      // Note that we don't change the favored
157      // node assignments here (even though one or more favored node is currently
158      // down). It is up to the balanceCluster to do this hard work. The HDFS
159      // can handle the fact that some nodes in the favored nodes hint is down
160      // It'd allocate some other DNs. In combination with stale settings for HDFS,
161      // we should be just fine.
162      // 2. The regions that currently don't have favored node assignment. We will
163      // need to come up with favored nodes assignments for them. The corner case
164      // in (1) above is that all the nodes are unavailable and in that case, we
165      // will note that this region doesn't have favored nodes.
166      Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>> segregatedRegions =
167        segregateRegionsAndAssignRegionsWithFavoredNodes(regions, servers);
168      Map<ServerName, List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
169      List<RegionInfo> regionsWithNoFavoredNodes = segregatedRegions.getSecond();
170      assignmentMap = new HashMap<>();
171      roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regionsWithNoFavoredNodes, servers);
172      // merge the assignment maps
173      assignmentMap.putAll(regionsWithFavoredNodesMap);
174    } catch (Exception ex) {
175      LOG.warn("Encountered exception while doing favored-nodes assignment " + ex
176        + " Falling back to regular assignment");
177      assignmentMap = super.roundRobinAssignment(regions, servers);
178    }
179    return assignmentMap;
180  }
181
182  @Override
183  public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers)
184    throws HBaseIOException {
185    try {
186      FavoredNodeAssignmentHelper assignmentHelper =
187        new FavoredNodeAssignmentHelper(servers, rackManager);
188      assignmentHelper.initialize();
189      ServerName primary = super.randomAssignment(regionInfo, servers);
190      if (
191        !FavoredNodesManager.isFavoredNodeApplicable(regionInfo)
192          || !assignmentHelper.canPlaceFavoredNodes()
193      ) {
194        return primary;
195      }
196      List<ServerName> favoredNodes = fnm.getFavoredNodes(regionInfo);
197      // check if we have a favored nodes mapping for this region and if so, return
198      // a server from the favored nodes list if the passed 'servers' contains this
199      // server as well (available servers, that is)
200      if (favoredNodes != null) {
201        for (ServerName s : favoredNodes) {
202          ServerName serverWithLegitStartCode = availableServersContains(servers, s);
203          if (serverWithLegitStartCode != null) {
204            return serverWithLegitStartCode;
205          }
206        }
207      }
208      List<RegionInfo> regions = new ArrayList<>(1);
209      regions.add(regionInfo);
210      Map<RegionInfo, ServerName> primaryRSMap = new HashMap<>(1);
211      primaryRSMap.put(regionInfo, primary);
212      assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
213      return primary;
214    } catch (Exception ex) {
215      LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + ex
216        + " Falling back to regular assignment");
217      return super.randomAssignment(regionInfo, servers);
218    }
219  }
220
221  private Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>>
222    segregateRegionsAndAssignRegionsWithFavoredNodes(List<RegionInfo> regions,
223      List<ServerName> availableServers) {
224    Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes =
225      new HashMap<>(regions.size() / 2);
226    List<RegionInfo> regionsWithNoFavoredNodes = new ArrayList<>(regions.size() / 2);
227    for (RegionInfo region : regions) {
228      List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
229      ServerName primaryHost = null;
230      ServerName secondaryHost = null;
231      ServerName tertiaryHost = null;
232      if (favoredNodes != null) {
233        for (ServerName s : favoredNodes) {
234          ServerName serverWithLegitStartCode = availableServersContains(availableServers, s);
235          if (serverWithLegitStartCode != null) {
236            FavoredNodesPlan.Position position =
237              FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s);
238            if (Position.PRIMARY.equals(position)) {
239              primaryHost = serverWithLegitStartCode;
240            } else if (Position.SECONDARY.equals(position)) {
241              secondaryHost = serverWithLegitStartCode;
242            } else if (Position.TERTIARY.equals(position)) {
243              tertiaryHost = serverWithLegitStartCode;
244            }
245          }
246        }
247        assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region, primaryHost,
248          secondaryHost, tertiaryHost);
249      }
250      if (primaryHost == null && secondaryHost == null && tertiaryHost == null) {
251        // all favored nodes unavailable
252        regionsWithNoFavoredNodes.add(region);
253      }
254    }
255    return new Pair<>(assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
256  }
257
258  // Do a check of the hostname and port and return the servername from the servers list
259  // that matched (the favoredNode will have a startcode of -1 but we want the real
260  // server with the legit startcode
261  private ServerName availableServersContains(List<ServerName> servers, ServerName favoredNode) {
262    for (ServerName server : servers) {
263      if (ServerName.isSameAddress(favoredNode, server)) {
264        return server;
265      }
266    }
267    return null;
268  }
269
270  private void assignRegionToAvailableFavoredNode(
271    Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes, RegionInfo region,
272    ServerName primaryHost, ServerName secondaryHost, ServerName tertiaryHost) {
273    if (primaryHost != null) {
274      addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);
275    } else if (secondaryHost != null && tertiaryHost != null) {
276      // assign the region to the one with a lower load
277      // (both have the desired hdfs blocks)
278      ServerName s;
279      ServerMetrics tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost);
280      ServerMetrics secondaryLoad = super.services.getServerManager().getLoad(secondaryHost);
281      if (secondaryLoad.getRegionMetrics().size() < tertiaryLoad.getRegionMetrics().size()) {
282        s = secondaryHost;
283      } else {
284        s = tertiaryHost;
285      }
286      addRegionToMap(assignmentMapForFavoredNodes, region, s);
287    } else if (secondaryHost != null) {
288      addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost);
289    } else if (tertiaryHost != null) {
290      addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost);
291    }
292  }
293
294  private void addRegionToMap(Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes,
295    RegionInfo region, ServerName host) {
296    List<RegionInfo> regionsOnServer = null;
297    if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) {
298      regionsOnServer = new ArrayList<>();
299      assignmentMapForFavoredNodes.put(host, regionsOnServer);
300    }
301    regionsOnServer.add(region);
302  }
303
304  public List<ServerName> getFavoredNodes(RegionInfo regionInfo) {
305    return this.fnm.getFavoredNodes(regionInfo);
306  }
307
308  private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper,
309    Map<ServerName, List<RegionInfo>> assignmentMap, List<RegionInfo> regions,
310    List<ServerName> servers) throws IOException {
311    Map<RegionInfo, ServerName> primaryRSMap = new HashMap<>();
312    // figure the primary RSs
313    assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
314    assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
315  }
316
317  private void assignSecondaryAndTertiaryNodesForRegion(
318    FavoredNodeAssignmentHelper assignmentHelper, List<RegionInfo> regions,
319    Map<RegionInfo, ServerName> primaryRSMap) throws IOException {
320    // figure the secondary and tertiary RSs
321    Map<RegionInfo, ServerName[]> secondaryAndTertiaryRSMap =
322      assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap);
323
324    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
325    // now record all the assignments so that we can serve queries later
326    for (RegionInfo region : regions) {
327      // Store the favored nodes without startCode for the ServerName objects
328      // We don't care about the startcode; but only the hostname really
329      List<ServerName> favoredNodesForRegion = new ArrayList<>(3);
330      ServerName sn = primaryRSMap.get(region);
331      favoredNodesForRegion
332        .add(ServerName.valueOf(sn.getHostname(), sn.getPort(), ServerName.NON_STARTCODE));
333      ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(region);
334      if (secondaryAndTertiaryNodes != null) {
335        favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(),
336          secondaryAndTertiaryNodes[0].getPort(), ServerName.NON_STARTCODE));
337        favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(),
338          secondaryAndTertiaryNodes[1].getPort(), ServerName.NON_STARTCODE));
339      }
340      regionFNMap.put(region, favoredNodesForRegion);
341    }
342    fnm.updateFavoredNodes(regionFNMap);
343  }
344
345  /*
346   * Generate Favored Nodes for daughters during region split. If the parent does not have FN,
347   * regenerates them for the daughters. If the parent has FN, inherit two FN from parent for each
348   * daughter and generate the remaining. The primary FN for both the daughters should be the same
349   * as parent. Inherit the secondary FN from the parent but keep it different for each daughter.
350   * Choose the remaining FN randomly. This would give us better distribution over a period of time
351   * after enough splits.
352   */
353  @Override
354  public void generateFavoredNodesForDaughter(List<ServerName> servers, RegionInfo parent,
355    RegionInfo regionA, RegionInfo regionB) throws IOException {
356
357    Map<RegionInfo, List<ServerName>> result = new HashMap<>();
358    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
359    helper.initialize();
360
361    List<ServerName> parentFavoredNodes = getFavoredNodes(parent);
362    if (parentFavoredNodes == null) {
363      LOG.debug("Unable to find favored nodes for parent, " + parent
364        + " generating new favored nodes for daughter");
365      result.put(regionA, helper.generateFavoredNodes(regionA));
366      result.put(regionB, helper.generateFavoredNodes(regionB));
367
368    } else {
369
370      // Lets get the primary and secondary from parent for regionA
371      Set<ServerName> regionAFN =
372        getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, SECONDARY);
373      result.put(regionA, Lists.newArrayList(regionAFN));
374
375      // Lets get the primary and tertiary from parent for regionB
376      Set<ServerName> regionBFN =
377        getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, TERTIARY);
378      result.put(regionB, Lists.newArrayList(regionBFN));
379    }
380
381    fnm.updateFavoredNodes(result);
382  }
383
384  private Set<ServerName> getInheritedFNForDaughter(FavoredNodeAssignmentHelper helper,
385    List<ServerName> parentFavoredNodes, Position primary, Position secondary) throws IOException {
386
387    Set<ServerName> daughterFN = Sets.newLinkedHashSet();
388    if (parentFavoredNodes.size() >= primary.ordinal()) {
389      daughterFN.add(parentFavoredNodes.get(primary.ordinal()));
390    }
391
392    if (parentFavoredNodes.size() >= secondary.ordinal()) {
393      daughterFN.add(parentFavoredNodes.get(secondary.ordinal()));
394    }
395
396    while (daughterFN.size() < FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) {
397      ServerName newNode = helper.generateMissingFavoredNode(Lists.newArrayList(daughterFN));
398      daughterFN.add(newNode);
399    }
400    return daughterFN;
401  }
402
403  /*
404   * Generate favored nodes for a region during merge. Choose the FN from one of the sources to keep
405   * it simple.
406   */
407  @Override
408  public void generateFavoredNodesForMergedRegion(RegionInfo merged, RegionInfo[] mergeParents)
409    throws IOException {
410    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
411    regionFNMap.put(merged, getFavoredNodes(mergeParents[0]));
412    fnm.updateFavoredNodes(regionFNMap);
413  }
414
415}