View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.balancer;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.List;
25  import java.util.Map;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.NamespaceDescriptor;
34  import org.apache.hadoop.hbase.ServerLoad;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.master.RackManager;
37  import org.apache.hadoop.hbase.master.RegionPlan;
38  import org.apache.hadoop.hbase.master.ServerManager;
39  import org.apache.hadoop.hbase.master.SnapshotOfRegionAssignmentFromMeta;
40  import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position;
41  import org.apache.hadoop.hbase.util.Pair;
42  
/**
 * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer}
 * that assigns favored nodes for each region. There is a Primary RegionServer
 * that hosts the region, and then there are Secondary and Tertiary RegionServers.
 * Currently, the favored nodes information is used in creating HDFS files - the Primary
 * RegionServer passes the primary, secondary and tertiary node addresses as hints to the
 * DistributedFileSystem API for creating files on the filesystem. These nodes are treated
 * as hints by HDFS when placing the blocks of the file. This alleviates the problem of
 * reading from remote nodes after a region is recovered (since we can make the Secondary
 * RegionServer the new Primary RegionServer). This should help provide
 * consistent read latencies for the regions even when their primary region servers die.
 *
 */
56  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
57  public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
58    private static final Log LOG = LogFactory.getLog(FavoredNodeLoadBalancer.class);
59  
60    private FavoredNodesPlan globalFavoredNodesAssignmentPlan;
61    private RackManager rackManager;
62  
63    @Override
64    public void setConf(Configuration conf) {
65      super.setConf(conf);
66      globalFavoredNodesAssignmentPlan = new FavoredNodesPlan();
67      this.rackManager = new RackManager(conf);
68      super.setConf(conf);
69    }
70  
71    @Override
72    public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState)  {
73      //TODO. Look at is whether Stochastic loadbalancer can be integrated with this
74      List<RegionPlan> plans = new ArrayList<RegionPlan>();
75      //perform a scan of the meta to get the latest updates (if any)
76      SnapshotOfRegionAssignmentFromMeta snaphotOfRegionAssignment =
77          new SnapshotOfRegionAssignmentFromMeta(super.services.getConnection());
78      try {
79        snaphotOfRegionAssignment.initialize();
80      } catch (IOException ie) {
81        LOG.warn("Not running balancer since exception was thrown " + ie);
82        return plans;
83      }
84      globalFavoredNodesAssignmentPlan = snaphotOfRegionAssignment.getExistingAssignmentPlan();
85      Map<ServerName, ServerName> serverNameToServerNameWithoutCode =
86          new HashMap<ServerName, ServerName>();
87      Map<ServerName, ServerName> serverNameWithoutCodeToServerName =
88          new HashMap<ServerName, ServerName>();
89      ServerManager serverMgr = super.services.getServerManager();
90      for (ServerName sn: serverMgr.getOnlineServersList()) {
91        ServerName s = ServerName.valueOf(sn.getHostname(), sn.getPort(), ServerName.NON_STARTCODE);
92        serverNameToServerNameWithoutCode.put(sn, s);
93        serverNameWithoutCodeToServerName.put(s, sn);
94      }
95      for (Map.Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
96        ServerName currentServer = entry.getKey();
97        //get a server without the startcode for the currentServer
98        ServerName currentServerWithoutStartCode = ServerName.valueOf(currentServer.getHostname(),
99            currentServer.getPort(), ServerName.NON_STARTCODE);
100       List<HRegionInfo> list = entry.getValue();
101       for (HRegionInfo region : list) {
102         if(region.getTable().getNamespaceAsString()
103             .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
104           continue;
105         }
106         List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region);
107         if (favoredNodes == null || favoredNodes.get(0).equals(currentServerWithoutStartCode)) {
108           continue; //either favorednodes does not exist or we are already on the primary node
109         }
110         ServerName destination = null;
111         //check whether the primary is available
112         destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(0));
113         if (destination == null) {
114           //check whether the region is on secondary/tertiary
115           if (currentServerWithoutStartCode.equals(favoredNodes.get(1)) ||
116               currentServerWithoutStartCode.equals(favoredNodes.get(2))) {
117             continue;
118           }
119           //the region is currently on none of the favored nodes
120           //get it on one of them if possible
121           ServerLoad l1 = super.services.getServerManager().getLoad(
122               serverNameWithoutCodeToServerName.get(favoredNodes.get(1)));
123           ServerLoad l2 = super.services.getServerManager().getLoad(
124               serverNameWithoutCodeToServerName.get(favoredNodes.get(2)));
125           if (l1 != null && l2 != null) {
126             if (l1.getLoad() > l2.getLoad()) {
127               destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2));
128             } else {
129               destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1));
130             }
131           } else if (l1 != null) {
132             destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1));
133           } else if (l2 != null) {
134             destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2));
135           }
136         }
137 
138         if (destination != null) {
139           RegionPlan plan = new RegionPlan(region, currentServer, destination);
140           plans.add(plan);
141         }
142       }
143     }
144     return plans;
145   }
146 
147   @Override
148   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
149       List<ServerName> servers) {
150     Map<ServerName, List<HRegionInfo>> assignmentMap;
151     try {
152       FavoredNodeAssignmentHelper assignmentHelper =
153           new FavoredNodeAssignmentHelper(servers, rackManager);
154       assignmentHelper.initialize();
155       if (!assignmentHelper.canPlaceFavoredNodes()) {
156         return super.roundRobinAssignment(regions, servers);
157       }
158       // Segregate the regions into two types:
159       // 1. The regions that have favored node assignment, and where at least
160       //    one of the favored node is still alive. In this case, try to adhere
161       //    to the current favored nodes assignment as much as possible - i.e.,
162       //    if the current primary is gone, then make the secondary or tertiary
163       //    as the new host for the region (based on their current load).
164       //    Note that we don't change the favored
165       //    node assignments here (even though one or more favored node is currently
166       //    down). It is up to the balanceCluster to do this hard work. The HDFS
167       //    can handle the fact that some nodes in the favored nodes hint is down
168       //    It'd allocate some other DNs. In combination with stale settings for HDFS,
169       //    we should be just fine.
170       // 2. The regions that currently don't have favored node assignment. We will
171       //    need to come up with favored nodes assignments for them. The corner case
172       //    in (1) above is that all the nodes are unavailable and in that case, we
173       //    will note that this region doesn't have favored nodes.
174       Pair<Map<ServerName,List<HRegionInfo>>, List<HRegionInfo>> segregatedRegions =
175           segregateRegionsAndAssignRegionsWithFavoredNodes(regions, servers);
176       Map<ServerName,List<HRegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
177       List<HRegionInfo> regionsWithNoFavoredNodes = segregatedRegions.getSecond();
178       assignmentMap = new HashMap<ServerName, List<HRegionInfo>>();
179       roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regionsWithNoFavoredNodes,
180           servers);
181       // merge the assignment maps
182       assignmentMap.putAll(regionsWithFavoredNodesMap);
183     } catch (Exception ex) {
184       LOG.warn("Encountered exception while doing favored-nodes assignment " + ex +
185           " Falling back to regular assignment");
186       assignmentMap = super.roundRobinAssignment(regions, servers);
187     }
188     return assignmentMap;
189   }
190 
191   @Override
192   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
193     try {
194       FavoredNodeAssignmentHelper assignmentHelper =
195           new FavoredNodeAssignmentHelper(servers, rackManager);
196       assignmentHelper.initialize();
197       ServerName primary = super.randomAssignment(regionInfo, servers);
198       if (!assignmentHelper.canPlaceFavoredNodes()) {
199         return primary;
200       }
201       List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
202       // check if we have a favored nodes mapping for this region and if so, return
203       // a server from the favored nodes list if the passed 'servers' contains this
204       // server as well (available servers, that is)
205       if (favoredNodes != null) {
206         for (ServerName s : favoredNodes) {
207           ServerName serverWithLegitStartCode = availableServersContains(servers, s);
208           if (serverWithLegitStartCode != null) {
209             return serverWithLegitStartCode;
210           }
211         }
212       }
213       List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
214       regions.add(regionInfo);
215       Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>(1);
216       primaryRSMap.put(regionInfo, primary);
217       assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
218       return primary;
219     } catch (Exception ex) {
220       LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + ex +
221           " Falling back to regular assignment");
222       return super.randomAssignment(regionInfo, servers);
223     }
224   }
225 
226   private Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>>
227   segregateRegionsAndAssignRegionsWithFavoredNodes(List<HRegionInfo> regions,
228       List<ServerName> availableServers) {
229     Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes =
230         new HashMap<ServerName, List<HRegionInfo>>(regions.size() / 2);
231     List<HRegionInfo> regionsWithNoFavoredNodes = new ArrayList<HRegionInfo>(regions.size()/2);
232     for (HRegionInfo region : regions) {
233       List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region);
234       ServerName primaryHost = null;
235       ServerName secondaryHost = null;
236       ServerName tertiaryHost = null;
237       if (favoredNodes != null) {
238         for (ServerName s : favoredNodes) {
239           ServerName serverWithLegitStartCode = availableServersContains(availableServers, s);
240           if (serverWithLegitStartCode != null) {
241             FavoredNodesPlan.Position position =
242                 FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s);
243             if (Position.PRIMARY.equals(position)) {
244               primaryHost = serverWithLegitStartCode;
245             } else if (Position.SECONDARY.equals(position)) {
246               secondaryHost = serverWithLegitStartCode;
247             } else if (Position.TERTIARY.equals(position)) {
248               tertiaryHost = serverWithLegitStartCode;
249             }
250           }
251         }
252         assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region,
253               primaryHost, secondaryHost, tertiaryHost);
254       }
255       if (primaryHost == null && secondaryHost == null && tertiaryHost == null) {
256         //all favored nodes unavailable
257         regionsWithNoFavoredNodes.add(region);
258       }
259     }
260     return new Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>>(
261         assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
262   }
263 
264   // Do a check of the hostname and port and return the servername from the servers list
265   // that matched (the favoredNode will have a startcode of -1 but we want the real
266   // server with the legit startcode
267   private ServerName availableServersContains(List<ServerName> servers, ServerName favoredNode) {
268     for (ServerName server : servers) {
269       if (ServerName.isSameHostnameAndPort(favoredNode, server)) {
270         return server;
271       }
272     }
273     return null;
274   }
275 
276   private void assignRegionToAvailableFavoredNode(Map<ServerName,
277       List<HRegionInfo>> assignmentMapForFavoredNodes, HRegionInfo region, ServerName primaryHost,
278       ServerName secondaryHost, ServerName tertiaryHost) {
279     if (primaryHost != null) {
280       addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);
281     } else if (secondaryHost != null && tertiaryHost != null) {
282       // assign the region to the one with a lower load
283       // (both have the desired hdfs blocks)
284       ServerName s;
285       ServerLoad tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost);
286       ServerLoad secondaryLoad = super.services.getServerManager().getLoad(secondaryHost);
287       if (secondaryLoad.getLoad() < tertiaryLoad.getLoad()) {
288         s = secondaryHost;
289       } else {
290         s = tertiaryHost;
291       }
292       addRegionToMap(assignmentMapForFavoredNodes, region, s);
293     } else if (secondaryHost != null) {
294       addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost);
295     } else if (tertiaryHost != null) {
296       addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost);
297     }
298   }
299 
300   private void addRegionToMap(Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes,
301       HRegionInfo region, ServerName host) {
302     List<HRegionInfo> regionsOnServer = null;
303     if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) {
304       regionsOnServer = new ArrayList<HRegionInfo>();
305       assignmentMapForFavoredNodes.put(host, regionsOnServer);
306     }
307     regionsOnServer.add(region);
308   }
309 
310   public List<ServerName> getFavoredNodes(HRegionInfo regionInfo) {
311     return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
312   }
313 
314   private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper,
315       Map<ServerName, List<HRegionInfo>> assignmentMap,
316       List<HRegionInfo> regions, List<ServerName> servers) {
317     Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>();
318     // figure the primary RSs
319     assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
320     assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
321   }
322 
323   private void assignSecondaryAndTertiaryNodesForRegion(
324       FavoredNodeAssignmentHelper assignmentHelper,
325       List<HRegionInfo> regions, Map<HRegionInfo, ServerName> primaryRSMap) {
326     // figure the secondary and tertiary RSs
327     Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap =
328         assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap);
329     // now record all the assignments so that we can serve queries later
330     for (HRegionInfo region : regions) {
331       // Store the favored nodes without startCode for the ServerName objects
332       // We don't care about the startcode; but only the hostname really
333       List<ServerName> favoredNodesForRegion = new ArrayList<ServerName>(3);
334       ServerName sn = primaryRSMap.get(region);
335       favoredNodesForRegion.add(ServerName.valueOf(sn.getHostname(), sn.getPort(),
336           ServerName.NON_STARTCODE));
337       ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(region);
338       if (secondaryAndTertiaryNodes != null) {
339         favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(),
340             secondaryAndTertiaryNodes[0].getPort(), ServerName.NON_STARTCODE));
341         favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(),
342             secondaryAndTertiaryNodes[1].getPort(), ServerName.NON_STARTCODE));
343       }
344       globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(region, favoredNodesForRegion);
345     }
346   }
347 }