View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import com.google.common.cache.CacheBuilder;
21  import com.google.common.cache.CacheLoader;
22  import com.google.common.cache.LoadingCache;
23  import com.google.common.collect.Lists;
24  import com.google.common.util.concurrent.Futures;
25  import com.google.common.util.concurrent.ListenableFuture;
26  import com.google.common.util.concurrent.ListeningExecutorService;
27  import com.google.common.util.concurrent.MoreExecutors;
28  import com.google.common.util.concurrent.ThreadFactoryBuilder;
29
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.hbase.ClusterStatus;
34  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
35  import org.apache.hadoop.hbase.HRegionInfo;
36  import org.apache.hadoop.hbase.HTableDescriptor;
37  import org.apache.hadoop.hbase.ServerName;
38  import org.apache.hadoop.hbase.TableName;
39  import org.apache.hadoop.hbase.classification.InterfaceAudience;
40  import org.apache.hadoop.hbase.master.AssignmentManager;
41  import org.apache.hadoop.hbase.master.MasterServices;
42  import org.apache.hadoop.hbase.master.RegionStates;
43  import org.apache.hadoop.hbase.regionserver.HRegion;
44  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
45
46  import java.io.FileNotFoundException;
47  import java.io.IOException;
48  import java.util.ArrayList;
49  import java.util.Collection;
50  import java.util.HashMap;
51  import java.util.List;
52  import java.util.Set;
53  import java.util.concurrent.Callable;
54  import java.util.concurrent.ExecutionException;
55  import java.util.concurrent.Executors;
56  import java.util.concurrent.TimeUnit;
57
58  /**
59   * This will find where data for a region is located in HDFS. It ranks
60   * {@link ServerName}'s by the size of the store files they are holding for a
61   * given region.
62   *
63   */
64  @InterfaceAudience.Private
65  class RegionLocationFinder {
66    private static final Log LOG = LogFactory.getLog(RegionLocationFinder.class);
67    private static final long CACHE_TIME = 240 * 60 * 1000;
68    private static final HDFSBlocksDistribution EMPTY_BLOCK_DISTRIBUTION = new HDFSBlocksDistribution();
69    private Configuration conf;
70    private volatile ClusterStatus status;
71    private MasterServices services;
72    private final ListeningExecutorService executor;
73    private long lastFullRefresh = 0;
74  
75    private CacheLoader<HRegionInfo, HDFSBlocksDistribution> loader =
76        new CacheLoader<HRegionInfo, HDFSBlocksDistribution>() {
77
78          public ListenableFuture<HDFSBlocksDistribution> reload(final HRegionInfo hri,
79                 HDFSBlocksDistribution oldValue) throws Exception {
80            return executor.submit(new Callable<HDFSBlocksDistribution>() {
81              @Override
82              public HDFSBlocksDistribution call() throws Exception {
83                return internalGetTopBlockLocation(hri);
84              }
85            });
86          }
87
88          @Override
89          public HDFSBlocksDistribution load(HRegionInfo key) throws Exception {
90            return internalGetTopBlockLocation(key);
91          }
92        };
93  
94    // The cache for where regions are located.
95    private LoadingCache<HRegionInfo, HDFSBlocksDistribution> cache = null;
96
97    RegionLocationFinder() {
98      this.cache = createCache();
99      executor = MoreExecutors.listeningDecorator(
100         Executors.newScheduledThreadPool(
101             5,
102             new ThreadFactoryBuilder().
103                 setDaemon(true)
104                 .setNameFormat("region-location-%d")
105                 .build()));
106   }
107
108   /**
109    * Create a cache for region to list of servers
110    * @param time time to cache the locations
111    * @return A new Cache.
112    */
113   private LoadingCache<HRegionInfo, HDFSBlocksDistribution> createCache() {
114     return CacheBuilder.newBuilder()
115         .expireAfterWrite(CACHE_TIME, TimeUnit.MILLISECONDS)
116         .build(loader);
117   }
118
119   public Configuration getConf() {
120     return conf;
121   }
122
123   public void setConf(Configuration conf) {
124     this.conf = conf;
125   }
126
127   public void setServices(MasterServices services) {
128     this.services = services;
129   }
130
131   public void setClusterStatus(ClusterStatus status) {
132     long currentTime = EnvironmentEdgeManager.currentTime();
133     this.status = status;
134     if (currentTime > lastFullRefresh + (CACHE_TIME / 2)) {
135       // Only count the refresh if it includes user tables ( eg more than meta and namespace ).
136       lastFullRefresh = scheduleFullRefresh()?currentTime:lastFullRefresh;
137     }
138
139   }
140
141   /**
142    * Refresh all the region locations.
143    *
144    * @return true if user created regions got refreshed.
145    */
146   private boolean scheduleFullRefresh() {
147     // Protect from anything being null while starting up.
148     if (services == null) {
149       return false;
150     }
151     AssignmentManager am = services.getAssignmentManager();
152
153     if (am == null) {
154       return false;
155     }
156     RegionStates regionStates = am.getRegionStates();
157     if (regionStates == null) {
158       return false;
159     }
160
161     Set<HRegionInfo> regions = regionStates.getRegionAssignments().keySet();
162     boolean includesUserTables = false;
163     for (final HRegionInfo hri : regions) {
164       cache.refresh(hri);
165       includesUserTables = includesUserTables || !hri.isSystemTable();
166     }
167     return includesUserTables;
168   }
169
170   protected List<ServerName> getTopBlockLocations(
171       HDFSBlocksDistribution blocksDistribution) {
172     List<String> topHosts = blocksDistribution.getTopHosts();
173     return mapHostNameToServerName(topHosts);
174   }
175
176   /**
177    * Returns an ordered list of hosts which have better locality for this region
178    * than the current host.
179    */
180   protected List<ServerName> getTopBlockLocations(HRegionInfo region, String currentHost) {
181     HDFSBlocksDistribution blocksDistribution = getBlockDistribution(region);
182     List<String> topHosts = new ArrayList<String>();
183     for (String host : blocksDistribution.getTopHosts()) {
184       if (host.equals(currentHost)) {
185         break;
186       }
187       topHosts.add(host);
188     }
189     return mapHostNameToServerName(topHosts);
190   }
191
192   /**
193    * Returns an ordered list of hosts that are hosting the blocks for this
194    * region. The weight of each host is the sum of the block lengths of all
195    * files on that host, so the first host in the list is the server which holds
196    * the most bytes of the given region's HFiles.
197    *
198    * @param region region
199    * @return ordered list of hosts holding blocks of the specified region
200    */
201   protected HDFSBlocksDistribution internalGetTopBlockLocation(HRegionInfo region) {
202     try {
203       HTableDescriptor tableDescriptor = getTableDescriptor(region.getTable());
204       if (tableDescriptor != null) {
205         HDFSBlocksDistribution blocksDistribution =
206             HRegion.computeHDFSBlocksDistribution(getConf(), tableDescriptor, region);
207         return blocksDistribution;
208       }
209     } catch (IOException ioe) {
210       LOG.warn("IOException during HDFSBlocksDistribution computation. for " + "region = "
211           + region.getEncodedName(), ioe);
212     }
213 
214     return EMPTY_BLOCK_DISTRIBUTION;
215   }
216
217   /**
218    * return HTableDescriptor for a given tableName
219    *
220    * @param tableName the table name
221    * @return HTableDescriptor
222    * @throws IOException
223    */
224   protected HTableDescriptor getTableDescriptor(TableName tableName) throws IOException {
225     HTableDescriptor tableDescriptor = null;
226     try {
227       if (this.services != null && this.services.getTableDescriptors() != null) {
228         tableDescriptor = this.services.getTableDescriptors().get(tableName);
229       }
230     } catch (FileNotFoundException fnfe) {
231       LOG.debug("FileNotFoundException during getTableDescriptors." + " Current table name = "
232           + tableName, fnfe);
233     }
234 
235     return tableDescriptor;
236   }
237
238   /**
239    * Map hostname to ServerName, The output ServerName list will have the same
240    * order as input hosts.
241    *
242    * @param hosts the list of hosts
243    * @return ServerName list
244    */
245   protected List<ServerName> mapHostNameToServerName(List<String> hosts) {
246     if (hosts == null || status == null) {
247       if (hosts == null) {
248         LOG.warn("RegionLocationFinder top hosts is null");
249       }
250       return Lists.newArrayList();
251     }
252 
253     List<ServerName> topServerNames = new ArrayList<ServerName>();
254     Collection<ServerName> regionServers = status.getServers();
255
256     // create a mapping from hostname to ServerName for fast lookup
257     HashMap<String, List<ServerName>> hostToServerName = new HashMap<String, List<ServerName>>();
258     for (ServerName sn : regionServers) {
259       String host = sn.getHostname();
260       if (!hostToServerName.containsKey(host)) {
261         hostToServerName.put(host, new ArrayList<ServerName>());
262       }
263       hostToServerName.get(host).add(sn);
264     }
265
266     for (String host : hosts) {
267       if (!hostToServerName.containsKey(host)) {
268         continue;
269       }
270       for (ServerName sn : hostToServerName.get(host)) {
271         // it is possible that HDFS is up ( thus host is valid ),
272         // but RS is down ( thus sn is null )
273         if (sn != null) {
274           topServerNames.add(sn);
275         }
276       }
277     }
278     return topServerNames;
279   }
280
281   public HDFSBlocksDistribution getBlockDistribution(HRegionInfo hri) {
282     HDFSBlocksDistribution blockDistbn = null;
283     try {
284       if (cache.asMap().containsKey(hri)) {
285         blockDistbn = cache.get(hri);
286         return blockDistbn;
287       } else {
288         LOG.debug("HDFSBlocksDistribution not found in cache for region "
289             + hri.getRegionNameAsString());
290         blockDistbn = internalGetTopBlockLocation(hri);
291         cache.put(hri, blockDistbn);
292         return blockDistbn;
293       }
294     } catch (ExecutionException e) {
295       LOG.warn("Error while fetching cache entry ", e);
296       blockDistbn = internalGetTopBlockLocation(hri);
297       cache.put(hri, blockDistbn);
298       return blockDistbn;
299     }
300   }
301
302   public ListenableFuture<HDFSBlocksDistribution> asyncGetBlockDistribution(
303       HRegionInfo hri) {
304     try {
305       return loader.reload(hri, EMPTY_BLOCK_DISTRIBUTION);
306     } catch (Exception e) {
307       return Futures.immediateFuture(EMPTY_BLOCK_DISTRIBUTION);
308     }
309   }
310 }