1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master.balancer;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Map.Entry;
29 import java.util.Random;
30 import java.util.Set;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.hadoop.conf.Configuration;
35 import org.apache.hadoop.hbase.HConstants;
36 import org.apache.hadoop.hbase.HRegionInfo;
37 import org.apache.hadoop.hbase.MetaTableAccessor;
38 import org.apache.hadoop.hbase.ServerName;
39 import org.apache.hadoop.hbase.TableName;
40 import org.apache.hadoop.hbase.classification.InterfaceAudience;
41 import org.apache.hadoop.hbase.client.Connection;
42 import org.apache.hadoop.hbase.client.ConnectionFactory;
43 import org.apache.hadoop.hbase.client.Put;
44 import org.apache.hadoop.hbase.client.Table;
45 import org.apache.hadoop.hbase.master.RackManager;
46 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
47 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
48 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.FavoredNodes;
49 import org.apache.hadoop.hbase.util.Bytes;
50 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
51
52 import com.google.protobuf.InvalidProtocolBufferException;
53
54
55
56
57
58
59
60
61 @InterfaceAudience.Private
62 public class FavoredNodeAssignmentHelper {
63 private static final Log LOG = LogFactory.getLog(FavoredNodeAssignmentHelper.class);
64 private RackManager rackManager;
65 private Map<String, List<ServerName>> rackToRegionServerMap;
66 private List<String> uniqueRackList;
67 private Map<ServerName, String> regionServerToRackMap;
68 private Random random;
69 private List<ServerName> servers;
70 public static final byte [] FAVOREDNODES_QUALIFIER = Bytes.toBytes("fn");
71 public final static short FAVORED_NODES_NUM = 3;
72
/**
 * Creates a helper over the given servers, using a {@link RackManager} built from
 * the supplied configuration.
 *
 * @param servers the region servers that may be chosen as favored nodes
 * @param conf configuration passed to the {@link RackManager} constructor
 */
public FavoredNodeAssignmentHelper(
    final List<ServerName> servers,
    Configuration conf) {
  this(servers, new RackManager(conf));
}
76
/**
 * Creates a helper over the given servers and rack manager. The rack lookup
 * structures start out empty; {@code initialize()} populates them.
 *
 * @param servers the region servers that may be chosen as favored nodes
 * @param rackManager used to resolve the rack of each server
 */
public FavoredNodeAssignmentHelper(final List<ServerName> servers,
    final RackManager rackManager) {
  this.random = new Random();
  this.uniqueRackList = new ArrayList<>();
  this.regionServerToRackMap = new HashMap<>();
  this.rackToRegionServerMap = new HashMap<>();
  this.rackManager = rackManager;
  this.servers = servers;
}
86
87
88
89
90
91
92
93 public static void updateMetaWithFavoredNodesInfo(
94 Map<HRegionInfo, List<ServerName>> regionToFavoredNodes,
95 Connection connection) throws IOException {
96 List<Put> puts = new ArrayList<Put>();
97 for (Map.Entry<HRegionInfo, List<ServerName>> entry : regionToFavoredNodes.entrySet()) {
98 Put put = makePutFromRegionInfo(entry.getKey(), entry.getValue());
99 if (put != null) {
100 puts.add(put);
101 }
102 }
103 MetaTableAccessor.putsToMetaTable(connection, puts);
104 LOG.info("Added " + puts.size() + " regions in META");
105 }
106
107
108
109
110
111
112
113 public static void updateMetaWithFavoredNodesInfo(
114 Map<HRegionInfo, List<ServerName>> regionToFavoredNodes,
115 Configuration conf) throws IOException {
116 List<Put> puts = new ArrayList<Put>();
117 for (Map.Entry<HRegionInfo, List<ServerName>> entry : regionToFavoredNodes.entrySet()) {
118 Put put = makePutFromRegionInfo(entry.getKey(), entry.getValue());
119 if (put != null) {
120 puts.add(put);
121 }
122 }
123
124
125
126
127
128 try (Connection connection = ConnectionFactory.createConnection(conf)) {
129 try (Table metaTable = connection.getTable(TableName.META_TABLE_NAME)) {
130 metaTable.put(puts);
131 }
132 }
133 LOG.info("Added " + puts.size() + " regions in META");
134 }
135
136
137
138
139
140
141
142
143 static Put makePutFromRegionInfo(HRegionInfo regionInfo, List<ServerName>favoredNodeList)
144 throws IOException {
145 Put put = null;
146 if (favoredNodeList != null) {
147 put = MetaTableAccessor.makePutFromRegionInfo(regionInfo);
148 byte[] favoredNodes = getFavoredNodes(favoredNodeList);
149 put.addImmutable(HConstants.CATALOG_FAMILY, FAVOREDNODES_QUALIFIER,
150 EnvironmentEdgeManager.currentTime(), favoredNodes);
151 LOG.info("Create the region " + regionInfo.getRegionNameAsString() +
152 " with favored nodes " + Bytes.toString(favoredNodes));
153 }
154 return put;
155 }
156
157
158
159
160
161
162 public static ServerName[] getFavoredNodesList(byte[] favoredNodes)
163 throws InvalidProtocolBufferException {
164 FavoredNodes f = FavoredNodes.parseFrom(favoredNodes);
165 List<HBaseProtos.ServerName> protoNodes = f.getFavoredNodeList();
166 ServerName[] servers = new ServerName[protoNodes.size()];
167 int i = 0;
168 for (HBaseProtos.ServerName node : protoNodes) {
169 servers[i++] = ProtobufUtil.toServerName(node);
170 }
171 return servers;
172 }
173
174
175
176
177
178 public static byte[] getFavoredNodes(List<ServerName> serverAddrList) {
179 FavoredNodes.Builder f = FavoredNodes.newBuilder();
180 for (ServerName s : serverAddrList) {
181 HBaseProtos.ServerName.Builder b = HBaseProtos.ServerName.newBuilder();
182 b.setHostName(s.getHostname());
183 b.setPort(s.getPort());
184 b.setStartCode(s.getStartcode());
185 f.addFavoredNode(b.build());
186 }
187 return f.build().toByteArray();
188 }
189
190
191
192
193
194
195
196
197
198
/**
 * Picks a primary region server for every region, cycling through the known racks
 * and, within a rack, through a server index that advances over time.
 * Both output maps are updated in place.
 *
 * <p>Relies on {@code rackToRegionServerMap} being populated; with no racks the
 * call to {@code random.nextInt(0)} throws {@link IllegalArgumentException}.
 *
 * @param assignmentMap receives, per chosen server, the regions assigned to it
 * @param primaryRSMap receives the chosen primary server for each region
 * @param regions the regions to place
 */
void placePrimaryRSAsRoundRobin(Map<ServerName, List<HRegionInfo>> assignmentMap,
    Map<HRegionInfo, ServerName> primaryRSMap, List<HRegionInfo> regions) {
  List<String> rackList = new ArrayList<String>(rackToRegionServerMap.size());
  rackList.addAll(rackToRegionServerMap.keySet());
  // Start from a random rack.
  int rackIndex = random.nextInt(rackList.size());
  // Size of the largest rack; bounds the server index below.
  int maxRackSize = 0;
  for (Map.Entry<String,List<ServerName>> r : rackToRegionServerMap.entrySet()) {
    if (r.getValue().size() > maxRackSize) {
      maxRackSize = r.getValue().size();
    }
  }
  // Counts every rack visit, including visits that were skipped.
  int numIterations = 0;
  int firstServerIndex = random.nextInt(maxRackSize);

  // Index of the server to use within the current rack.
  int serverIndex = firstServerIndex;
  for (HRegionInfo regionInfo : regions) {
    List<ServerName> currentServerList;
    String rackName;
    // Find the next rack that actually has a server at serverIndex.
    while (true) {
      rackName = rackList.get(rackIndex);
      numIterations++;

      currentServerList = rackToRegionServerMap.get(rackName);

      if (serverIndex >= currentServerList.size()) {
        // This rack is too small for serverIndex: skip it. When the visit count
        // reaches a multiple of the rack count, move to the next server index,
        // wrapping to 0 past the largest rack size.
        if (numIterations % rackList.size() == 0) {
          if (++serverIndex >= maxRackSize) serverIndex = 0;
        }
        if ((++rackIndex) >= rackList.size()) {
          rackIndex = 0;
        }
      } else break;
    }

    // The server at serverIndex on the selected rack becomes the primary.
    ServerName currentServer = currentServerList.get(serverIndex);

    // Record the choice in both output maps.
    primaryRSMap.put(regionInfo, currentServer);
    List<HRegionInfo> regionsForServer = assignmentMap.get(currentServer);
    if (regionsForServer == null) {
      regionsForServer = new ArrayList<HRegionInfo>();
      assignmentMap.put(currentServer, regionsForServer);
    }
    regionsForServer.add(regionInfo);

    // Advance the server index when the visit count is a multiple of the rack count
    // (no wrap here; an out-of-range index is handled by the loop above), then
    // move on to the next rack, wrapping around.
    if (numIterations % rackList.size() == 0) {
      ++serverIndex;
    }
    if ((++rackIndex) >= rackList.size()) {
      rackIndex = 0;
    }
  }
}
254
255 Map<HRegionInfo, ServerName[]> placeSecondaryAndTertiaryRS(
256 Map<HRegionInfo, ServerName> primaryRSMap) {
257 Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap =
258 new HashMap<HRegionInfo, ServerName[]>();
259 for (Map.Entry<HRegionInfo, ServerName> entry : primaryRSMap.entrySet()) {
260
261 HRegionInfo regionInfo = entry.getKey();
262 ServerName primaryRS = entry.getValue();
263 try {
264
265 ServerName[] favoredNodes;
266
267 String primaryRack = rackManager.getRack(primaryRS);
268
269 if (getTotalNumberOfRacks() == 1) {
270 favoredNodes = singleRackCase(regionInfo, primaryRS, primaryRack);
271 } else {
272 favoredNodes = multiRackCase(regionInfo, primaryRS, primaryRack);
273 }
274 if (favoredNodes != null) {
275 secondaryAndTertiaryMap.put(regionInfo, favoredNodes);
276 LOG.debug("Place the secondary and tertiary region server for region "
277 + regionInfo.getRegionNameAsString());
278 }
279 } catch (Exception e) {
280 LOG.warn("Cannot place the favored nodes for region " +
281 regionInfo.getRegionNameAsString() + " because " + e, e);
282 continue;
283 }
284 }
285 return secondaryAndTertiaryMap;
286 }
287
288 private Map<ServerName, Set<HRegionInfo>> mapRSToPrimaries(
289 Map<HRegionInfo, ServerName> primaryRSMap) {
290 Map<ServerName, Set<HRegionInfo>> primaryServerMap =
291 new HashMap<ServerName, Set<HRegionInfo>>();
292 for (Entry<HRegionInfo, ServerName> e : primaryRSMap.entrySet()) {
293 Set<HRegionInfo> currentSet = primaryServerMap.get(e.getValue());
294 if (currentSet == null) {
295 currentSet = new HashSet<HRegionInfo>();
296 }
297 currentSet.add(e.getKey());
298 primaryServerMap.put(e.getValue(), currentSet);
299 }
300 return primaryServerMap;
301 }
302
303
304
305
306
307
308
309
310 public Map<HRegionInfo, ServerName[]> placeSecondaryAndTertiaryWithRestrictions(
311 Map<HRegionInfo, ServerName> primaryRSMap) {
312 Map<ServerName, Set<HRegionInfo>> serverToPrimaries =
313 mapRSToPrimaries(primaryRSMap);
314 Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap =
315 new HashMap<HRegionInfo, ServerName[]>();
316
317 for (Entry<HRegionInfo, ServerName> entry : primaryRSMap.entrySet()) {
318
319 HRegionInfo regionInfo = entry.getKey();
320 ServerName primaryRS = entry.getValue();
321 try {
322
323 String primaryRack = rackManager.getRack(primaryRS);
324 ServerName[] favoredNodes = null;
325 if (getTotalNumberOfRacks() == 1) {
326
327
328 favoredNodes = singleRackCase(regionInfo, primaryRS, primaryRack);
329 } else {
330 favoredNodes = multiRackCaseWithRestrictions(serverToPrimaries,
331 secondaryAndTertiaryMap, primaryRack, primaryRS, regionInfo);
332 }
333 if (favoredNodes != null) {
334 secondaryAndTertiaryMap.put(regionInfo, favoredNodes);
335 LOG.debug("Place the secondary and tertiary region server for region "
336 + regionInfo.getRegionNameAsString());
337 }
338 } catch (Exception e) {
339 LOG.warn("Cannot place the favored nodes for region "
340 + regionInfo.getRegionNameAsString() + " because " + e, e);
341 continue;
342 }
343 }
344 return secondaryAndTertiaryMap;
345 }
346
347 private ServerName[] multiRackCaseWithRestrictions(
348 Map<ServerName, Set<HRegionInfo>> serverToPrimaries,
349 Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap,
350 String primaryRack, ServerName primaryRS, HRegionInfo regionInfo) throws IOException {
351
352
353
354 Set<String> rackSkipSet = new HashSet<String>();
355 rackSkipSet.add(primaryRack);
356 String secondaryRack = getOneRandomRack(rackSkipSet);
357 List<ServerName> serverList = getServersFromRack(secondaryRack);
358 Set<ServerName> serverSet = new HashSet<ServerName>();
359 serverSet.addAll(serverList);
360 ServerName[] favoredNodes;
361 if (serverList.size() >= 2) {
362
363
364
365 Set<HRegionInfo> primaries = serverToPrimaries.get(primaryRS);
366 Set<ServerName> skipServerSet = new HashSet<ServerName>();
367 while (true) {
368 ServerName[] secondaryAndTertiary = null;
369 if (primaries.size() > 1) {
370
371 for (HRegionInfo primary : primaries) {
372 secondaryAndTertiary = secondaryAndTertiaryMap.get(primary);
373 if (secondaryAndTertiary != null) {
374 if (regionServerToRackMap.get(secondaryAndTertiary[0]).equals(secondaryRack)) {
375 skipServerSet.add(secondaryAndTertiary[0]);
376 }
377 if (regionServerToRackMap.get(secondaryAndTertiary[1]).equals(secondaryRack)) {
378 skipServerSet.add(secondaryAndTertiary[1]);
379 }
380 }
381 }
382 }
383 if (skipServerSet.size() + 2 <= serverSet.size())
384 break;
385 skipServerSet.clear();
386 rackSkipSet.add(secondaryRack);
387
388 if (rackSkipSet.size() == getTotalNumberOfRacks()) {
389
390 skipServerSet.remove(secondaryAndTertiary[0]);
391 skipServerSet.remove(secondaryAndTertiary[1]);
392 break;
393 }
394 secondaryRack = getOneRandomRack(rackSkipSet);
395 serverList = getServersFromRack(secondaryRack);
396 serverSet = new HashSet<ServerName>();
397 serverSet.addAll(serverList);
398 }
399
400
401 ServerName secondaryRS = getOneRandomServer(secondaryRack, skipServerSet);
402 skipServerSet.add(secondaryRS);
403
404 ServerName tertiaryRS = getOneRandomServer(secondaryRack, skipServerSet);
405
406 if (secondaryRS == null || tertiaryRS == null) {
407 LOG.error("Cannot place the secondary and tertiary"
408 + " region server for region "
409 + regionInfo.getRegionNameAsString());
410 }
411
412 favoredNodes = new ServerName[2];
413 favoredNodes[0] = secondaryRS;
414 favoredNodes[1] = tertiaryRS;
415 } else {
416
417
418 favoredNodes = new ServerName[2];
419 ServerName secondary = getOneRandomServer(secondaryRack);
420 favoredNodes[0] = secondary;
421
422
423 if (getTotalNumberOfRacks() == 2) {
424
425 Set<ServerName> serverSkipSet = new HashSet<ServerName>();
426 serverSkipSet.add(primaryRS);
427 favoredNodes[1] = getOneRandomServer(primaryRack, serverSkipSet);
428 } else {
429
430 rackSkipSet.add(secondaryRack);
431 String tertiaryRandomRack = getOneRandomRack(rackSkipSet);
432 favoredNodes[1] = getOneRandomServer(tertiaryRandomRack);
433 }
434 }
435 return favoredNodes;
436 }
437
438 private ServerName[] singleRackCase(HRegionInfo regionInfo,
439 ServerName primaryRS,
440 String primaryRack) throws IOException {
441
442
443 List<ServerName> serverList = getServersFromRack(primaryRack);
444 if (serverList.size() <= 2) {
445
446
447 return null;
448 } else {
449
450
451 Set<ServerName> serverSkipSet = new HashSet<ServerName>();
452 serverSkipSet.add(primaryRS);
453
454
455 ServerName secondaryRS = getOneRandomServer(primaryRack, serverSkipSet);
456
457 serverSkipSet.add(secondaryRS);
458
459
460 ServerName tertiaryRS =
461 getOneRandomServer(primaryRack, serverSkipSet);
462
463 if (secondaryRS == null || tertiaryRS == null) {
464 LOG.error("Cannot place the secondary and terinary" +
465 "region server for region " +
466 regionInfo.getRegionNameAsString());
467 }
468
469 ServerName[] favoredNodes = new ServerName[2];
470 favoredNodes[0] = secondaryRS;
471 favoredNodes[1] = tertiaryRS;
472 return favoredNodes;
473 }
474 }
475
476 private ServerName[] multiRackCase(HRegionInfo regionInfo,
477 ServerName primaryRS,
478 String primaryRack) throws IOException {
479
480
481
482
483
484 Set<String> rackSkipSet = new HashSet<String>();
485 rackSkipSet.add(primaryRack);
486 ServerName[] favoredNodes = new ServerName[2];
487 String secondaryRack = getOneRandomRack(rackSkipSet);
488 List<ServerName> serverList = getServersFromRack(secondaryRack);
489 if (serverList.size() >= 2) {
490
491
492
493 ServerName secondaryRS = getOneRandomServer(secondaryRack);
494
495
496 Set<ServerName> skipServerSet = new HashSet<ServerName>();
497 skipServerSet.add(secondaryRS);
498
499 ServerName tertiaryRS = getOneRandomServer(secondaryRack, skipServerSet);
500
501 if (secondaryRS == null || tertiaryRS == null) {
502 LOG.error("Cannot place the secondary and terinary" +
503 "region server for region " +
504 regionInfo.getRegionNameAsString());
505 }
506
507 favoredNodes[0] = secondaryRS;
508 favoredNodes[1] = tertiaryRS;
509 } else {
510
511
512 favoredNodes[0] = getOneRandomServer(secondaryRack);
513
514
515 if (getTotalNumberOfRacks() == 2) {
516
517 Set<ServerName> serverSkipSet = new HashSet<ServerName>();
518 serverSkipSet.add(primaryRS);
519 favoredNodes[1] = getOneRandomServer(primaryRack, serverSkipSet);
520 } else {
521
522 rackSkipSet.add(secondaryRack);
523 String tertiaryRandomRack = getOneRandomRack(rackSkipSet);
524 favoredNodes[1] = getOneRandomServer(tertiaryRandomRack);
525 }
526 }
527 return favoredNodes;
528 }
529
530 boolean canPlaceFavoredNodes() {
531 int serverSize = this.regionServerToRackMap.size();
532 return (serverSize >= FAVORED_NODES_NUM);
533 }
534
535 public void initialize() {
536 for (ServerName sn : this.servers) {
537 String rackName = this.rackManager.getRack(sn);
538 List<ServerName> serverList = this.rackToRegionServerMap.get(rackName);
539 if (serverList == null) {
540 serverList = new ArrayList<ServerName>();
541
542 this.uniqueRackList.add(rackName);
543 }
544 if (!serverList.contains(sn)) {
545 serverList.add(sn);
546 this.rackToRegionServerMap.put(rackName, serverList);
547 this.regionServerToRackMap.put(sn, rackName);
548 }
549 }
550 }
551
552 private int getTotalNumberOfRacks() {
553 return this.uniqueRackList.size();
554 }
555
556 private List<ServerName> getServersFromRack(String rack) {
557 return this.rackToRegionServerMap.get(rack);
558 }
559
560 private ServerName getOneRandomServer(String rack,
561 Set<ServerName> skipServerSet) throws IOException {
562 if(rack == null) return null;
563 List<ServerName> serverList = this.rackToRegionServerMap.get(rack);
564 if (serverList == null) return null;
565
566
567 if (skipServerSet != null && serverList.size() <= skipServerSet.size()) {
568 throw new IOException("Cannot randomly pick another random server");
569 }
570
571 ServerName randomServer;
572 do {
573 int randomIndex = random.nextInt(serverList.size());
574 randomServer = serverList.get(randomIndex);
575 } while (skipServerSet != null && skipServerSet.contains(randomServer));
576
577 return randomServer;
578 }
579
580 private ServerName getOneRandomServer(String rack) throws IOException {
581 return this.getOneRandomServer(rack, null);
582 }
583
584 private String getOneRandomRack(Set<String> skipRackSet) throws IOException {
585 if (skipRackSet == null || uniqueRackList.size() <= skipRackSet.size()) {
586 throw new IOException("Cannot randomly pick another random server");
587 }
588
589 String randomRack;
590 do {
591 int randomIndex = random.nextInt(this.uniqueRackList.size());
592 randomRack = this.uniqueRackList.get(randomIndex);
593 } while (skipRackSet.contains(randomRack));
594
595 return randomRack;
596 }
597
598 public static String getFavoredNodesAsString(List<ServerName> nodes) {
599 StringBuffer strBuf = new StringBuffer();
600 int i = 0;
601 for (ServerName node : nodes) {
602 strBuf.append(node.getHostAndPort());
603 if (++i != nodes.size()) strBuf.append(";");
604 }
605 return strBuf.toString();
606 }
607 }