/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.balancer;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.RegionMetrics;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Size;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

/**
 * Tests which cost functions {@link CacheAwareLoadBalancer} registers for a given configuration,
 * the value computed by {@link CacheAwareLoadBalancer.CacheAwareCostFunction} for a set of mocked
 * clusters, and the outcome of {@code needsBalance} for cold and warm regions.
 */
@Tag(MasterTests.TAG)
@Tag(MediumTests.TAG)
public class TestCacheAwareLoadBalancerCostFunctions extends StochasticBalancerTestBase {

  // Mapping of test -> expected cache cost; entry i belongs to clusterRegionCacheRatioMocks[i]
  private final float[] expectedCacheCost = { 0.0f, 0.0f, 0.5f, 1.0f, 0.0f, 0.572f, 0.0f, 0.075f };

  /**
   * Data set for testCacheCost:
   * <ul>
   * <li>[test][0][server] = number of regions hosted on that server</li>
   * <li>[test][region + 1][0] = server that the region is hosted on</li>
   * <li>[test][region + 1][server + 1] = percentage of the region cached on that server (the mock
   * cluster divides this value by 100 to obtain a ratio)</li>
   * </ul>
   */
  private final int[][][] clusterRegionCacheRatioMocks = new int[][][] {
    // Test 1: each region is entirely on server that hosts it
    // Cost of moving the regions in this case should be high as the regions are fully cached
    // on the server they are currently hosted on
    new int[][] {
      // Server 0 has 2, server 1 has 1 and server 2 has 1 region(s) hosted respectively
      new int[] { 2, 1, 1 },
      // Region 0 is hosted and cached only on server 0
      new int[] { 0, 100, 0, 0 },
      // Region 1 is hosted and cached only on server 0
      new int[] { 0, 100, 0, 0 },
      // Region 2 is hosted and cached only on server 1
      new int[] { 1, 0, 100, 0 },
      // Region 3 is hosted and cached only on server 2
      new int[] { 2, 0, 0, 100 }, },

    // Test 2: each region is cached completely on the server it is currently hosted on,
    // but it was also cached on some other server historically
    // Cost of moving the regions in this case should be high as the regions are fully cached
    // on the server they are currently hosted on. Although the regions were previously hosted and
    // cached on some other server, since they are completely cached on the new server,
    // there is no need to move the regions back to the previously hosting server
    new int[][] {
      // Server 0 has 1, server 1 has 2 and server 2 has 1 region(s) hosted respectively
      new int[] { 1, 2, 1 },
      // Region 0 is hosted and currently cached on server 0,
      // but previously cached completely on server 2
      new int[] { 0, 100, 0, 100 },
      // Region 1 is hosted and currently cached on server 1,
      // but previously cached completely on server 0
      new int[] { 1, 100, 100, 0 },
      // Region 2 is hosted and currently cached on server 1,
      // but previously cached on server 2
      new int[] { 1, 0, 100, 100 },
      // Region 3 is hosted and currently cached on server 2,
      // but previously cached on server 1
      new int[] { 2, 0, 100, 100 }, },

    // Test 3: The regions were hosted and fully cached on a server but later moved to another
    // because of server crash procedure. The regions are partially cached on the server they
    // are currently hosted on
    new int[][] { new int[] { 1, 2, 1 },
      // Region 0 is currently hosted and partially cached on server 0,
      // but was fully cached on server 2 previously
      new int[] { 0, 50, 0, 100 },
      // Region 1 is currently hosted and partially cached on server 1,
      // but was fully cached on server 0 previously
      new int[] { 1, 100, 50, 0 },
      // Region 2 is currently hosted and partially cached on server 1,
      // but was fully cached on server 2 previously
      new int[] { 1, 0, 50, 100 },
      // Region 3 is currently hosted and partially cached on server 2,
      // but was fully cached on server 1 previously
      new int[] { 2, 0, 100, 50 }, },

    // Test 4: The regions were hosted and fully cached on a server, but later moved to other
    // server because of server crash procedure. The regions are not at all cached on the server
    // they are currently hosted on
    new int[][] { new int[] { 1, 1, 2 },
      // Region 0 is currently hosted but not cached on server 0,
      // but was fully cached on server 2 previously
      new int[] { 0, 0, 0, 100 },
      // Region 1 is currently hosted but not cached on server 1,
      // but was fully cached on server 0 previously
      new int[] { 1, 100, 0, 0 },
      // Region 2 is currently hosted but not cached on server 2,
      // but was fully cached on server 1 previously
      new int[] { 2, 0, 100, 0 },
      // Region 3 is currently hosted but not cached on server 2,
      // but was fully cached on server 0 previously
      new int[] { 2, 100, 0, 0 }, },

    // Test 5: The regions were partially cached on old servers, before moving to the new server
    // where also, they are partially cached
    new int[][] { new int[] { 2, 1, 1 },
      // Region 0 is hosted and partially cached on server 0,
      // but was previously hosted and partially cached on server 1
      new int[] { 0, 50, 50, 0 },
      // Region 1 is hosted and partially cached on server 0,
      // but was previously hosted and partially cached on server 2
      new int[] { 0, 50, 0, 50 },
      // Region 2 is hosted and partially cached on server 1,
      // but was previously hosted and partially cached on server 2
      new int[] { 1, 0, 50, 50 },
      // Region 3 is hosted and partially cached on server 2,
      // but was previously hosted and partially cached on server 1
      new int[] { 2, 0, 50, 50 }, },

    // Test 6: The regions are less cached on the new servers as compared to what they were
    // cached on the server before they were moved to the new servers
    new int[][] { new int[] { 1, 2, 1 },
      // Region 0 is hosted and cached 30% on server 0,
      // but was previously hosted and cached 70% on server 1
      new int[] { 0, 30, 70, 0 },
      // Region 1 is hosted and cached 30% on server 1,
      // but was previously hosted and cached 70% on server 0
      new int[] { 1, 70, 30, 0 },
      // Region 2 is hosted and cached 30% on server 1,
      // but was previously hosted and cached 70% on server 2
      new int[] { 1, 0, 30, 70 },
      // Region 3 is hosted and cached 30% on server 2,
      // but was previously hosted and cached 70% on server 1
      new int[] { 2, 0, 70, 30 }, },

    // Test 7: The regions are more cached on the new servers as compared to what they were
    // cached on the server before they were moved to the new servers
    new int[][] { new int[] { 2, 1, 1 },
      // Region 0 is hosted and 80% cached on server 0,
      // but was previously hosted and 20% cached on server 1
      new int[] { 0, 80, 20, 0 },
      // Region 1 is hosted and 80% cached on server 0,
      // but was previously hosted and 20% cached on server 2
      new int[] { 0, 80, 0, 20 },
      // Region 2 is hosted and 80% cached on server 1,
      // but was previously hosted and 20% cached on server 0
      new int[] { 1, 20, 80, 0 },
      // Region 3 is hosted and 80% cached on server 2,
      // but was previously hosted and 20% cached on server 1
      new int[] { 2, 0, 20, 80 }, },

    // Test 8: The regions are randomly assigned to the server with some regions historically
    // hosted on other region servers
    new int[][] { new int[] { 1, 2, 1 },
      // Region 0 is hosted and partially cached on server 0,
      // but was previously hosted and partially cached on server 2
      // current cache ratio < historical cache ratio
      new int[] { 0, 34, 0, 58 },
      // Region 1 is hosted and fully cached on server 1,
      // but was previously hosted and partially cached on server 0
      // current cache ratio > historical cache ratio
      new int[] { 1, 78, 100, 0 },
      // Region 2 is hosted and partially cached on server 1,
      // but was previously hosted and partially cached on server 0
      // current cache ratio == historical cache ratio
      new int[] { 1, 66, 66, 0 },
      // Region 3 is hosted and partially cached on server 2
      // No historical cache ratio
      new int[] { 2, 0, 0, 96 }, }, };

  // Snapshot of the inherited configuration, taken once so each test starts from the same state
  private static Configuration storedConfiguration;

  // Balancer loaded with the pristine configuration in beforeEachTest(); changes made to conf
  // inside a test method are NOT visible to it unless loadConf is called again
  private final CacheAwareLoadBalancer loadBalancer = new CacheAwareLoadBalancer();

  /** Copies the inherited configuration before any test has a chance to modify it. */
  @BeforeAll
  public static void saveInitialConfiguration() {
    storedConfiguration = new Configuration(conf);
  }

  /** Resets {@code conf} to the stored snapshot and loads it into the shared balancer. */
  @BeforeEach
  public void beforeEachTest() {
    conf = new Configuration(storedConfiguration);
    loadBalancer.loadConf(conf);
  }

  /** The skewness cost function is registered with the unmodified configuration. */
  @Test
  public void testVerifyCacheAwareSkewnessCostFunctionEnabled() {
    CacheAwareLoadBalancer lb = new CacheAwareLoadBalancer();
    lb.loadConf(conf);
    assertTrue(Arrays.asList(lb.getCostFunctionNames())
      .contains(CacheAwareLoadBalancer.CacheAwareRegionSkewnessCostFunction.class.getSimpleName()));
  }

  /** The skewness cost function is not registered when its cost key is set to zero. */
  @Test
  public void testVerifyCacheAwareSkewnessCostFunctionDisabled() {
    conf.setFloat(
      CacheAwareLoadBalancer.CacheAwareRegionSkewnessCostFunction.REGION_COUNT_SKEW_COST_KEY, 0.0f);

    CacheAwareLoadBalancer lb = new CacheAwareLoadBalancer();
    lb.loadConf(conf);

    assertFalse(Arrays.asList(lb.getCostFunctionNames())
      .contains(CacheAwareLoadBalancer.CacheAwareRegionSkewnessCostFunction.class.getSimpleName()));
  }

  /** The cache cost function is registered once a bucket cache persistence path is configured. */
  @Test
  public void testVerifyCacheCostFunctionEnabled() {
    conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, "/tmp/prefetch.persistence");

    CacheAwareLoadBalancer lb = new CacheAwareLoadBalancer();
    lb.loadConf(conf);

    assertTrue(Arrays.asList(lb.getCostFunctionNames())
      .contains(CacheAwareLoadBalancer.CacheAwareCostFunction.class.getSimpleName()));
  }

  /**
   * The cache cost function is not registered when no bucket cache persistence path is set. Uses
   * the shared balancer, which was loaded with the unmodified configuration.
   */
  @Test
  public void testVerifyCacheCostFunctionDisabledByNoBucketCachePersistence() {
    assertFalse(Arrays.asList(loadBalancer.getCostFunctionNames())
      .contains(CacheAwareLoadBalancer.CacheAwareCostFunction.class.getSimpleName()));
  }

  /**
   * The cache cost function is not registered when its multiplier is zero, even though a bucket
   * cache persistence path is configured.
   */
  @Test
  public void testVerifyCacheCostFunctionDisabledByNoMultiplier() {
    conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, "/tmp/prefetch.persistence");
    conf.setFloat("hbase.master.balancer.stochastic.cacheCost", 0.0f);

    // The shared loadBalancer was loaded before the two keys above were set, so asserting on it
    // would pass merely because persistence was absent. Load a fresh balancer so that the zero
    // multiplier is what is actually being verified.
    CacheAwareLoadBalancer lb = new CacheAwareLoadBalancer();
    lb.loadConf(conf);

    assertFalse(Arrays.asList(lb.getCostFunctionNames())
      .contains(CacheAwareLoadBalancer.CacheAwareCostFunction.class.getSimpleName()));
  }

  /**
   * Runs the cache cost function against every mocked cluster in
   * {@link #clusterRegionCacheRatioMocks} and compares the result with the matching entry of
   * {@link #expectedCacheCost}.
   */
  @Test
  public void testCacheCost() {
    conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, "/tmp/prefetch.persistence");
    CacheAwareLoadBalancer.CacheAwareCostFunction costFunction =
      new CacheAwareLoadBalancer.CacheAwareCostFunction(conf);

    // Guard against the two parallel arrays drifting apart when a data set is added or removed
    assertEquals(expectedCacheCost.length, clusterRegionCacheRatioMocks.length,
      "Every cache ratio data set needs exactly one expected cost");

    for (int test = 0; test < clusterRegionCacheRatioMocks.length; test++) {
      int[][] clusterRegionLocations = clusterRegionCacheRatioMocks[test];
      MockClusterForCacheCost cluster = new MockClusterForCacheCost(clusterRegionLocations);
      costFunction.prepare(cluster);
      double cost = costFunction.cost();
      // Data sets are numbered from 1 in the comments above, hence test + 1
      assertEquals(expectedCacheCost[test], cost, 0.01,
        "Unexpected cache cost for data set Test " + (test + 1));
    }
  }

  /**
   * When block-cache persistence is enabled, cold regions (cache ratio below
   * {@link CacheAwareLoadBalancer#LOW_CACHE_RATIO_FOR_RELOCATION_KEY}) together with RS-reported
   * block-cache free bytes inflate the plausible best placement so that the weighted cache cost
   * crosses {@code minCostNeedBalance}; {@link StochasticLoadBalancer#needsBalance} then returns
   * true even though the region count is evenly spread across the servers.
   */
  @Test
  public void testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace() {
    conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, "/tmp/prefetch.persistence");
    CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
    int regionSizeMb = 64;
    long cacheFreeInBytes = regionSizeMb * 1024L * 1024L;
    // simulates a cache ratio lower than
    // CacheAwareLoadBalancer.LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT
    float simulatedCacheRatio = 0.1f;
    Map<ServerName, List<RegionInfo>> clusterServers =
      mockClusterServersUnsorted(new int[] { 1, 1 }, 1);
    List<RegionInfo> regions = new ArrayList<>();
    clusterServers.values().forEach(regions::addAll);
    List<ServerName> serversList = getServersInInsertionOrder(clusterServers);
    // Only the second server reports free block cache: exactly one region's worth
    Map<ServerName, Long> blockCacheFree = new HashMap<>();
    blockCacheFree.put(serversList.get(0), 0L);
    blockCacheFree.put(serversList.get(1), cacheFreeInBytes);
    BalancerClusterState cluster = new BalancerClusterState(clusterServers,
      buildRegionLoads(regions, simulatedCacheRatio, regionSizeMb), null, null,
      Collections.emptyMap(), blockCacheFree);
    lb.initCosts(cluster);
    assertTrue(lb.needsBalance(
      TableName.valueOf("testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace"),
      cluster));
  }

  /**
   * Checks that needsBalance is false when regions report a high cache ratio, even though every
   * server reports free block cache space.
   */
  @Test
  public void testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace() {
    conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, "/tmp/prefetch.persistence");
    CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
    int regionSizeMb = 64;
    long cacheFreeInBytes = regionSizeMb * 1024L * 1024L;
    Map<ServerName, List<RegionInfo>> clusterServers =
      mockClusterServersUnsorted(new int[] { 1, 1 }, 1);
    List<RegionInfo> regions = new ArrayList<>();
    clusterServers.values().forEach(regions::addAll);
    List<ServerName> serversList = getServersInInsertionOrder(clusterServers);
    // Both servers report one region's worth of free block cache plus 1 MB
    Map<ServerName, Long> blockCacheFree = new HashMap<>();
    blockCacheFree.put(serversList.get(0), cacheFreeInBytes + 1024 * 1024);
    blockCacheFree.put(serversList.get(1), cacheFreeInBytes + 1024 * 1024);
    BalancerClusterState cluster =
      new BalancerClusterState(clusterServers, buildRegionLoads(regions, 1.0f, regionSizeMb), null,
        null, Collections.emptyMap(), blockCacheFree);
    lb.initCosts(cluster);
    assertFalse(lb.needsBalance(
      TableName.valueOf("testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace"),
      cluster));
  }

  /**
   * Creates a balancer backed by a {@link DummyClusterInfoProvider} and loads {@code cfg} into it.
   * @param cfg configuration handed to both the cluster info provider and the balancer
   * @return the configured balancer
   */
  private static CacheAwareLoadBalancer newCacheAwareBalancer(Configuration cfg) {
    CacheAwareLoadBalancer lb = new CacheAwareLoadBalancer();
    lb.setClusterInfoProvider(new DummyClusterInfoProvider(cfg));
    lb.loadConf(cfg);
    return lb;
  }

  /**
   * Builds a region load map in which every region reports the same mocked metrics: zero request
   * counts, zero memstore and store file size, the given region size and the given cache ratio.
   * @param regions      regions to create a load entry for
   * @param cachedRatio  value returned by the mocked {@code getCurrentRegionCachedRatio()}
   * @param regionSizeMb value, in megabytes, returned by the mocked {@code getRegionSizeMB()}
   * @return map holding, for each region, the same single-element deque under both the region
   *         name string and the encoded region name
   */
  private static Map<String, Deque<BalancerRegionLoad>>
    buildRegionLoads(Collection<RegionInfo> regions, float cachedRatio, int regionSizeMb) {
    RegionMetrics rm = mock(RegionMetrics.class);
    when(rm.getReadRequestCount()).thenReturn(0L);
    when(rm.getCpRequestCount()).thenReturn(0L);
    when(rm.getWriteRequestCount()).thenReturn(0L);
    when(rm.getMemStoreSize()).thenReturn(Size.ZERO);
    when(rm.getStoreFileSize()).thenReturn(Size.ZERO);
    when(rm.getRegionSizeMB()).thenReturn(new Size(regionSizeMb, Size.Unit.MEGABYTE));
    when(rm.getCurrentRegionCachedRatio()).thenReturn(cachedRatio);

    // A single load instance is shared by all regions since they all report identical metrics
    BalancerRegionLoad brl = new BalancerRegionLoad(rm);
    Map<String, Deque<BalancerRegionLoad>> loads = new HashMap<>();
    for (RegionInfo ri : regions) {
      ArrayDeque<BalancerRegionLoad> dq = new ArrayDeque<>(1);
      dq.add(brl);
      // Registered under both key forms so that a lookup by either one finds the load
      loads.put(ri.getRegionNameAsString(), dq);
      loads.put(ri.getEncodedName(), dq);
    }
    return loads;
  }

  /**
   * Returns the servers of {@code cluster} in the iteration order of its key set.
   * @param cluster server to regions mapping
   * @return a new list holding the map's keys
   */
  private static List<ServerName>
    getServersInInsertionOrder(Map<ServerName, List<RegionInfo>> cluster) {
    return new ArrayList<>(cluster.keySet());
  }

  /**
   * Cluster state whose per-server cache ratios come from one data set of
   * {@link #clusterRegionCacheRatioMocks} instead of from region metrics.
   */
  private class MockClusterForCacheCost extends BalancerClusterState {
    // (region index, server index) -> ratio of the region cached on that server
    private final Map<Pair<Integer, Integer>, Float> regionServerCacheRatio = new HashMap<>();

    /**
     * @param regionsArray one data set: row 0 holds the number of regions per server, each
     *                     following row holds the hosting server followed by the cache percentage
     *                     of that region on every server
     */
    public MockClusterForCacheCost(int[][] regionsArray) {
      // regions[0] is an array where index = serverIndex and value = number of regions
      super(mockClusterServersUnsorted(regionsArray[0], 1), null, null, null, null, null);
      Map<String, Pair<ServerName, Float>> oldCacheRatio = new HashMap<>();
      for (int i = 1; i < regionsArray.length; i++) {
        int regionIndex = i - 1;
        for (int j = 1; j < regionsArray[i].length; j++) {
          int serverIndex = j - 1;
          // Data set values are percentages; convert to a ratio
          float cacheRatio = (float) regionsArray[i][j] / 100;
          regionServerCacheRatio.put(new Pair<>(regionIndex, serverIndex), cacheRatio);
          if (cacheRatio > 0.0f && serverIndex != regionsArray[i][0]) {
            // This is the historical cacheRatio value
            oldCacheRatio.put(regions[regionIndex].getEncodedName(),
              new Pair<>(servers[serverIndex], cacheRatio));
          }
        }
      }
      regionCacheRatioOnOldServerMap = oldCacheRatio;
    }

    /** Every region is given the same size so that only the cache ratios drive the cost. */
    @Override
    public int getTotalRegionHFileSizeMB(int region) {
      return 1;
    }

    /**
     * Returns the mocked ratio for the server currently hosting the region; for any other server,
     * returns the historical ratio if one is recorded for that server, otherwise 0.
     */
    @Override
    protected float getRegionCacheRatioOnRegionServer(int region, int regionServerIndex) {
      float cacheRatio = 0.0f;

      // Get the cache ratio if the region is currently hosted on this server
      if (regionServerIndex == regionIndexToServerIndex[region]) {
        return regionServerCacheRatio.get(new Pair<>(region, regionServerIndex));
      }

      // Region is not currently hosted on this server. Check if the region was cached on this
      // server earlier. This can happen when the server was shutdown and the cache was persisted.
      // Search using the index name and server name and not the index id and server id as these
      // ids may change when a server is marked as dead or a new server is added.
      String regionEncodedName = regions[region].getEncodedName();
      ServerName serverName = servers[regionServerIndex];
      if (
        regionCacheRatioOnOldServerMap != null
          && regionCacheRatioOnOldServerMap.containsKey(regionEncodedName)
      ) {
        Pair<ServerName, Float> serverCacheRatio =
          regionCacheRatioOnOldServerMap.get(regionEncodedName);
        if (ServerName.isSameAddress(serverName, serverCacheRatio.getFirst())) {
          cacheRatio = serverCacheRatio.getSecond();
          // The historical entry is consumed: it is removed once it has been read
          regionCacheRatioOnOldServerMap.remove(regionEncodedName);
        }
      }
      return cacheRatio;
    }
  }
}