001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.apache.hadoop.hbase.io.hfile.HFileInfo.FILE_PATH; 021import static org.apache.hadoop.hbase.io.hfile.HFileInfo.FILE_SIZE; 022 023import java.util.ArrayList; 024import java.util.Collection; 025import java.util.Date; 026import java.util.HashMap; 027import java.util.HashSet; 028import java.util.List; 029import java.util.Map; 030import java.util.Set; 031import java.util.concurrent.ConcurrentHashMap; 032import org.apache.hadoop.conf.Configuration; 033import org.apache.hadoop.fs.Path; 034import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; 035import org.apache.hadoop.hbase.io.hfile.HFileInfo; 036import org.apache.hadoop.hbase.util.Bytes; 037import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 038import org.apache.hadoop.hbase.util.Pair; 039import org.apache.yetus.audience.InterfaceAudience; 040import org.slf4j.Logger; 041import org.slf4j.LoggerFactory; 042 043/** 044 * The DataTieringManager class categorizes data into hot data and cold data based on the specified 045 * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with 046 * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType} 047 * determines the logic for distinguishing data into hot or cold. By default, all data is considered 048 * as hot. 049 */ 050@InterfaceAudience.Private 051public class DataTieringManager { 052 private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class); 053 public static final String GLOBAL_DATA_TIERING_ENABLED_KEY = 054 "hbase.regionserver.datatiering.enable"; 055 public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default 056 public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type"; 057 public static final String HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY = 058 "hbase.hstore.datatiering.grace.period.millis"; 059 public static final long DEFAULT_DATATIERING_GRACE_PERIOD = 0; 060 public static final String DATATIERING_HOT_DATA_AGE_KEY = 061 "hbase.hstore.datatiering.hot.age.millis"; 062 public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE; 063 public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days 064 private static DataTieringManager instance; 065 private final Map<String, HRegion> onlineRegions; 066 067 // Accounts for the total size of cold data in each region, together with a list of cold files in 068 // that region. 069 private final Map<String, Pair<List<String>, Long>> regionColdDataSize = 070 new ConcurrentHashMap<>(); 071 072 private DataTieringManager(Map<String, HRegion> onlineRegions) { 073 this.onlineRegions = onlineRegions; 074 } 075 076 /** 077 * Initializes the DataTieringManager instance with the provided map of online regions, only if 078 * the configuration "hbase.regionserver.datatiering.enable" is enabled. 079 * @param conf Configuration object. 080 * @param onlineRegions A map containing online regions. 081 * @return True if the instance is instantiated successfully, false otherwise. 082 */ 083 public static synchronized boolean instantiate(Configuration conf, 084 Map<String, HRegion> onlineRegions) { 085 if (!isDataTieringFeatureEnabled(conf)) { 086 LOG.debug("DataTiering feature is disabled (key: {}). Skipping instantiation.", 087 GLOBAL_DATA_TIERING_ENABLED_KEY); 088 return false; 089 } 090 if (instance != null) { 091 LOG.warn("DataTieringManager is already instantiated."); 092 return false; 093 } 094 instance = new DataTieringManager(onlineRegions); 095 LOG.info("DataTieringManager instantiated successfully."); 096 return true; 097 } 098 099 /** 100 * Retrieves the instance of DataTieringManager. 101 * @return The instance of DataTieringManager, if instantiated, null otherwise. 102 */ 103 public static synchronized DataTieringManager getInstance() { 104 return instance; 105 } 106 107 /** 108 * Determines whether data tiering is enabled for the given block cache key. 109 * @param key the block cache key 110 * @return {@code true} if data tiering is enabled for the HFile associated with the key, 111 * {@code false} otherwise 112 * @throws DataTieringException if there is an error retrieving the HFile path or configuration 113 */ 114 public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException { 115 if (key.getCfName() == null || key.getRegionName() == null) { 116 throw new DataTieringException( 117 "BlockCacheKey doesn't contain Column Family Name or Region Name"); 118 } 119 Configuration configuration = 120 getHStore(key.getRegionName(), key.getCfName()).getReadOnlyConfiguration(); 121 DataTieringType dataTieringType = getDataTieringType(configuration); 122 return !dataTieringType.equals(DataTieringType.NONE); 123 } 124 125 /** 126 * Determines whether data tiering is enabled for the given HFile path. 127 * @param hFilePath the path to the HFile 128 * @return {@code true} if data tiering is enabled, {@code false} otherwise 129 * @throws DataTieringException if there is an error retrieving the configuration 130 */ 131 public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException { 132 Configuration configuration = getConfiguration(hFilePath); 133 DataTieringType dataTieringType = getDataTieringType(configuration); 134 return !dataTieringType.equals(DataTieringType.NONE); 135 } 136 137 /** 138 * Determines whether the data associated with the given block cache key is considered hot. If the 139 * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not 140 * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by 141 * default. 142 * @param key the block cache key 143 * @return {@code true} if the data is hot, {@code false} otherwise 144 * @throws DataTieringException if there is an error retrieving data tiering information 145 */ 146 public boolean isHotData(BlockCacheKey key) throws DataTieringException { 147 if (key.getRegionName() == null) { 148 throw new DataTieringException("BlockCacheKey doesn't contain Region Name"); 149 } 150 if (key.getCfName() == null) { 151 throw new DataTieringException("BlockCacheKey doesn't contain CF Name"); 152 } 153 if (key.getHfileName() == null) { 154 throw new DataTieringException("BlockCacheKey doesn't contain File Name"); 155 } 156 return isHotData(key.getRegionName(), key.getCfName(), key.getHfileName()); 157 } 158 159 /** 160 * Determines whether the data associated with the given time range tracker is considered hot. If 161 * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum 162 * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers 163 * the data as hot by default. 164 * @param maxTimestamp the maximum timestamp associated with the data. 165 * @param conf The configuration object to use for determining hot data criteria. 166 * @return {@code true} if the data is hot, {@code false} otherwise 167 */ 168 public boolean isHotData(long maxTimestamp, Configuration conf) { 169 if (isWithinGracePeriod(maxTimestamp, conf)) { 170 return true; 171 } 172 DataTieringType dataTieringType = getDataTieringType(conf); 173 174 if ( 175 !dataTieringType.equals(DataTieringType.NONE) 176 && maxTimestamp != TimeRangeTracker.INITIAL_MAX_TIMESTAMP 177 ) { 178 return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf)); 179 } 180 // DataTieringType.NONE or other types are considered hot by default 181 return true; 182 } 183 184 private boolean isHotData(String region, String cf, String fileName) throws DataTieringException { 185 Configuration configuration = getHStore(region, cf).getReadOnlyConfiguration(); 186 DataTieringType dataTieringType = getDataTieringType(configuration); 187 if (!dataTieringType.equals(DataTieringType.NONE)) { 188 HStoreFile hStoreFile = getHStoreFile(region, cf, fileName); 189 if (hStoreFile == null) { 190 throw new DataTieringException( 191 "Store file corresponding to " + region + "/" + cf + "/" + fileName + " doesn't exist"); 192 } 193 long maxTimestamp = dataTieringType.getInstance().getTimestamp(hStoreFile); 194 if (isWithinGracePeriod(maxTimestamp, configuration)) { 195 return true; 196 } 197 return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration)); 198 } 199 // DataTieringType.NONE or other types are considered hot by default 200 return true; 201 } 202 203 /** 204 * Determines whether the data in the HFile being read is considered hot based on the configured 205 * data tiering type and hot data age. If the data tiering type is set to 206 * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers 207 * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default. 208 * @param hFileInfo Information about the HFile to determine if its data is hot. 209 * @param configuration The configuration object to use for determining hot data criteria. 210 * @return {@code true} if the data is hot, {@code false} otherwise 211 */ 212 public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) { 213 DataTieringType dataTieringType = getDataTieringType(configuration); 214 if (hFileInfo != null && !dataTieringType.equals(DataTieringType.NONE)) { 215 long maxTimestamp = dataTieringType.getInstance().getTimestamp(hFileInfo); 216 if (isWithinGracePeriod(maxTimestamp, configuration)) { 217 return true; 218 } 219 LOG.debug("Max TS: {} for file {}. Cutoff Age TS: {}", maxTimestamp, 220 hFileInfo.getHFileContext().getHFileName(), getDataTieringHotDataAge(configuration)); 221 boolean isHot = hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration)); 222 if (!isHot) { 223 Path path = new Path(Bytes.toString(hFileInfo.get(FILE_PATH))); 224 String regionName = path.getParent().getParent().getName(); 225 regionColdDataSize.compute(regionName, (k, v) -> { 226 if (v == null) { 227 List<String> files = new ArrayList<>(); 228 files.add(hFileInfo.getHFileContext().getHFileName()); 229 LOG.debug("computing file {} with size {} as cold data for region {}", 230 hFileInfo.getHFileContext().getHFileName(), Bytes.toLong(hFileInfo.get(FILE_SIZE)), 231 regionName); 232 return new Pair<>(files, Bytes.toLong(hFileInfo.get(FILE_SIZE))); 233 } else { 234 if (!v.getFirst().contains(hFileInfo.getHFileContext().getHFileName())) { 235 v.getFirst().add(hFileInfo.getHFileContext().getHFileName()); 236 v.setSecond(v.getSecond() + Bytes.toLong(hFileInfo.get(FILE_SIZE))); 237 LOG.debug( 238 "adding file {} with size {} as cold data for region {}. Total cold data size for the region is {}", 239 hFileInfo.getHFileContext().getHFileName(), Bytes.toLong(hFileInfo.get(FILE_SIZE)), 240 regionName, v.getSecond()); 241 } 242 return v; 243 } 244 }); 245 } 246 return isHot; 247 } 248 // DataTieringType.NONE or other types are considered hot by default 249 return true; 250 } 251 252 private boolean isWithinGracePeriod(long maxTimestamp, Configuration conf) { 253 long gracePeriod = getDataTieringGracePeriod(conf); 254 return gracePeriod > 0 && (getCurrentTimestamp() - maxTimestamp) < gracePeriod; 255 } 256 257 private boolean hotDataValidator(long maxTimestamp, long hotDataAge) { 258 long currentTimestamp = getCurrentTimestamp(); 259 long diff = currentTimestamp - maxTimestamp; 260 return diff <= hotDataAge; 261 } 262 263 private long getCurrentTimestamp() { 264 return EnvironmentEdgeManager.getDelegate().currentTime(); 265 } 266 267 /** 268 * Returns a set of cold data filenames from the given set of cached blocks. Cold data is 269 * determined by the configured data tiering type and hot data age. 270 * @param allCachedBlocks a set of all cached block cache keys 271 * @return a set of cold data filenames 272 * @throws DataTieringException if there is an error determining whether a block is hot 273 */ 274 public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks) 275 throws DataTieringException { 276 Set<String> coldHFiles = new HashSet<>(); 277 for (BlockCacheKey key : allCachedBlocks) { 278 if (coldHFiles.contains(key.getHfileName())) { 279 continue; 280 } 281 if (!isHotData(key)) { 282 coldHFiles.add(key.getHfileName()); 283 } 284 } 285 return coldHFiles; 286 } 287 288 private HRegion getHRegion(String region) throws DataTieringException { 289 HRegion hRegion = this.onlineRegions.get(region); 290 if (hRegion == null) { 291 throw new DataTieringException("HRegion corresponding to " + region + " doesn't exist"); 292 } 293 return hRegion; 294 } 295 296 private HStore getHStore(String region, String cf) throws DataTieringException { 297 HRegion hRegion = getHRegion(region); 298 HStore hStore = hRegion.getStore(Bytes.toBytes(cf)); 299 if (hStore == null) { 300 throw new DataTieringException( 301 "HStore corresponding to " + region + "/" + cf + " doesn't exist"); 302 } 303 return hStore; 304 } 305 306 private HStoreFile getHStoreFile(String region, String cf, String fileName) 307 throws DataTieringException { 308 HStore hStore = getHStore(region, cf); 309 for (HStoreFile file : hStore.getStorefiles()) { 310 if (file.getPath().getName().equals(fileName)) { 311 return file; 312 } 313 } 314 return null; 315 } 316 317 private Configuration getConfiguration(Path hFilePath) throws DataTieringException { 318 String regionName = null; 319 String cfName = null; 320 try { 321 regionName = hFilePath.getParent().getParent().getName(); 322 cfName = hFilePath.getParent().getName(); 323 } catch (Exception e) { 324 throw new DataTieringException("Incorrect HFile Path: " + hFilePath); 325 } 326 if (regionName == null || cfName == null) { 327 throw new DataTieringException("Incorrect HFile Path: " + hFilePath); 328 } 329 HStore hStore = getHStore(regionName, cfName); 330 return hStore.getReadOnlyConfiguration(); 331 } 332 333 private DataTieringType getDataTieringType(Configuration conf) { 334 return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name())); 335 } 336 337 private long getDataTieringHotDataAge(Configuration conf) { 338 return Long.parseLong( 339 conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE))); 340 } 341 342 private long getDataTieringGracePeriod(Configuration conf) { 343 return Long.parseLong(conf.get(HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY, 344 String.valueOf(DEFAULT_DATATIERING_GRACE_PERIOD))); 345 } 346 347 /* 348 * This API traverses through the list of online regions and returns a subset of these files-names 349 * that are cold. 350 * @return List of names of files with cold data as per data-tiering logic. 351 */ 352 public Map<String, String> getColdFilesList() { 353 Map<String, String> coldFiles = new HashMap<>(); 354 for (HRegion r : this.onlineRegions.values()) { 355 for (HStore hStore : r.getStores()) { 356 Configuration conf = hStore.getReadOnlyConfiguration(); 357 DataTieringType dataTieringType = getDataTieringType(conf); 358 if (dataTieringType == DataTieringType.NONE) { 359 // Data-Tiering not enabled for the store. Just skip it. 360 continue; 361 } 362 Long hotDataAge = getDataTieringHotDataAge(conf); 363 364 for (HStoreFile hStoreFile : hStore.getStorefiles()) { 365 String hFileName = 366 hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName(); 367 long maxTimeStamp = dataTieringType.getInstance().getTimestamp(hStoreFile); 368 LOG.debug("Max TS for file {} is {}", hFileName, new Date(maxTimeStamp)); 369 long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime(); 370 long fileAge = currentTimestamp - maxTimeStamp; 371 if (fileAge > hotDataAge) { 372 // Values do not matter. 373 coldFiles.put(hFileName, null); 374 } 375 } 376 } 377 } 378 return coldFiles; 379 } 380 381 private static boolean isDataTieringFeatureEnabled(Configuration conf) { 382 return conf.getBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, 383 DataTieringManager.DEFAULT_GLOBAL_DATA_TIERING_ENABLED); 384 } 385 386 // Resets the instance to null. To be used only for testing. 387 public static void resetForTestingOnly() { 388 instance = null; 389 } 390 391 public Map<String, Pair<List<String>, Long>> getRegionColdDataSize() { 392 return regionColdDataSize; 393 } 394 395 /** 396 * Updates regionColdData size for the region containing the passed compactedFiles. 397 */ 398 public void updateRegionColdDataSize(String encodedRegionName, 399 Collection<HStoreFile> compactedFiles, Collection<HStoreFile> newFiles) { 400 regionColdDataSize.computeIfPresent(encodedRegionName, (k, v) -> { 401 for (HStoreFile file : compactedFiles) { 402 if (v.getFirst().contains(file.getPath().getName())) { 403 v.getFirst().remove(file.getPath().getName()); 404 v.setSecond(v.getSecond() - Bytes.toLong(file.getMetadataValue(FILE_SIZE))); 405 } 406 } 407 for (HStoreFile file : newFiles) { 408 // call isHotData to account for the new file size in regionColdDataSize, if the new file is 409 // considered cold data as per data-tiering logic. 410 isHotData(file.getFileInfo().getHFileInfo(), file.getFileInfo().getConf()); 411 } 412 return v; 413 }); 414 } 415}