001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.util.Date; 021import java.util.HashMap; 022import java.util.HashSet; 023import java.util.Map; 024import java.util.Set; 025import org.apache.hadoop.conf.Configuration; 026import org.apache.hadoop.fs.Path; 027import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; 028import org.apache.hadoop.hbase.io.hfile.HFileInfo; 029import org.apache.hadoop.hbase.util.Bytes; 030import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 031import org.apache.yetus.audience.InterfaceAudience; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035/** 036 * The DataTieringManager class categorizes data into hot data and cold data based on the specified 037 * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with 038 * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType} 039 * determines the logic for distinguishing data into hot or cold. By default, all data is considered 040 * as hot. 041 */ 042@InterfaceAudience.Private 043public class DataTieringManager { 044 private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class); 045 public static final String GLOBAL_DATA_TIERING_ENABLED_KEY = 046 "hbase.regionserver.datatiering.enable"; 047 public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default 048 public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type"; 049 public static final String HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY = 050 "hbase.hstore.datatiering.grace.period.millis"; 051 public static final long DEFAULT_DATATIERING_GRACE_PERIOD = 0; 052 public static final String DATATIERING_HOT_DATA_AGE_KEY = 053 "hbase.hstore.datatiering.hot.age.millis"; 054 public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE; 055 public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days 056 private static DataTieringManager instance; 057 private final Map<String, HRegion> onlineRegions; 058 059 private DataTieringManager(Map<String, HRegion> onlineRegions) { 060 this.onlineRegions = onlineRegions; 061 } 062 063 /** 064 * Initializes the DataTieringManager instance with the provided map of online regions, only if 065 * the configuration "hbase.regionserver.datatiering.enable" is enabled. 066 * @param conf Configuration object. 067 * @param onlineRegions A map containing online regions. 068 * @return True if the instance is instantiated successfully, false otherwise. 069 */ 070 public static synchronized boolean instantiate(Configuration conf, 071 Map<String, HRegion> onlineRegions) { 072 if (!isDataTieringFeatureEnabled(conf)) { 073 LOG.debug("DataTiering feature is disabled (key: {}). Skipping instantiation.", 074 GLOBAL_DATA_TIERING_ENABLED_KEY); 075 return false; 076 } 077 if (instance != null) { 078 LOG.warn("DataTieringManager is already instantiated."); 079 return false; 080 } 081 instance = new DataTieringManager(onlineRegions); 082 LOG.info("DataTieringManager instantiated successfully."); 083 return true; 084 } 085 086 /** 087 * Retrieves the instance of DataTieringManager. 088 * @return The instance of DataTieringManager, if instantiated, null otherwise. 089 */ 090 public static synchronized DataTieringManager getInstance() { 091 return instance; 092 } 093 094 /** 095 * Determines whether data tiering is enabled for the given block cache key. 096 * @param key the block cache key 097 * @return {@code true} if data tiering is enabled for the HFile associated with the key, 098 * {@code false} otherwise 099 * @throws DataTieringException if there is an error retrieving the HFile path or configuration 100 */ 101 public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException { 102 if (key.getCfName() == null || key.getRegionName() == null) { 103 throw new DataTieringException( 104 "BlockCacheKey doesn't contain Column Family Name or Region Name"); 105 } 106 Configuration configuration = 107 getHStore(key.getRegionName(), key.getCfName()).getReadOnlyConfiguration(); 108 DataTieringType dataTieringType = getDataTieringType(configuration); 109 return !dataTieringType.equals(DataTieringType.NONE); 110 } 111 112 /** 113 * Determines whether data tiering is enabled for the given HFile path. 114 * @param hFilePath the path to the HFile 115 * @return {@code true} if data tiering is enabled, {@code false} otherwise 116 * @throws DataTieringException if there is an error retrieving the configuration 117 */ 118 public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException { 119 Configuration configuration = getConfiguration(hFilePath); 120 DataTieringType dataTieringType = getDataTieringType(configuration); 121 return !dataTieringType.equals(DataTieringType.NONE); 122 } 123 124 /** 125 * Determines whether the data associated with the given block cache key is considered hot. If the 126 * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not 127 * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by 128 * default. 129 * @param key the block cache key 130 * @return {@code true} if the data is hot, {@code false} otherwise 131 * @throws DataTieringException if there is an error retrieving data tiering information 132 */ 133 public boolean isHotData(BlockCacheKey key) throws DataTieringException { 134 if (key.getRegionName() == null) { 135 throw new DataTieringException("BlockCacheKey doesn't contain Region Name"); 136 } 137 if (key.getCfName() == null) { 138 throw new DataTieringException("BlockCacheKey doesn't contain CF Name"); 139 } 140 if (key.getHfileName() == null) { 141 throw new DataTieringException("BlockCacheKey doesn't contain File Name"); 142 } 143 return isHotData(key.getRegionName(), key.getCfName(), key.getHfileName()); 144 } 145 146 /** 147 * Determines whether the data associated with the given time range tracker is considered hot. If 148 * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum 149 * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers 150 * the data as hot by default. 151 * @param maxTimestamp the maximum timestamp associated with the data. 152 * @param conf The configuration object to use for determining hot data criteria. 153 * @return {@code true} if the data is hot, {@code false} otherwise 154 */ 155 public boolean isHotData(long maxTimestamp, Configuration conf) { 156 if (isWithinGracePeriod(maxTimestamp, conf)) { 157 return true; 158 } 159 DataTieringType dataTieringType = getDataTieringType(conf); 160 161 if ( 162 !dataTieringType.equals(DataTieringType.NONE) 163 && maxTimestamp != TimeRangeTracker.INITIAL_MAX_TIMESTAMP 164 ) { 165 return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf)); 166 } 167 // DataTieringType.NONE or other types are considered hot by default 168 return true; 169 } 170 171 private boolean isHotData(String region, String cf, String fileName) throws DataTieringException { 172 Configuration configuration = getHStore(region, cf).getReadOnlyConfiguration(); 173 DataTieringType dataTieringType = getDataTieringType(configuration); 174 if (!dataTieringType.equals(DataTieringType.NONE)) { 175 HStoreFile hStoreFile = getHStoreFile(region, cf, fileName); 176 if (hStoreFile == null) { 177 throw new DataTieringException( 178 "Store file corresponding to " + region + "/" + cf + "/" + fileName + " doesn't exist"); 179 } 180 long maxTimestamp = dataTieringType.getInstance().getTimestamp(hStoreFile); 181 if (isWithinGracePeriod(maxTimestamp, configuration)) { 182 return true; 183 } 184 return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration)); 185 } 186 // DataTieringType.NONE or other types are considered hot by default 187 return true; 188 } 189 190 /** 191 * Determines whether the data in the HFile being read is considered hot based on the configured 192 * data tiering type and hot data age. If the data tiering type is set to 193 * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers 194 * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default. 195 * @param hFileInfo Information about the HFile to determine if its data is hot. 196 * @param configuration The configuration object to use for determining hot data criteria. 197 * @return {@code true} if the data is hot, {@code false} otherwise 198 */ 199 public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) { 200 DataTieringType dataTieringType = getDataTieringType(configuration); 201 if (hFileInfo != null && !dataTieringType.equals(DataTieringType.NONE)) { 202 long maxTimestamp = dataTieringType.getInstance().getTimestamp(hFileInfo); 203 if (isWithinGracePeriod(maxTimestamp, configuration)) { 204 return true; 205 } 206 return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration)); 207 } 208 // DataTieringType.NONE or other types are considered hot by default 209 return true; 210 } 211 212 private boolean isWithinGracePeriod(long maxTimestamp, Configuration conf) { 213 long gracePeriod = getDataTieringGracePeriod(conf); 214 return gracePeriod > 0 && (getCurrentTimestamp() - maxTimestamp) < gracePeriod; 215 } 216 217 private boolean hotDataValidator(long maxTimestamp, long hotDataAge) { 218 long currentTimestamp = getCurrentTimestamp(); 219 long diff = currentTimestamp - maxTimestamp; 220 return diff <= hotDataAge; 221 } 222 223 private long getCurrentTimestamp() { 224 return EnvironmentEdgeManager.getDelegate().currentTime(); 225 } 226 227 /** 228 * Returns a set of cold data filenames from the given set of cached blocks. Cold data is 229 * determined by the configured data tiering type and hot data age. 230 * @param allCachedBlocks a set of all cached block cache keys 231 * @return a set of cold data filenames 232 * @throws DataTieringException if there is an error determining whether a block is hot 233 */ 234 public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks) 235 throws DataTieringException { 236 Set<String> coldHFiles = new HashSet<>(); 237 for (BlockCacheKey key : allCachedBlocks) { 238 if (coldHFiles.contains(key.getHfileName())) { 239 continue; 240 } 241 if (!isHotData(key)) { 242 coldHFiles.add(key.getHfileName()); 243 } 244 } 245 return coldHFiles; 246 } 247 248 private HRegion getHRegion(String region) throws DataTieringException { 249 HRegion hRegion = this.onlineRegions.get(region); 250 if (hRegion == null) { 251 throw new DataTieringException("HRegion corresponding to " + region + " doesn't exist"); 252 } 253 return hRegion; 254 } 255 256 private HStore getHStore(String region, String cf) throws DataTieringException { 257 HRegion hRegion = getHRegion(region); 258 HStore hStore = hRegion.getStore(Bytes.toBytes(cf)); 259 if (hStore == null) { 260 throw new DataTieringException( 261 "HStore corresponding to " + region + "/" + cf + " doesn't exist"); 262 } 263 return hStore; 264 } 265 266 private HStoreFile getHStoreFile(String region, String cf, String fileName) 267 throws DataTieringException { 268 HStore hStore = getHStore(region, cf); 269 for (HStoreFile file : hStore.getStorefiles()) { 270 if (file.getPath().getName().equals(fileName)) { 271 return file; 272 } 273 } 274 return null; 275 } 276 277 private Configuration getConfiguration(Path hFilePath) throws DataTieringException { 278 String regionName = null; 279 String cfName = null; 280 try { 281 regionName = hFilePath.getParent().getParent().getName(); 282 cfName = hFilePath.getParent().getName(); 283 } catch (Exception e) { 284 throw new DataTieringException("Incorrect HFile Path: " + hFilePath); 285 } 286 if (regionName == null || cfName == null) { 287 throw new DataTieringException("Incorrect HFile Path: " + hFilePath); 288 } 289 HStore hStore = getHStore(regionName, cfName); 290 return hStore.getReadOnlyConfiguration(); 291 } 292 293 private DataTieringType getDataTieringType(Configuration conf) { 294 return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name())); 295 } 296 297 private long getDataTieringHotDataAge(Configuration conf) { 298 return Long.parseLong( 299 conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE))); 300 } 301 302 private long getDataTieringGracePeriod(Configuration conf) { 303 return Long.parseLong(conf.get(HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY, 304 String.valueOf(DEFAULT_DATATIERING_GRACE_PERIOD))); 305 } 306 307 /* 308 * This API traverses through the list of online regions and returns a subset of these files-names 309 * that are cold. 310 * @return List of names of files with cold data as per data-tiering logic. 311 */ 312 public Map<String, String> getColdFilesList() { 313 Map<String, String> coldFiles = new HashMap<>(); 314 for (HRegion r : this.onlineRegions.values()) { 315 for (HStore hStore : r.getStores()) { 316 Configuration conf = hStore.getReadOnlyConfiguration(); 317 DataTieringType dataTieringType = getDataTieringType(conf); 318 if (dataTieringType == DataTieringType.NONE) { 319 // Data-Tiering not enabled for the store. Just skip it. 320 continue; 321 } 322 Long hotDataAge = getDataTieringHotDataAge(conf); 323 324 for (HStoreFile hStoreFile : hStore.getStorefiles()) { 325 String hFileName = 326 hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName(); 327 long maxTimeStamp = dataTieringType.getInstance().getTimestamp(hStoreFile); 328 LOG.debug("Max TS for file {} is {}", hFileName, new Date(maxTimeStamp)); 329 long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime(); 330 long fileAge = currentTimestamp - maxTimeStamp; 331 if (fileAge > hotDataAge) { 332 // Values do not matter. 333 coldFiles.put(hFileName, null); 334 } 335 } 336 } 337 } 338 return coldFiles; 339 } 340 341 private static boolean isDataTieringFeatureEnabled(Configuration conf) { 342 return conf.getBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, 343 DataTieringManager.DEFAULT_GLOBAL_DATA_TIERING_ENABLED); 344 } 345 346 // Resets the instance to null. To be used only for testing. 347 public static void resetForTestingOnly() { 348 instance = null; 349 } 350}