001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.util.Date; 021import java.util.HashMap; 022import java.util.HashSet; 023import java.util.Map; 024import java.util.Set; 025import org.apache.hadoop.conf.Configuration; 026import org.apache.hadoop.fs.Path; 027import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; 028import org.apache.hadoop.hbase.io.hfile.HFileInfo; 029import org.apache.hadoop.hbase.util.Bytes; 030import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 031import org.apache.yetus.audience.InterfaceAudience; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035/** 036 * The DataTieringManager class categorizes data into hot data and cold data based on the specified 037 * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with 038 * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType} 039 * determines the logic for distinguishing data into hot or cold. By default, all data is considered 040 * as hot. 041 */ 042@InterfaceAudience.Private 043public class DataTieringManager { 044 private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class); 045 public static final String GLOBAL_DATA_TIERING_ENABLED_KEY = 046 "hbase.regionserver.datatiering.enable"; 047 public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default 048 public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type"; 049 public static final String HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY = 050 "hbase.hstore.datatiering.grace.period.millis"; 051 public static final long DEFAULT_DATATIERING_GRACE_PERIOD = 0; 052 public static final String DATATIERING_HOT_DATA_AGE_KEY = 053 "hbase.hstore.datatiering.hot.age.millis"; 054 public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE; 055 public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days 056 private static DataTieringManager instance; 057 private final Map<String, HRegion> onlineRegions; 058 059 private DataTieringManager(Map<String, HRegion> onlineRegions) { 060 this.onlineRegions = onlineRegions; 061 } 062 063 /** 064 * Initializes the DataTieringManager instance with the provided map of online regions, only if 065 * the configuration "hbase.regionserver.datatiering.enable" is enabled. 066 * @param conf Configuration object. 067 * @param onlineRegions A map containing online regions. 068 * @return True if the instance is instantiated successfully, false otherwise. 069 */ 070 public static synchronized boolean instantiate(Configuration conf, 071 Map<String, HRegion> onlineRegions) { 072 if (isDataTieringFeatureEnabled(conf) && instance == null) { 073 instance = new DataTieringManager(onlineRegions); 074 LOG.info("DataTieringManager instantiated successfully."); 075 return true; 076 } else { 077 LOG.warn("DataTieringManager is already instantiated."); 078 } 079 return false; 080 } 081 082 /** 083 * Retrieves the instance of DataTieringManager. 084 * @return The instance of DataTieringManager, if instantiated, null otherwise. 085 */ 086 public static synchronized DataTieringManager getInstance() { 087 return instance; 088 } 089 090 /** 091 * Determines whether data tiering is enabled for the given block cache key. 092 * @param key the block cache key 093 * @return {@code true} if data tiering is enabled for the HFile associated with the key, 094 * {@code false} otherwise 095 * @throws DataTieringException if there is an error retrieving the HFile path or configuration 096 */ 097 public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException { 098 if (key.getCfName() == null || key.getRegionName() == null) { 099 throw new DataTieringException( 100 "BlockCacheKey doesn't contain Column Family Name or Region Name"); 101 } 102 Configuration configuration = 103 getHStore(key.getRegionName(), key.getCfName()).getReadOnlyConfiguration(); 104 DataTieringType dataTieringType = getDataTieringType(configuration); 105 return !dataTieringType.equals(DataTieringType.NONE); 106 } 107 108 /** 109 * Determines whether data tiering is enabled for the given HFile path. 110 * @param hFilePath the path to the HFile 111 * @return {@code true} if data tiering is enabled, {@code false} otherwise 112 * @throws DataTieringException if there is an error retrieving the configuration 113 */ 114 public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException { 115 Configuration configuration = getConfiguration(hFilePath); 116 DataTieringType dataTieringType = getDataTieringType(configuration); 117 return !dataTieringType.equals(DataTieringType.NONE); 118 } 119 120 /** 121 * Determines whether the data associated with the given block cache key is considered hot. If the 122 * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not 123 * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by 124 * default. 125 * @param key the block cache key 126 * @return {@code true} if the data is hot, {@code false} otherwise 127 * @throws DataTieringException if there is an error retrieving data tiering information 128 */ 129 public boolean isHotData(BlockCacheKey key) throws DataTieringException { 130 if (key.getRegionName() == null) { 131 throw new DataTieringException("BlockCacheKey doesn't contain Region Name"); 132 } 133 if (key.getCfName() == null) { 134 throw new DataTieringException("BlockCacheKey doesn't contain CF Name"); 135 } 136 if (key.getHfileName() == null) { 137 throw new DataTieringException("BlockCacheKey doesn't contain File Name"); 138 } 139 return isHotData(key.getRegionName(), key.getCfName(), key.getHfileName()); 140 } 141 142 /** 143 * Determines whether the data associated with the given time range tracker is considered hot. If 144 * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum 145 * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers 146 * the data as hot by default. 147 * @param maxTimestamp the maximum timestamp associated with the data. 148 * @param conf The configuration object to use for determining hot data criteria. 149 * @return {@code true} if the data is hot, {@code false} otherwise 150 */ 151 public boolean isHotData(long maxTimestamp, Configuration conf) { 152 if (isWithinGracePeriod(maxTimestamp, conf)) { 153 return true; 154 } 155 DataTieringType dataTieringType = getDataTieringType(conf); 156 157 if ( 158 !dataTieringType.equals(DataTieringType.NONE) 159 && maxTimestamp != TimeRangeTracker.INITIAL_MAX_TIMESTAMP 160 ) { 161 return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf)); 162 } 163 // DataTieringType.NONE or other types are considered hot by default 164 return true; 165 } 166 167 private boolean isHotData(String region, String cf, String fileName) throws DataTieringException { 168 Configuration configuration = getHStore(region, cf).getReadOnlyConfiguration(); 169 DataTieringType dataTieringType = getDataTieringType(configuration); 170 if (!dataTieringType.equals(DataTieringType.NONE)) { 171 HStoreFile hStoreFile = getHStoreFile(region, cf, fileName); 172 if (hStoreFile == null) { 173 throw new DataTieringException( 174 "Store file corresponding to " + region + "/" + cf + "/" + fileName + " doesn't exist"); 175 } 176 long maxTimestamp = dataTieringType.getInstance().getTimestamp(hStoreFile); 177 if (isWithinGracePeriod(maxTimestamp, configuration)) { 178 return true; 179 } 180 return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration)); 181 } 182 // DataTieringType.NONE or other types are considered hot by default 183 return true; 184 } 185 186 /** 187 * Determines whether the data in the HFile being read is considered hot based on the configured 188 * data tiering type and hot data age. If the data tiering type is set to 189 * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers 190 * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default. 191 * @param hFileInfo Information about the HFile to determine if its data is hot. 192 * @param configuration The configuration object to use for determining hot data criteria. 193 * @return {@code true} if the data is hot, {@code false} otherwise 194 */ 195 public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) { 196 DataTieringType dataTieringType = getDataTieringType(configuration); 197 if (hFileInfo != null && !dataTieringType.equals(DataTieringType.NONE)) { 198 long maxTimestamp = dataTieringType.getInstance().getTimestamp(hFileInfo); 199 if (isWithinGracePeriod(maxTimestamp, configuration)) { 200 return true; 201 } 202 return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration)); 203 } 204 // DataTieringType.NONE or other types are considered hot by default 205 return true; 206 } 207 208 private boolean isWithinGracePeriod(long maxTimestamp, Configuration conf) { 209 long gracePeriod = getDataTieringGracePeriod(conf); 210 return gracePeriod > 0 && (getCurrentTimestamp() - maxTimestamp) < gracePeriod; 211 } 212 213 private boolean hotDataValidator(long maxTimestamp, long hotDataAge) { 214 long currentTimestamp = getCurrentTimestamp(); 215 long diff = currentTimestamp - maxTimestamp; 216 return diff <= hotDataAge; 217 } 218 219 private long getCurrentTimestamp() { 220 return EnvironmentEdgeManager.getDelegate().currentTime(); 221 } 222 223 /** 224 * Returns a set of cold data filenames from the given set of cached blocks. Cold data is 225 * determined by the configured data tiering type and hot data age. 226 * @param allCachedBlocks a set of all cached block cache keys 227 * @return a set of cold data filenames 228 * @throws DataTieringException if there is an error determining whether a block is hot 229 */ 230 public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks) 231 throws DataTieringException { 232 Set<String> coldHFiles = new HashSet<>(); 233 for (BlockCacheKey key : allCachedBlocks) { 234 if (coldHFiles.contains(key.getHfileName())) { 235 continue; 236 } 237 if (!isHotData(key)) { 238 coldHFiles.add(key.getHfileName()); 239 } 240 } 241 return coldHFiles; 242 } 243 244 private HRegion getHRegion(String region) throws DataTieringException { 245 HRegion hRegion = this.onlineRegions.get(region); 246 if (hRegion == null) { 247 throw new DataTieringException("HRegion corresponding to " + region + " doesn't exist"); 248 } 249 return hRegion; 250 } 251 252 private HStore getHStore(String region, String cf) throws DataTieringException { 253 HRegion hRegion = getHRegion(region); 254 HStore hStore = hRegion.getStore(Bytes.toBytes(cf)); 255 if (hStore == null) { 256 throw new DataTieringException( 257 "HStore corresponding to " + region + "/" + cf + " doesn't exist"); 258 } 259 return hStore; 260 } 261 262 private HStoreFile getHStoreFile(String region, String cf, String fileName) 263 throws DataTieringException { 264 HStore hStore = getHStore(region, cf); 265 for (HStoreFile file : hStore.getStorefiles()) { 266 if (file.getPath().getName().equals(fileName)) { 267 return file; 268 } 269 } 270 return null; 271 } 272 273 private Configuration getConfiguration(Path hFilePath) throws DataTieringException { 274 String regionName = null; 275 String cfName = null; 276 try { 277 regionName = hFilePath.getParent().getParent().getName(); 278 cfName = hFilePath.getParent().getName(); 279 } catch (Exception e) { 280 throw new DataTieringException("Incorrect HFile Path: " + hFilePath); 281 } 282 if (regionName == null || cfName == null) { 283 throw new DataTieringException("Incorrect HFile Path: " + hFilePath); 284 } 285 HStore hStore = getHStore(regionName, cfName); 286 return hStore.getReadOnlyConfiguration(); 287 } 288 289 private DataTieringType getDataTieringType(Configuration conf) { 290 return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name())); 291 } 292 293 private long getDataTieringHotDataAge(Configuration conf) { 294 return Long.parseLong( 295 conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE))); 296 } 297 298 private long getDataTieringGracePeriod(Configuration conf) { 299 return Long.parseLong(conf.get(HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY, 300 String.valueOf(DEFAULT_DATATIERING_GRACE_PERIOD))); 301 } 302 303 /* 304 * This API traverses through the list of online regions and returns a subset of these files-names 305 * that are cold. 306 * @return List of names of files with cold data as per data-tiering logic. 307 */ 308 public Map<String, String> getColdFilesList() { 309 Map<String, String> coldFiles = new HashMap<>(); 310 for (HRegion r : this.onlineRegions.values()) { 311 for (HStore hStore : r.getStores()) { 312 Configuration conf = hStore.getReadOnlyConfiguration(); 313 DataTieringType dataTieringType = getDataTieringType(conf); 314 if (dataTieringType == DataTieringType.NONE) { 315 // Data-Tiering not enabled for the store. Just skip it. 316 continue; 317 } 318 Long hotDataAge = getDataTieringHotDataAge(conf); 319 320 for (HStoreFile hStoreFile : hStore.getStorefiles()) { 321 String hFileName = 322 hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName(); 323 long maxTimeStamp = dataTieringType.getInstance().getTimestamp(hStoreFile); 324 LOG.debug("Max TS for file {} is {}", hFileName, new Date(maxTimeStamp)); 325 long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime(); 326 long fileAge = currentTimestamp - maxTimeStamp; 327 if (fileAge > hotDataAge) { 328 // Values do not matter. 329 coldFiles.put(hFileName, null); 330 } 331 } 332 } 333 } 334 return coldFiles; 335 } 336 337 private static boolean isDataTieringFeatureEnabled(Configuration conf) { 338 return conf.getBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, 339 DataTieringManager.DEFAULT_GLOBAL_DATA_TIERING_ENABLED); 340 } 341 342 // Resets the instance to null. To be used only for testing. 343 public static void resetForTestingOnly() { 344 instance = null; 345 } 346}