/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
import org.apache.hadoop.hbase.io.hfile.HFileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The DataTieringManager class categorizes data into hot data and cold data based on the specified
 * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with
 * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType}
 * determines the logic for distinguishing data into hot or cold. By default, all data is considered
 * as hot.
 * <p>
 * The manager is a process-wide singleton: it is created through
 * {@link #instantiate(Configuration, Map)} and obtained through {@link #getInstance()}. All access
 * to the singleton reference is guarded by the class monitor.
 */
@InterfaceAudience.Private
public class DataTieringManager {
  private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class);
  public static final String GLOBAL_DATA_TIERING_ENABLED_KEY =
    "hbase.regionserver.datatiering.enable";
  public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default
  public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type";
  public static final String HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY =
    "hbase.hstore.datatiering.grace.period.millis";
  public static final long DEFAULT_DATATIERING_GRACE_PERIOD = 0;
  public static final String DATATIERING_HOT_DATA_AGE_KEY =
    "hbase.hstore.datatiering.hot.age.millis";
  public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE;
  // 7 days. The leading long literal keeps the whole product in long arithmetic.
  public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7L * 24 * 60 * 60 * 1000;
  private static DataTieringManager instance;
  private final Map<String, HRegion> onlineRegions;

  private DataTieringManager(Map<String, HRegion> onlineRegions) {
    this.onlineRegions = onlineRegions;
  }

  /**
   * Initializes the DataTieringManager instance with the provided map of online regions, only if
   * the configuration "hbase.regionserver.datatiering.enable" is enabled and no instance exists
   * yet.
   * @param conf          Configuration object.
   * @param onlineRegions A map containing online regions.
   * @return True if the instance is instantiated by this call, false if the feature is disabled
   *         or an instance already exists.
   */
  public static synchronized boolean instantiate(Configuration conf,
    Map<String, HRegion> onlineRegions) {
    if (!isDataTieringFeatureEnabled(conf)) {
      // Nothing was created; do not report this as "already instantiated".
      LOG.info("DataTieringManager is not instantiated because {} is disabled.",
        GLOBAL_DATA_TIERING_ENABLED_KEY);
      return false;
    }
    if (instance != null) {
      LOG.warn("DataTieringManager is already instantiated.");
      return false;
    }
    instance = new DataTieringManager(onlineRegions);
    LOG.info("DataTieringManager instantiated successfully.");
    return true;
  }

  /**
   * Retrieves the instance of DataTieringManager.
   * @return The instance of DataTieringManager, if instantiated, null otherwise.
   */
  public static synchronized DataTieringManager getInstance() {
    return instance;
  }

  /**
   * Determines whether data tiering is enabled for the given block cache key.
   * @param key the block cache key
   * @return {@code true} if data tiering is enabled for the HFile associated with the key,
   *         {@code false} otherwise
   * @throws DataTieringException if there is an error retrieving the HFile path or configuration
   */
  public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException {
    return isDataTieringEnabled(requireHFilePath(key));
  }

  /**
   * Determines whether data tiering is enabled for the given HFile path.
   * @param hFilePath the path to the HFile
   * @return {@code true} if data tiering is enabled, {@code false} otherwise
   * @throws DataTieringException if there is an error retrieving the configuration
   */
  public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException {
    Configuration configuration = getConfiguration(hFilePath);
    return getDataTieringType(configuration) != DataTieringType.NONE;
  }

  /**
   * Determines whether the data associated with the given block cache key is considered hot. If the
   * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not
   * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by
   * default.
   * @param key the block cache key
   * @return {@code true} if the data is hot, {@code false} otherwise
   * @throws DataTieringException if there is an error retrieving data tiering information
   */
  public boolean isHotData(BlockCacheKey key) throws DataTieringException {
    return isHotData(requireHFilePath(key));
  }

  /**
   * Determines whether data with the given maximum timestamp is considered hot. Data inside the
   * configured grace period is always hot. Otherwise, when data tiering is enabled and the
   * timestamp is not {@link TimeRangeTracker#INITIAL_MAX_TIMESTAMP}, the timestamp is compared
   * against the configured hot data age. In every other case the data is considered hot.
   * @param maxTimestamp the maximum timestamp associated with the data.
   * @param conf         The configuration object to use for determining hot data criteria.
   * @return {@code true} if the data is hot, {@code false} otherwise
   */
  public boolean isHotData(long maxTimestamp, Configuration conf) {
    if (isWithinGracePeriod(maxTimestamp, conf)) {
      return true;
    }
    DataTieringType dataTieringType = getDataTieringType(conf);
    if (
      dataTieringType == DataTieringType.NONE
        || maxTimestamp == TimeRangeTracker.INITIAL_MAX_TIMESTAMP
    ) {
      // Tiering disabled, or no timestamp recorded yet: hot by default.
      return true;
    }
    return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf));
  }

  /**
   * Determines whether the data in the HFile at the given path is considered hot based on the
   * configured data tiering type and hot data age. If the data tiering type is set to
   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
   * @param hFilePath the path to the HFile
   * @return {@code true} if the data is hot, {@code false} otherwise
   * @throws DataTieringException if there is an error retrieving data tiering information
   */
  public boolean isHotData(Path hFilePath) throws DataTieringException {
    Configuration configuration = getConfiguration(hFilePath);
    DataTieringType dataTieringType = getDataTieringType(configuration);
    if (dataTieringType == DataTieringType.NONE) {
      // DataTieringType.NONE is considered hot by default
      return true;
    }
    HStoreFile hStoreFile = getHStoreFile(hFilePath);
    if (hStoreFile == null) {
      throw new DataTieringException(
        "Store file corresponding to " + hFilePath + " doesn't exist");
    }
    long maxTimestamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
    return isHotByAge(maxTimestamp, configuration);
  }

  /**
   * Determines whether the data in the HFile being read is considered hot based on the configured
   * data tiering type and hot data age. If the data tiering type is set to
   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
   * @param hFileInfo     Information about the HFile to determine if its data is hot. A
   *                      {@code null} value is treated as hot.
   * @param configuration The configuration object to use for determining hot data criteria.
   * @return {@code true} if the data is hot, {@code false} otherwise
   */
  public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) {
    DataTieringType dataTieringType = getDataTieringType(configuration);
    if (hFileInfo == null || dataTieringType == DataTieringType.NONE) {
      // Missing file info or DataTieringType.NONE is considered hot by default
      return true;
    }
    long maxTimestamp = dataTieringType.getInstance().getTimestamp(hFileInfo);
    return isHotByAge(maxTimestamp, configuration);
  }

  /**
   * Returns the HFile path carried by the key, failing when the key has none.
   */
  private static Path requireHFilePath(BlockCacheKey key) throws DataTieringException {
    Path hFilePath = key.getFilePath();
    if (hFilePath == null) {
      throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
    }
    return hFilePath;
  }

  /**
   * Applies the grace period first and, only if that does not apply, the hot data age. The hot data
   * age setting is not read when the grace period already classifies the data as hot.
   */
  private boolean isHotByAge(long maxTimestamp, Configuration conf) {
    return isWithinGracePeriod(maxTimestamp, conf)
      || hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf));
  }

  /**
   * Returns true when a positive grace period is configured and the data is younger than it.
   */
  private boolean isWithinGracePeriod(long maxTimestamp, Configuration conf) {
    long gracePeriod = getDataTieringGracePeriod(conf);
    return gracePeriod > 0 && (getCurrentTimestamp() - maxTimestamp) < gracePeriod;
  }

  /**
   * Returns true when the age of the data (now minus maxTimestamp) does not exceed hotDataAge.
   */
  private boolean hotDataValidator(long maxTimestamp, long hotDataAge) {
    long currentTimestamp = getCurrentTimestamp();
    long diff = currentTimestamp - maxTimestamp;
    return diff <= hotDataAge;
  }

  private long getCurrentTimestamp() {
    return EnvironmentEdgeManager.getDelegate().currentTime();
  }

  /**
   * Returns a set of cold data filenames from the given set of cached blocks. Cold data is
   * determined by the configured data tiering type and hot data age.
   * @param allCachedBlocks a set of all cached block cache keys
   * @return a set of cold data filenames
   * @throws DataTieringException if there is an error determining whether a block is hot
   */
  public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks)
    throws DataTieringException {
    Set<String> coldHFiles = new HashSet<>();
    for (BlockCacheKey key : allCachedBlocks) {
      if (coldHFiles.contains(key.getHfileName())) {
        // File already classified as cold; skip the per-block lookup.
        continue;
      }
      if (!isHotData(key)) {
        coldHFiles.add(key.getHfileName());
      }
    }
    return coldHFiles;
  }

  /**
   * Looks up the online region that owns the given HFile path.
   * @throws DataTieringException if the region id cannot be derived from the path or the region is
   *                              not in the online regions map
   */
  private HRegion getHRegion(Path hFilePath) throws DataTieringException {
    String regionId;
    try {
      regionId = HRegionFileSystem.getRegionId(hFilePath);
    } catch (IOException e) {
      throw new DataTieringException(e.getMessage());
    }
    HRegion hRegion = this.onlineRegions.get(regionId);
    if (hRegion == null) {
      throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist");
    }
    return hRegion;
  }

  /**
   * Looks up the store for the given HFile path, using the name of the path's parent directory as
   * the column family.
   */
  private HStore getHStore(Path hFilePath) throws DataTieringException {
    HRegion hRegion = getHRegion(hFilePath);
    String columnFamily = hFilePath.getParent().getName();
    HStore hStore = hRegion.getStore(Bytes.toBytes(columnFamily));
    if (hStore == null) {
      throw new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist");
    }
    return hStore;
  }

  /**
   * Finds the store file whose URI path equals the string form of the given path.
   * @return the matching store file, or {@code null} if the store has no such file
   */
  private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException {
    HStore hStore = getHStore(hFilePath);
    String wantedPath = hFilePath.toString();
    for (HStoreFile file : hStore.getStorefiles()) {
      if (file.getPath().toUri().getPath().equals(wantedPath)) {
        return file;
      }
    }
    return null;
  }

  private Configuration getConfiguration(Path hFilePath) throws DataTieringException {
    HStore hStore = getHStore(hFilePath);
    return hStore.getReadOnlyConfiguration();
  }

  private DataTieringType getDataTieringType(Configuration conf) {
    return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name()));
  }

  private long getDataTieringHotDataAge(Configuration conf) {
    return Long.parseLong(
      conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
  }

  private long getDataTieringGracePeriod(Configuration conf) {
    return Long.parseLong(conf.get(HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY,
      String.valueOf(DEFAULT_DATATIERING_GRACE_PERIOD)));
  }

  /**
   * Traverses the store files of all online regions and collects the names of the files that are
   * cold, i.e. whose age exceeds the hot data age configured for their store. Stores with
   * {@link DataTieringType#NONE} are skipped.
   * @return a map whose keys are the names of the cold files; the values are always {@code null}
   */
  public Map<String, String> getColdFilesList() {
    Map<String, String> coldFiles = new HashMap<>();
    for (HRegion r : this.onlineRegions.values()) {
      for (HStore hStore : r.getStores()) {
        Configuration conf = hStore.getReadOnlyConfiguration();
        DataTieringType dataTieringType = getDataTieringType(conf);
        if (dataTieringType == DataTieringType.NONE) {
          // Data-Tiering not enabled for the store. Just skip it.
          continue;
        }
        long hotDataAge = getDataTieringHotDataAge(conf);

        for (HStoreFile hStoreFile : hStore.getStorefiles()) {
          String hFileName =
            hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
          long maxTimeStamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
          LOG.debug("Max TS for file {} is {}", hFileName, new Date(maxTimeStamp));
          if (!hotDataValidator(maxTimeStamp, hotDataAge)) {
            // Values do not matter.
            coldFiles.put(hFileName, null);
          }
        }
      }
    }
    return coldFiles;
  }

  private static boolean isDataTieringFeatureEnabled(Configuration conf) {
    return conf.getBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY,
      DataTieringManager.DEFAULT_GLOBAL_DATA_TIERING_ENABLED);
  }

  // Resets the instance to null. To be used only for testing.
  // Synchronized on the class, like instantiate() and getInstance(), so the reset is visible to
  // other threads.
  public static synchronized void resetForTestingOnly() {
    instance = null;
  }
}