/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
import org.apache.hadoop.hbase.io.hfile.HFileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The DataTieringManager class categorizes data into hot data and cold data based on the specified
 * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with
 * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType}
 * determines the logic for distinguishing data into hot or cold. By default, all data is considered
 * as hot.
 * <p>
 * The class is used as a process-wide singleton: the single instance is created by
 * {@link #instantiate(Configuration, Map)} and read through {@link #getInstance()}. All accesses to
 * the static instance reference go through {@code static synchronized} methods.
 */
@InterfaceAudience.Private
public class DataTieringManager {
  private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class);

  /** Region-server wide switch that enables the data tiering feature. */
  public static final String GLOBAL_DATA_TIERING_ENABLED_KEY =
    "hbase.regionserver.datatiering.enable";
  public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default

  /** Store-level key selecting the {@link DataTieringType}. */
  public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type";

  /** Store-level key holding the maximum age, in milliseconds, for data to be considered hot. */
  public static final String DATATIERING_HOT_DATA_AGE_KEY =
    "hbase.hstore.datatiering.hot.age.millis";
  public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE;

  // Computed in long arithmetic so that raising the number of days can never overflow an int.
  public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7L * 24 * 60 * 60 * 1000; // 7 Days

  /** Singleton reference; read and written only while holding the class lock. */
  private static DataTieringManager instance;

  /** Online regions, keyed by the region id that is looked up in {@link #getHRegion(Path)}. */
  private final Map<String, HRegion> onlineRegions;

  private DataTieringManager(Map<String, HRegion> onlineRegions) {
    this.onlineRegions = onlineRegions;
  }

  /**
   * Initializes the DataTieringManager instance with the provided map of online regions, only if
   * the configuration "hbase.regionserver.datatiering.enable" is enabled and no instance exists
   * yet.
   * @param conf          Configuration object.
   * @param onlineRegions A map containing online regions.
   * @return True if the instance is instantiated by this call, false if the feature is disabled or
   *         an instance already exists.
   */
  public static synchronized boolean instantiate(Configuration conf,
    Map<String, HRegion> onlineRegions) {
    if (!isDataTieringFeatureEnabled(conf)) {
      // Not an error: the feature is simply switched off, so say so instead of claiming that an
      // instance already exists.
      LOG.info("DataTieringManager is not instantiated because {} is disabled.",
        GLOBAL_DATA_TIERING_ENABLED_KEY);
      return false;
    }
    if (instance != null) {
      LOG.warn("DataTieringManager is already instantiated.");
      return false;
    }
    instance = new DataTieringManager(onlineRegions);
    LOG.info("DataTieringManager instantiated successfully.");
    return true;
  }

  /**
   * Retrieves the instance of DataTieringManager.
   * @return The instance of DataTieringManager, if instantiated, null otherwise.
   */
  public static synchronized DataTieringManager getInstance() {
    return instance;
  }

  /**
   * Determines whether data tiering is enabled for the given block cache key.
   * @param key the block cache key
   * @return {@code true} if data tiering is enabled for the HFile associated with the key,
   *         {@code false} otherwise
   * @throws DataTieringException if the key carries no HFile path, or if the region/store for that
   *                              path cannot be resolved
   */
  public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException {
    return isDataTieringEnabled(requireFilePath(key));
  }

  /**
   * Determines whether data tiering is enabled for the given HFile path, i.e. whether the owning
   * store's configured {@link DataTieringType} is anything other than
   * {@link DataTieringType#NONE}.
   * @param hFilePath the path to the HFile
   * @return {@code true} if data tiering is enabled, {@code false} otherwise
   * @throws DataTieringException if there is an error retrieving the configuration
   */
  public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException {
    Configuration configuration = getConfiguration(hFilePath);
    DataTieringType dataTieringType = getDataTieringType(configuration);
    return !dataTieringType.equals(DataTieringType.NONE);
  }

  /**
   * Determines whether the data associated with the given block cache key is considered hot.
   * Delegates to {@link #isHotData(Path)} with the HFile path carried by the key.
   * @param key the block cache key
   * @return {@code true} if the data is hot, {@code false} otherwise
   * @throws DataTieringException if the key carries no HFile path, or if there is an error
   *                              retrieving data tiering information
   */
  public boolean isHotData(BlockCacheKey key) throws DataTieringException {
    return isHotData(requireFilePath(key));
  }

  /**
   * Determines whether data with the given maximum timestamp is considered hot. When the configured
   * {@link DataTieringType} is not {@link DataTieringType#NONE} and the timestamp is not
   * {@link TimeRangeTracker#INITIAL_MAX_TIMESTAMP} (i.e. a timestamp was actually tracked), the
   * data is hot if its age does not exceed the configured hot data age. In every other case the
   * data is considered hot.
   * @param maxTimestamp the maximum timestamp associated with the data.
   * @param conf         The configuration object to use for determining hot data criteria.
   * @return {@code true} if the data is hot, {@code false} otherwise
   */
  public boolean isHotData(long maxTimestamp, Configuration conf) {
    DataTieringType dataTieringType = getDataTieringType(conf);

    if (
      !dataTieringType.equals(DataTieringType.NONE)
        && maxTimestamp != TimeRangeTracker.INITIAL_MAX_TIMESTAMP
    ) {
      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf));
    }
    // DataTieringType.NONE or an untracked timestamp: considered hot by default
    return true;
  }

  /**
   * Determines whether the data in the HFile at the given path is considered hot based on the
   * configured data tiering type and hot data age. The reference timestamp is supplied by the
   * tiering type's implementation ({@code dataTieringType.getInstance().getTimestamp(...)}); per
   * the documented contract of {@link DataTieringType#TIME_RANGE}, a missing maximum timestamp is
   * treated as {@code Long.MAX_VALUE}, making the data hot by default.
   * @param hFilePath the path to the HFile
   * @return {@code true} if the data is hot, {@code false} otherwise
   * @throws DataTieringException if the region, store or store file for the path cannot be found
   */
  public boolean isHotData(Path hFilePath) throws DataTieringException {
    Configuration configuration = getConfiguration(hFilePath);
    DataTieringType dataTieringType = getDataTieringType(configuration);

    if (dataTieringType.equals(DataTieringType.NONE)) {
      // Tiering is off for this store: everything is hot by default
      return true;
    }

    HStoreFile hStoreFile = getHStoreFile(hFilePath);
    if (hStoreFile == null) {
      throw new DataTieringException(
        "Store file corresponding to " + hFilePath + " doesn't exist");
    }
    // Reuse the store file that was just null-checked. Looking it up a second time would repeat
    // the scan over the store files and could observe a different (possibly missing) result.
    return hotDataValidator(dataTieringType.getInstance().getTimestamp(hStoreFile),
      getDataTieringHotDataAge(configuration));
  }

  /**
   * Determines whether the data in the HFile being read is considered hot based on the configured
   * data tiering type and hot data age. The reference timestamp is supplied by the tiering type's
   * implementation; per the documented contract of {@link DataTieringType#TIME_RANGE}, a missing
   * maximum timestamp is treated as {@code Long.MAX_VALUE}, making the data hot by default. A
   * {@code null} {@code hFileInfo} is also treated as hot.
   * @param hFileInfo     Information about the HFile to determine if its data is hot.
   * @param configuration The configuration object to use for determining hot data criteria.
   * @return {@code true} if the data is hot, {@code false} otherwise
   */
  public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) {
    DataTieringType dataTieringType = getDataTieringType(configuration);
    if (hFileInfo != null && !dataTieringType.equals(DataTieringType.NONE)) {
      return hotDataValidator(dataTieringType.getInstance().getTimestamp(hFileInfo),
        getDataTieringHotDataAge(configuration));
    }
    // DataTieringType.NONE or missing file info: considered hot by default
    return true;
  }

  /**
   * Returns {@code true} when the age of the data (current time minus {@code maxTimestamp}) does
   * not exceed {@code hotDataAge}. Both values are in milliseconds.
   */
  private boolean hotDataValidator(long maxTimestamp, long hotDataAge) {
    long currentTimestamp = getCurrentTimestamp();
    long diff = currentTimestamp - maxTimestamp;
    return diff <= hotDataAge;
  }

  /** Current time as reported by the {@link EnvironmentEdgeManager} delegate. */
  private long getCurrentTimestamp() {
    return EnvironmentEdgeManager.getDelegate().currentTime();
  }

  /**
   * Returns a set of cold data filenames from the given set of cached blocks. Cold data is
   * determined by the configured data tiering type and hot data age.
   * @param allCachedBlocks a set of all cached block cache keys
   * @return a set of cold data filenames
   * @throws DataTieringException if there is an error determining whether a block is hot
   */
  public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks)
    throws DataTieringException {
    Set<String> coldHFiles = new HashSet<>();
    for (BlockCacheKey key : allCachedBlocks) {
      // Many blocks belong to the same HFile; skip the lookup once a file is known to be cold.
      if (coldHFiles.contains(key.getHfileName())) {
        continue;
      }
      if (!isHotData(key)) {
        coldHFiles.add(key.getHfileName());
      }
    }
    return coldHFiles;
  }

  /**
   * Extracts the HFile path from a block cache key.
   * @throws DataTieringException if the key does not carry a path
   */
  private static Path requireFilePath(BlockCacheKey key) throws DataTieringException {
    Path hFilePath = key.getFilePath();
    if (hFilePath == null) {
      throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
    }
    return hFilePath;
  }

  /**
   * Resolves the online region that owns the given HFile path.
   * @throws DataTieringException if the region id cannot be derived from the path or the region is
   *                              not in the online regions map
   */
  private HRegion getHRegion(Path hFilePath) throws DataTieringException {
    String regionId;
    try {
      regionId = HRegionFileSystem.getRegionId(hFilePath);
    } catch (IOException e) {
      // NOTE(review): only the message is propagated, so the original stack trace is lost. Chain
      // the cause if DataTieringException offers (or gains) a (String, Throwable) constructor.
      throw new DataTieringException(e.getMessage());
    }
    HRegion hRegion = this.onlineRegions.get(regionId);
    if (hRegion == null) {
      throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist");
    }
    return hRegion;
  }

  /**
   * Resolves the store that owns the given HFile path. The column family name is taken from the
   * name of the file's parent directory.
   * @throws DataTieringException if the region or the store cannot be found
   */
  private HStore getHStore(Path hFilePath) throws DataTieringException {
    HRegion hRegion = getHRegion(hFilePath);
    String columnFamily = hFilePath.getParent().getName();
    HStore hStore = hRegion.getStore(Bytes.toBytes(columnFamily));
    if (hStore == null) {
      throw new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist");
    }
    return hStore;
  }

  /**
   * Finds the store file whose URI path equals the string form of {@code hFilePath}.
   * @return the matching store file, or {@code null} if the store has no such file
   * @throws DataTieringException if the region or the store cannot be found
   */
  private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException {
    HStore hStore = getHStore(hFilePath);
    String wantedPath = hFilePath.toString();
    for (HStoreFile file : hStore.getStorefiles()) {
      if (file.getPath().toUri().getPath().equals(wantedPath)) {
        return file;
      }
    }
    return null;
  }

  /** Returns the read-only configuration of the store that owns the given HFile path. */
  private Configuration getConfiguration(Path hFilePath) throws DataTieringException {
    HStore hStore = getHStore(hFilePath);
    return hStore.getReadOnlyConfiguration();
  }

  /**
   * Reads the {@link DataTieringType} from {@link #DATATIERING_KEY}, falling back to
   * {@link #DEFAULT_DATATIERING} when the key is unset.
   */
  private DataTieringType getDataTieringType(Configuration conf) {
    return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name()));
  }

  /**
   * Reads the hot data age in milliseconds from {@link #DATATIERING_HOT_DATA_AGE_KEY}, falling back
   * to {@link #DEFAULT_DATATIERING_HOT_DATA_AGE} when the key is unset.
   */
  private long getDataTieringHotDataAge(Configuration conf) {
    return Long.parseLong(
      conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
  }

  /**
   * Traverses the stores of all online regions and collects the names of the store files that are
   * cold as per the data-tiering logic. Stores whose tiering type is
   * {@link DataTieringType#NONE} are skipped.
   * @return a map whose keys are the names of the cold files; the values are always {@code null}
   *         and carry no meaning.
   */
  public Map<String, String> getColdFilesList() {
    Map<String, String> coldFiles = new HashMap<>();
    for (HRegion r : this.onlineRegions.values()) {
      for (HStore hStore : r.getStores()) {
        Configuration conf = hStore.getReadOnlyConfiguration();
        DataTieringType dataTieringType = getDataTieringType(conf);
        if (dataTieringType == DataTieringType.NONE) {
          // Data-Tiering not enabled for the store. Just skip it.
          continue;
        }
        long hotDataAge = getDataTieringHotDataAge(conf);

        for (HStoreFile hStoreFile : hStore.getStorefiles()) {
          String hFileName =
            hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
          long maxTimeStamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
          if (LOG.isDebugEnabled()) {
            // Guarded so the Date is only allocated when the message is actually emitted.
            LOG.debug("Max TS for file {} is {}", hFileName, new Date(maxTimeStamp));
          }
          // Same age rule as everywhere else in this class: cold means "not hot".
          if (!hotDataValidator(maxTimeStamp, hotDataAge)) {
            // Values do not matter.
            coldFiles.put(hFileName, null);
          }
        }
      }
    }
    return coldFiles;
  }

  /** Reads the region-server wide feature switch {@link #GLOBAL_DATA_TIERING_ENABLED_KEY}. */
  private static boolean isDataTieringFeatureEnabled(Configuration conf) {
    return conf.getBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY,
      DataTieringManager.DEFAULT_GLOBAL_DATA_TIERING_ENABLED);
  }

  /**
   * Resets the instance to null. To be used only for testing. Synchronized on the class, like
   * {@link #instantiate(Configuration, Map)} and {@link #getInstance()}, so that the reset is
   * visible to other threads.
   */
  public static synchronized void resetForTestingOnly() {
    instance = null;
  }
}