001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.util.Date;
021import java.util.HashMap;
022import java.util.HashSet;
023import java.util.Map;
024import java.util.Set;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.Path;
027import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
028import org.apache.hadoop.hbase.io.hfile.HFileInfo;
029import org.apache.hadoop.hbase.util.Bytes;
030import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
031import org.apache.yetus.audience.InterfaceAudience;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035/**
036 * The DataTieringManager class categorizes data into hot data and cold data based on the specified
037 * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with
038 * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType}
039 * determines the logic for distinguishing data into hot or cold. By default, all data is considered
040 * as hot.
041 */
042@InterfaceAudience.Private
043public class DataTieringManager {
044  private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class);
045  public static final String GLOBAL_DATA_TIERING_ENABLED_KEY =
046    "hbase.regionserver.datatiering.enable";
047  public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default
048  public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type";
049  public static final String HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY =
050    "hbase.hstore.datatiering.grace.period.millis";
051  public static final long DEFAULT_DATATIERING_GRACE_PERIOD = 0;
052  public static final String DATATIERING_HOT_DATA_AGE_KEY =
053    "hbase.hstore.datatiering.hot.age.millis";
054  public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE;
055  public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days
056  private static DataTieringManager instance;
057  private final Map<String, HRegion> onlineRegions;
058
059  private DataTieringManager(Map<String, HRegion> onlineRegions) {
060    this.onlineRegions = onlineRegions;
061  }
062
063  /**
064   * Initializes the DataTieringManager instance with the provided map of online regions, only if
065   * the configuration "hbase.regionserver.datatiering.enable" is enabled.
066   * @param conf          Configuration object.
067   * @param onlineRegions A map containing online regions.
068   * @return True if the instance is instantiated successfully, false otherwise.
069   */
070  public static synchronized boolean instantiate(Configuration conf,
071    Map<String, HRegion> onlineRegions) {
072    if (!isDataTieringFeatureEnabled(conf)) {
073      LOG.debug("DataTiering feature is disabled (key: {}). Skipping instantiation.",
074        GLOBAL_DATA_TIERING_ENABLED_KEY);
075      return false;
076    }
077    if (instance != null) {
078      LOG.warn("DataTieringManager is already instantiated.");
079      return false;
080    }
081    instance = new DataTieringManager(onlineRegions);
082    LOG.info("DataTieringManager instantiated successfully.");
083    return true;
084  }
085
086  /**
087   * Retrieves the instance of DataTieringManager.
088   * @return The instance of DataTieringManager, if instantiated, null otherwise.
089   */
090  public static synchronized DataTieringManager getInstance() {
091    return instance;
092  }
093
094  /**
095   * Determines whether data tiering is enabled for the given block cache key.
096   * @param key the block cache key
097   * @return {@code true} if data tiering is enabled for the HFile associated with the key,
098   *         {@code false} otherwise
099   * @throws DataTieringException if there is an error retrieving the HFile path or configuration
100   */
101  public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException {
102    if (key.getCfName() == null || key.getRegionName() == null) {
103      throw new DataTieringException(
104        "BlockCacheKey doesn't contain Column Family Name or Region Name");
105    }
106    Configuration configuration =
107      getHStore(key.getRegionName(), key.getCfName()).getReadOnlyConfiguration();
108    DataTieringType dataTieringType = getDataTieringType(configuration);
109    return !dataTieringType.equals(DataTieringType.NONE);
110  }
111
112  /**
113   * Determines whether data tiering is enabled for the given HFile path.
114   * @param hFilePath the path to the HFile
115   * @return {@code true} if data tiering is enabled, {@code false} otherwise
116   * @throws DataTieringException if there is an error retrieving the configuration
117   */
118  public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException {
119    Configuration configuration = getConfiguration(hFilePath);
120    DataTieringType dataTieringType = getDataTieringType(configuration);
121    return !dataTieringType.equals(DataTieringType.NONE);
122  }
123
124  /**
125   * Determines whether the data associated with the given block cache key is considered hot. If the
126   * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not
127   * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by
128   * default.
129   * @param key the block cache key
130   * @return {@code true} if the data is hot, {@code false} otherwise
131   * @throws DataTieringException if there is an error retrieving data tiering information
132   */
133  public boolean isHotData(BlockCacheKey key) throws DataTieringException {
134    if (key.getRegionName() == null) {
135      throw new DataTieringException("BlockCacheKey doesn't contain Region Name");
136    }
137    if (key.getCfName() == null) {
138      throw new DataTieringException("BlockCacheKey doesn't contain CF Name");
139    }
140    if (key.getHfileName() == null) {
141      throw new DataTieringException("BlockCacheKey doesn't contain File Name");
142    }
143    return isHotData(key.getRegionName(), key.getCfName(), key.getHfileName());
144  }
145
146  /**
147   * Determines whether the data associated with the given time range tracker is considered hot. If
148   * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum
149   * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers
150   * the data as hot by default.
151   * @param maxTimestamp the maximum timestamp associated with the data.
152   * @param conf         The configuration object to use for determining hot data criteria.
153   * @return {@code true} if the data is hot, {@code false} otherwise
154   */
155  public boolean isHotData(long maxTimestamp, Configuration conf) {
156    if (isWithinGracePeriod(maxTimestamp, conf)) {
157      return true;
158    }
159    DataTieringType dataTieringType = getDataTieringType(conf);
160
161    if (
162      !dataTieringType.equals(DataTieringType.NONE)
163        && maxTimestamp != TimeRangeTracker.INITIAL_MAX_TIMESTAMP
164    ) {
165      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf));
166    }
167    // DataTieringType.NONE or other types are considered hot by default
168    return true;
169  }
170
171  private boolean isHotData(String region, String cf, String fileName) throws DataTieringException {
172    Configuration configuration = getHStore(region, cf).getReadOnlyConfiguration();
173    DataTieringType dataTieringType = getDataTieringType(configuration);
174    if (!dataTieringType.equals(DataTieringType.NONE)) {
175      HStoreFile hStoreFile = getHStoreFile(region, cf, fileName);
176      if (hStoreFile == null) {
177        throw new DataTieringException(
178          "Store file corresponding to " + region + "/" + cf + "/" + fileName + " doesn't exist");
179      }
180      long maxTimestamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
181      if (isWithinGracePeriod(maxTimestamp, configuration)) {
182        return true;
183      }
184      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
185    }
186    // DataTieringType.NONE or other types are considered hot by default
187    return true;
188  }
189
190  /**
191   * Determines whether the data in the HFile being read is considered hot based on the configured
192   * data tiering type and hot data age. If the data tiering type is set to
193   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
194   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
195   * @param hFileInfo     Information about the HFile to determine if its data is hot.
196   * @param configuration The configuration object to use for determining hot data criteria.
197   * @return {@code true} if the data is hot, {@code false} otherwise
198   */
199  public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) {
200    DataTieringType dataTieringType = getDataTieringType(configuration);
201    if (hFileInfo != null && !dataTieringType.equals(DataTieringType.NONE)) {
202      long maxTimestamp = dataTieringType.getInstance().getTimestamp(hFileInfo);
203      if (isWithinGracePeriod(maxTimestamp, configuration)) {
204        return true;
205      }
206      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
207    }
208    // DataTieringType.NONE or other types are considered hot by default
209    return true;
210  }
211
212  private boolean isWithinGracePeriod(long maxTimestamp, Configuration conf) {
213    long gracePeriod = getDataTieringGracePeriod(conf);
214    return gracePeriod > 0 && (getCurrentTimestamp() - maxTimestamp) < gracePeriod;
215  }
216
217  private boolean hotDataValidator(long maxTimestamp, long hotDataAge) {
218    long currentTimestamp = getCurrentTimestamp();
219    long diff = currentTimestamp - maxTimestamp;
220    return diff <= hotDataAge;
221  }
222
223  private long getCurrentTimestamp() {
224    return EnvironmentEdgeManager.getDelegate().currentTime();
225  }
226
227  /**
228   * Returns a set of cold data filenames from the given set of cached blocks. Cold data is
229   * determined by the configured data tiering type and hot data age.
230   * @param allCachedBlocks a set of all cached block cache keys
231   * @return a set of cold data filenames
232   * @throws DataTieringException if there is an error determining whether a block is hot
233   */
234  public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks)
235    throws DataTieringException {
236    Set<String> coldHFiles = new HashSet<>();
237    for (BlockCacheKey key : allCachedBlocks) {
238      if (coldHFiles.contains(key.getHfileName())) {
239        continue;
240      }
241      if (!isHotData(key)) {
242        coldHFiles.add(key.getHfileName());
243      }
244    }
245    return coldHFiles;
246  }
247
248  private HRegion getHRegion(String region) throws DataTieringException {
249    HRegion hRegion = this.onlineRegions.get(region);
250    if (hRegion == null) {
251      throw new DataTieringException("HRegion corresponding to " + region + " doesn't exist");
252    }
253    return hRegion;
254  }
255
256  private HStore getHStore(String region, String cf) throws DataTieringException {
257    HRegion hRegion = getHRegion(region);
258    HStore hStore = hRegion.getStore(Bytes.toBytes(cf));
259    if (hStore == null) {
260      throw new DataTieringException(
261        "HStore corresponding to " + region + "/" + cf + " doesn't exist");
262    }
263    return hStore;
264  }
265
266  private HStoreFile getHStoreFile(String region, String cf, String fileName)
267    throws DataTieringException {
268    HStore hStore = getHStore(region, cf);
269    for (HStoreFile file : hStore.getStorefiles()) {
270      if (file.getPath().getName().equals(fileName)) {
271        return file;
272      }
273    }
274    return null;
275  }
276
277  private Configuration getConfiguration(Path hFilePath) throws DataTieringException {
278    String regionName = null;
279    String cfName = null;
280    try {
281      regionName = hFilePath.getParent().getParent().getName();
282      cfName = hFilePath.getParent().getName();
283    } catch (Exception e) {
284      throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
285    }
286    if (regionName == null || cfName == null) {
287      throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
288    }
289    HStore hStore = getHStore(regionName, cfName);
290    return hStore.getReadOnlyConfiguration();
291  }
292
293  private DataTieringType getDataTieringType(Configuration conf) {
294    return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name()));
295  }
296
297  private long getDataTieringHotDataAge(Configuration conf) {
298    return Long.parseLong(
299      conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
300  }
301
302  private long getDataTieringGracePeriod(Configuration conf) {
303    return Long.parseLong(conf.get(HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY,
304      String.valueOf(DEFAULT_DATATIERING_GRACE_PERIOD)));
305  }
306
307  /*
308   * This API traverses through the list of online regions and returns a subset of these files-names
309   * that are cold.
310   * @return List of names of files with cold data as per data-tiering logic.
311   */
312  public Map<String, String> getColdFilesList() {
313    Map<String, String> coldFiles = new HashMap<>();
314    for (HRegion r : this.onlineRegions.values()) {
315      for (HStore hStore : r.getStores()) {
316        Configuration conf = hStore.getReadOnlyConfiguration();
317        DataTieringType dataTieringType = getDataTieringType(conf);
318        if (dataTieringType == DataTieringType.NONE) {
319          // Data-Tiering not enabled for the store. Just skip it.
320          continue;
321        }
322        Long hotDataAge = getDataTieringHotDataAge(conf);
323
324        for (HStoreFile hStoreFile : hStore.getStorefiles()) {
325          String hFileName =
326            hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
327          long maxTimeStamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
328          LOG.debug("Max TS for file {} is {}", hFileName, new Date(maxTimeStamp));
329          long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
330          long fileAge = currentTimestamp - maxTimeStamp;
331          if (fileAge > hotDataAge) {
332            // Values do not matter.
333            coldFiles.put(hFileName, null);
334          }
335        }
336      }
337    }
338    return coldFiles;
339  }
340
341  private static boolean isDataTieringFeatureEnabled(Configuration conf) {
342    return conf.getBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY,
343      DataTieringManager.DEFAULT_GLOBAL_DATA_TIERING_ENABLED);
344  }
345
346  // Resets the instance to null. To be used only for testing.
347  public static void resetForTestingOnly() {
348    instance = null;
349  }
350}