001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.IOException;
021import java.util.Date;
022import java.util.HashMap;
023import java.util.HashSet;
024import java.util.Map;
025import java.util.Set;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
029import org.apache.hadoop.hbase.io.hfile.HFileInfo;
030import org.apache.hadoop.hbase.util.Bytes;
031import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
032import org.apache.yetus.audience.InterfaceAudience;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035
036/**
037 * The DataTieringManager class categorizes data into hot data and cold data based on the specified
038 * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with
039 * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType}
040 * determines the logic for distinguishing data into hot or cold. By default, all data is considered
041 * as hot.
042 */
043@InterfaceAudience.Private
044public class DataTieringManager {
045  private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class);
046  public static final String GLOBAL_DATA_TIERING_ENABLED_KEY =
047    "hbase.regionserver.datatiering.enable";
048  public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default
049  public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type";
050  public static final String HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY =
051    "hbase.hstore.datatiering.grace.period.millis";
052  public static final long DEFAULT_DATATIERING_GRACE_PERIOD = 0;
053  public static final String DATATIERING_HOT_DATA_AGE_KEY =
054    "hbase.hstore.datatiering.hot.age.millis";
055  public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE;
056  public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days
057  private static DataTieringManager instance;
058  private final Map<String, HRegion> onlineRegions;
059
060  private DataTieringManager(Map<String, HRegion> onlineRegions) {
061    this.onlineRegions = onlineRegions;
062  }
063
064  /**
065   * Initializes the DataTieringManager instance with the provided map of online regions, only if
066   * the configuration "hbase.regionserver.datatiering.enable" is enabled.
067   * @param conf          Configuration object.
068   * @param onlineRegions A map containing online regions.
069   * @return True if the instance is instantiated successfully, false otherwise.
070   */
071  public static synchronized boolean instantiate(Configuration conf,
072    Map<String, HRegion> onlineRegions) {
073    if (isDataTieringFeatureEnabled(conf) && instance == null) {
074      instance = new DataTieringManager(onlineRegions);
075      LOG.info("DataTieringManager instantiated successfully.");
076      return true;
077    } else {
078      LOG.warn("DataTieringManager is already instantiated.");
079    }
080    return false;
081  }
082
083  /**
084   * Retrieves the instance of DataTieringManager.
085   * @return The instance of DataTieringManager, if instantiated, null otherwise.
086   */
087  public static synchronized DataTieringManager getInstance() {
088    return instance;
089  }
090
091  /**
092   * Determines whether data tiering is enabled for the given block cache key.
093   * @param key the block cache key
094   * @return {@code true} if data tiering is enabled for the HFile associated with the key,
095   *         {@code false} otherwise
096   * @throws DataTieringException if there is an error retrieving the HFile path or configuration
097   */
098  public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException {
099    Path hFilePath = key.getFilePath();
100    if (hFilePath == null) {
101      throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
102    }
103    return isDataTieringEnabled(hFilePath);
104  }
105
106  /**
107   * Determines whether data tiering is enabled for the given HFile path.
108   * @param hFilePath the path to the HFile
109   * @return {@code true} if data tiering is enabled, {@code false} otherwise
110   * @throws DataTieringException if there is an error retrieving the configuration
111   */
112  public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException {
113    Configuration configuration = getConfiguration(hFilePath);
114    DataTieringType dataTieringType = getDataTieringType(configuration);
115    return !dataTieringType.equals(DataTieringType.NONE);
116  }
117
118  /**
119   * Determines whether the data associated with the given block cache key is considered hot. If the
120   * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not
121   * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by
122   * default.
123   * @param key the block cache key
124   * @return {@code true} if the data is hot, {@code false} otherwise
125   * @throws DataTieringException if there is an error retrieving data tiering information
126   */
127  public boolean isHotData(BlockCacheKey key) throws DataTieringException {
128    Path hFilePath = key.getFilePath();
129    if (hFilePath == null) {
130      throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
131    }
132    return isHotData(hFilePath);
133  }
134
135  /**
136   * Determines whether the data associated with the given time range tracker is considered hot. If
137   * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum
138   * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers
139   * the data as hot by default.
140   * @param maxTimestamp the maximum timestamp associated with the data.
141   * @param conf         The configuration object to use for determining hot data criteria.
142   * @return {@code true} if the data is hot, {@code false} otherwise
143   */
144  public boolean isHotData(long maxTimestamp, Configuration conf) {
145    if (isWithinGracePeriod(maxTimestamp, conf)) {
146      return true;
147    }
148    DataTieringType dataTieringType = getDataTieringType(conf);
149
150    if (
151      !dataTieringType.equals(DataTieringType.NONE)
152        && maxTimestamp != TimeRangeTracker.INITIAL_MAX_TIMESTAMP
153    ) {
154      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf));
155    }
156    // DataTieringType.NONE or other types are considered hot by default
157    return true;
158  }
159
160  /**
161   * Determines whether the data in the HFile at the given path is considered hot based on the
162   * configured data tiering type and hot data age. If the data tiering type is set to
163   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
164   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
165   * @param hFilePath the path to the HFile
166   * @return {@code true} if the data is hot, {@code false} otherwise
167   * @throws DataTieringException if there is an error retrieving data tiering information
168   */
169  public boolean isHotData(Path hFilePath) throws DataTieringException {
170    Configuration configuration = getConfiguration(hFilePath);
171    DataTieringType dataTieringType = getDataTieringType(configuration);
172
173    if (!dataTieringType.equals(DataTieringType.NONE)) {
174      HStoreFile hStoreFile = getHStoreFile(hFilePath);
175      if (hStoreFile == null) {
176        throw new DataTieringException(
177          "Store file corresponding to " + hFilePath + " doesn't exist");
178      }
179      long maxTimestamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
180      if (isWithinGracePeriod(maxTimestamp, configuration)) {
181        return true;
182      }
183      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
184    }
185    // DataTieringType.NONE or other types are considered hot by default
186    return true;
187  }
188
189  /**
190   * Determines whether the data in the HFile being read is considered hot based on the configured
191   * data tiering type and hot data age. If the data tiering type is set to
192   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
193   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
194   * @param hFileInfo     Information about the HFile to determine if its data is hot.
195   * @param configuration The configuration object to use for determining hot data criteria.
196   * @return {@code true} if the data is hot, {@code false} otherwise
197   */
198  public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) {
199    DataTieringType dataTieringType = getDataTieringType(configuration);
200    if (hFileInfo != null && !dataTieringType.equals(DataTieringType.NONE)) {
201      long maxTimestamp = dataTieringType.getInstance().getTimestamp(hFileInfo);
202      if (isWithinGracePeriod(maxTimestamp, configuration)) {
203        return true;
204      }
205      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
206    }
207    // DataTieringType.NONE or other types are considered hot by default
208    return true;
209  }
210
211  private boolean isWithinGracePeriod(long maxTimestamp, Configuration conf) {
212    long gracePeriod = getDataTieringGracePeriod(conf);
213    return gracePeriod > 0 && (getCurrentTimestamp() - maxTimestamp) < gracePeriod;
214  }
215
216  private boolean hotDataValidator(long maxTimestamp, long hotDataAge) {
217    long currentTimestamp = getCurrentTimestamp();
218    long diff = currentTimestamp - maxTimestamp;
219    return diff <= hotDataAge;
220  }
221
222  private long getCurrentTimestamp() {
223    return EnvironmentEdgeManager.getDelegate().currentTime();
224  }
225
226  /**
227   * Returns a set of cold data filenames from the given set of cached blocks. Cold data is
228   * determined by the configured data tiering type and hot data age.
229   * @param allCachedBlocks a set of all cached block cache keys
230   * @return a set of cold data filenames
231   * @throws DataTieringException if there is an error determining whether a block is hot
232   */
233  public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks)
234    throws DataTieringException {
235    Set<String> coldHFiles = new HashSet<>();
236    for (BlockCacheKey key : allCachedBlocks) {
237      if (coldHFiles.contains(key.getHfileName())) {
238        continue;
239      }
240      if (!isHotData(key)) {
241        coldHFiles.add(key.getHfileName());
242      }
243    }
244    return coldHFiles;
245  }
246
247  private HRegion getHRegion(Path hFilePath) throws DataTieringException {
248    String regionId;
249    try {
250      regionId = HRegionFileSystem.getRegionId(hFilePath);
251    } catch (IOException e) {
252      throw new DataTieringException(e.getMessage());
253    }
254    HRegion hRegion = this.onlineRegions.get(regionId);
255    if (hRegion == null) {
256      throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist");
257    }
258    return hRegion;
259  }
260
261  private HStore getHStore(Path hFilePath) throws DataTieringException {
262    HRegion hRegion = getHRegion(hFilePath);
263    String columnFamily = hFilePath.getParent().getName();
264    HStore hStore = hRegion.getStore(Bytes.toBytes(columnFamily));
265    if (hStore == null) {
266      throw new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist");
267    }
268    return hStore;
269  }
270
271  private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException {
272    HStore hStore = getHStore(hFilePath);
273    for (HStoreFile file : hStore.getStorefiles()) {
274      if (file.getPath().toUri().getPath().toString().equals(hFilePath.toString())) {
275        return file;
276      }
277    }
278    return null;
279  }
280
281  private Configuration getConfiguration(Path hFilePath) throws DataTieringException {
282    HStore hStore = getHStore(hFilePath);
283    return hStore.getReadOnlyConfiguration();
284  }
285
286  private DataTieringType getDataTieringType(Configuration conf) {
287    return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name()));
288  }
289
290  private long getDataTieringHotDataAge(Configuration conf) {
291    return Long.parseLong(
292      conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
293  }
294
295  private long getDataTieringGracePeriod(Configuration conf) {
296    return Long.parseLong(conf.get(HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY,
297      String.valueOf(DEFAULT_DATATIERING_GRACE_PERIOD)));
298  }
299
300  /*
301   * This API traverses through the list of online regions and returns a subset of these files-names
302   * that are cold.
303   * @return List of names of files with cold data as per data-tiering logic.
304   */
305  public Map<String, String> getColdFilesList() {
306    Map<String, String> coldFiles = new HashMap<>();
307    for (HRegion r : this.onlineRegions.values()) {
308      for (HStore hStore : r.getStores()) {
309        Configuration conf = hStore.getReadOnlyConfiguration();
310        DataTieringType dataTieringType = getDataTieringType(conf);
311        if (dataTieringType == DataTieringType.NONE) {
312          // Data-Tiering not enabled for the store. Just skip it.
313          continue;
314        }
315        Long hotDataAge = getDataTieringHotDataAge(conf);
316
317        for (HStoreFile hStoreFile : hStore.getStorefiles()) {
318          String hFileName =
319            hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
320          long maxTimeStamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
321          LOG.debug("Max TS for file {} is {}", hFileName, new Date(maxTimeStamp));
322          long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
323          long fileAge = currentTimestamp - maxTimeStamp;
324          if (fileAge > hotDataAge) {
325            // Values do not matter.
326            coldFiles.put(hFileName, null);
327          }
328        }
329      }
330    }
331    return coldFiles;
332  }
333
334  private static boolean isDataTieringFeatureEnabled(Configuration conf) {
335    return conf.getBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY,
336      DataTieringManager.DEFAULT_GLOBAL_DATA_TIERING_ENABLED);
337  }
338
339  // Resets the instance to null. To be used only for testing.
340  public static void resetForTestingOnly() {
341    instance = null;
342  }
343}