001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.util.Date;
021import java.util.HashMap;
022import java.util.HashSet;
023import java.util.Map;
024import java.util.Set;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.Path;
027import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
028import org.apache.hadoop.hbase.io.hfile.HFileInfo;
029import org.apache.hadoop.hbase.util.Bytes;
030import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
031import org.apache.yetus.audience.InterfaceAudience;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035/**
036 * The DataTieringManager class categorizes data into hot data and cold data based on the specified
037 * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with
038 * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType}
039 * determines the logic for distinguishing data into hot or cold. By default, all data is considered
040 * as hot.
041 */
042@InterfaceAudience.Private
043public class DataTieringManager {
044  private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class);
045  public static final String GLOBAL_DATA_TIERING_ENABLED_KEY =
046    "hbase.regionserver.datatiering.enable";
047  public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default
048  public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type";
049  public static final String HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY =
050    "hbase.hstore.datatiering.grace.period.millis";
051  public static final long DEFAULT_DATATIERING_GRACE_PERIOD = 0;
052  public static final String DATATIERING_HOT_DATA_AGE_KEY =
053    "hbase.hstore.datatiering.hot.age.millis";
054  public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE;
055  public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days
056  private static DataTieringManager instance;
057  private final Map<String, HRegion> onlineRegions;
058
059  private DataTieringManager(Map<String, HRegion> onlineRegions) {
060    this.onlineRegions = onlineRegions;
061  }
062
063  /**
064   * Initializes the DataTieringManager instance with the provided map of online regions, only if
065   * the configuration "hbase.regionserver.datatiering.enable" is enabled.
066   * @param conf          Configuration object.
067   * @param onlineRegions A map containing online regions.
068   * @return True if the instance is instantiated successfully, false otherwise.
069   */
070  public static synchronized boolean instantiate(Configuration conf,
071    Map<String, HRegion> onlineRegions) {
072    if (isDataTieringFeatureEnabled(conf) && instance == null) {
073      instance = new DataTieringManager(onlineRegions);
074      LOG.info("DataTieringManager instantiated successfully.");
075      return true;
076    } else {
077      LOG.warn("DataTieringManager is already instantiated.");
078    }
079    return false;
080  }
081
082  /**
083   * Retrieves the instance of DataTieringManager.
084   * @return The instance of DataTieringManager, if instantiated, null otherwise.
085   */
086  public static synchronized DataTieringManager getInstance() {
087    return instance;
088  }
089
090  /**
091   * Determines whether data tiering is enabled for the given block cache key.
092   * @param key the block cache key
093   * @return {@code true} if data tiering is enabled for the HFile associated with the key,
094   *         {@code false} otherwise
095   * @throws DataTieringException if there is an error retrieving the HFile path or configuration
096   */
097  public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException {
098    if (key.getCfName() == null || key.getRegionName() == null) {
099      throw new DataTieringException(
100        "BlockCacheKey doesn't contain Column Family Name or Region Name");
101    }
102    Configuration configuration =
103      getHStore(key.getRegionName(), key.getCfName()).getReadOnlyConfiguration();
104    DataTieringType dataTieringType = getDataTieringType(configuration);
105    return !dataTieringType.equals(DataTieringType.NONE);
106  }
107
108  /**
109   * Determines whether data tiering is enabled for the given HFile path.
110   * @param hFilePath the path to the HFile
111   * @return {@code true} if data tiering is enabled, {@code false} otherwise
112   * @throws DataTieringException if there is an error retrieving the configuration
113   */
114  public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException {
115    Configuration configuration = getConfiguration(hFilePath);
116    DataTieringType dataTieringType = getDataTieringType(configuration);
117    return !dataTieringType.equals(DataTieringType.NONE);
118  }
119
120  /**
121   * Determines whether the data associated with the given block cache key is considered hot. If the
122   * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not
123   * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by
124   * default.
125   * @param key the block cache key
126   * @return {@code true} if the data is hot, {@code false} otherwise
127   * @throws DataTieringException if there is an error retrieving data tiering information
128   */
129  public boolean isHotData(BlockCacheKey key) throws DataTieringException {
130    if (key.getRegionName() == null) {
131      throw new DataTieringException("BlockCacheKey doesn't contain Region Name");
132    }
133    if (key.getCfName() == null) {
134      throw new DataTieringException("BlockCacheKey doesn't contain CF Name");
135    }
136    if (key.getHfileName() == null) {
137      throw new DataTieringException("BlockCacheKey doesn't contain File Name");
138    }
139    return isHotData(key.getRegionName(), key.getCfName(), key.getHfileName());
140  }
141
142  /**
143   * Determines whether the data associated with the given time range tracker is considered hot. If
144   * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum
145   * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers
146   * the data as hot by default.
147   * @param maxTimestamp the maximum timestamp associated with the data.
148   * @param conf         The configuration object to use for determining hot data criteria.
149   * @return {@code true} if the data is hot, {@code false} otherwise
150   */
151  public boolean isHotData(long maxTimestamp, Configuration conf) {
152    if (isWithinGracePeriod(maxTimestamp, conf)) {
153      return true;
154    }
155    DataTieringType dataTieringType = getDataTieringType(conf);
156
157    if (
158      !dataTieringType.equals(DataTieringType.NONE)
159        && maxTimestamp != TimeRangeTracker.INITIAL_MAX_TIMESTAMP
160    ) {
161      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf));
162    }
163    // DataTieringType.NONE or other types are considered hot by default
164    return true;
165  }
166
167  private boolean isHotData(String region, String cf, String fileName) throws DataTieringException {
168    Configuration configuration = getHStore(region, cf).getReadOnlyConfiguration();
169    DataTieringType dataTieringType = getDataTieringType(configuration);
170    if (!dataTieringType.equals(DataTieringType.NONE)) {
171      HStoreFile hStoreFile = getHStoreFile(region, cf, fileName);
172      if (hStoreFile == null) {
173        throw new DataTieringException(
174          "Store file corresponding to " + region + "/" + cf + "/" + fileName + " doesn't exist");
175      }
176      long maxTimestamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
177      if (isWithinGracePeriod(maxTimestamp, configuration)) {
178        return true;
179      }
180      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
181    }
182    // DataTieringType.NONE or other types are considered hot by default
183    return true;
184  }
185
186  /**
187   * Determines whether the data in the HFile being read is considered hot based on the configured
188   * data tiering type and hot data age. If the data tiering type is set to
189   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
190   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
191   * @param hFileInfo     Information about the HFile to determine if its data is hot.
192   * @param configuration The configuration object to use for determining hot data criteria.
193   * @return {@code true} if the data is hot, {@code false} otherwise
194   */
195  public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) {
196    DataTieringType dataTieringType = getDataTieringType(configuration);
197    if (hFileInfo != null && !dataTieringType.equals(DataTieringType.NONE)) {
198      long maxTimestamp = dataTieringType.getInstance().getTimestamp(hFileInfo);
199      if (isWithinGracePeriod(maxTimestamp, configuration)) {
200        return true;
201      }
202      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
203    }
204    // DataTieringType.NONE or other types are considered hot by default
205    return true;
206  }
207
208  private boolean isWithinGracePeriod(long maxTimestamp, Configuration conf) {
209    long gracePeriod = getDataTieringGracePeriod(conf);
210    return gracePeriod > 0 && (getCurrentTimestamp() - maxTimestamp) < gracePeriod;
211  }
212
213  private boolean hotDataValidator(long maxTimestamp, long hotDataAge) {
214    long currentTimestamp = getCurrentTimestamp();
215    long diff = currentTimestamp - maxTimestamp;
216    return diff <= hotDataAge;
217  }
218
219  private long getCurrentTimestamp() {
220    return EnvironmentEdgeManager.getDelegate().currentTime();
221  }
222
223  /**
224   * Returns a set of cold data filenames from the given set of cached blocks. Cold data is
225   * determined by the configured data tiering type and hot data age.
226   * @param allCachedBlocks a set of all cached block cache keys
227   * @return a set of cold data filenames
228   * @throws DataTieringException if there is an error determining whether a block is hot
229   */
230  public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks)
231    throws DataTieringException {
232    Set<String> coldHFiles = new HashSet<>();
233    for (BlockCacheKey key : allCachedBlocks) {
234      if (coldHFiles.contains(key.getHfileName())) {
235        continue;
236      }
237      if (!isHotData(key)) {
238        coldHFiles.add(key.getHfileName());
239      }
240    }
241    return coldHFiles;
242  }
243
244  private HRegion getHRegion(String region) throws DataTieringException {
245    HRegion hRegion = this.onlineRegions.get(region);
246    if (hRegion == null) {
247      throw new DataTieringException("HRegion corresponding to " + region + " doesn't exist");
248    }
249    return hRegion;
250  }
251
252  private HStore getHStore(String region, String cf) throws DataTieringException {
253    HRegion hRegion = getHRegion(region);
254    HStore hStore = hRegion.getStore(Bytes.toBytes(cf));
255    if (hStore == null) {
256      throw new DataTieringException(
257        "HStore corresponding to " + region + "/" + cf + " doesn't exist");
258    }
259    return hStore;
260  }
261
262  private HStoreFile getHStoreFile(String region, String cf, String fileName)
263    throws DataTieringException {
264    HStore hStore = getHStore(region, cf);
265    for (HStoreFile file : hStore.getStorefiles()) {
266      if (file.getPath().getName().equals(fileName)) {
267        return file;
268      }
269    }
270    return null;
271  }
272
273  private Configuration getConfiguration(Path hFilePath) throws DataTieringException {
274    String regionName = null;
275    String cfName = null;
276    try {
277      regionName = hFilePath.getParent().getParent().getName();
278      cfName = hFilePath.getParent().getName();
279    } catch (Exception e) {
280      throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
281    }
282    if (regionName == null || cfName == null) {
283      throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
284    }
285    HStore hStore = getHStore(regionName, cfName);
286    return hStore.getReadOnlyConfiguration();
287  }
288
289  private DataTieringType getDataTieringType(Configuration conf) {
290    return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name()));
291  }
292
293  private long getDataTieringHotDataAge(Configuration conf) {
294    return Long.parseLong(
295      conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
296  }
297
298  private long getDataTieringGracePeriod(Configuration conf) {
299    return Long.parseLong(conf.get(HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY,
300      String.valueOf(DEFAULT_DATATIERING_GRACE_PERIOD)));
301  }
302
303  /*
304   * This API traverses through the list of online regions and returns a subset of these files-names
305   * that are cold.
306   * @return List of names of files with cold data as per data-tiering logic.
307   */
308  public Map<String, String> getColdFilesList() {
309    Map<String, String> coldFiles = new HashMap<>();
310    for (HRegion r : this.onlineRegions.values()) {
311      for (HStore hStore : r.getStores()) {
312        Configuration conf = hStore.getReadOnlyConfiguration();
313        DataTieringType dataTieringType = getDataTieringType(conf);
314        if (dataTieringType == DataTieringType.NONE) {
315          // Data-Tiering not enabled for the store. Just skip it.
316          continue;
317        }
318        Long hotDataAge = getDataTieringHotDataAge(conf);
319
320        for (HStoreFile hStoreFile : hStore.getStorefiles()) {
321          String hFileName =
322            hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
323          long maxTimeStamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
324          LOG.debug("Max TS for file {} is {}", hFileName, new Date(maxTimeStamp));
325          long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
326          long fileAge = currentTimestamp - maxTimeStamp;
327          if (fileAge > hotDataAge) {
328            // Values do not matter.
329            coldFiles.put(hFileName, null);
330          }
331        }
332      }
333    }
334    return coldFiles;
335  }
336
337  private static boolean isDataTieringFeatureEnabled(Configuration conf) {
338    return conf.getBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY,
339      DataTieringManager.DEFAULT_GLOBAL_DATA_TIERING_ENABLED);
340  }
341
342  // Resets the instance to null. To be used only for testing.
343  public static void resetForTestingOnly() {
344    instance = null;
345  }
346}