001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.IOException;
021import java.util.Date;
022import java.util.HashMap;
023import java.util.HashSet;
024import java.util.Map;
025import java.util.Set;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
029import org.apache.hadoop.hbase.io.hfile.HFileInfo;
030import org.apache.hadoop.hbase.util.Bytes;
031import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
032import org.apache.yetus.audience.InterfaceAudience;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035
036/**
037 * The DataTieringManager class categorizes data into hot data and cold data based on the specified
038 * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with
039 * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType}
040 * determines the logic for distinguishing data into hot or cold. By default, all data is considered
041 * as hot.
042 */
043@InterfaceAudience.Private
044public class DataTieringManager {
045  private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class);
046  public static final String GLOBAL_DATA_TIERING_ENABLED_KEY =
047    "hbase.regionserver.datatiering.enable";
048  public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default
049  public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type";
050  public static final String DATATIERING_HOT_DATA_AGE_KEY =
051    "hbase.hstore.datatiering.hot.age.millis";
052  public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE;
053  public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days
054  private static DataTieringManager instance;
055  private final Map<String, HRegion> onlineRegions;
056
057  private DataTieringManager(Map<String, HRegion> onlineRegions) {
058    this.onlineRegions = onlineRegions;
059  }
060
061  /**
062   * Initializes the DataTieringManager instance with the provided map of online regions, only if
063   * the configuration "hbase.regionserver.datatiering.enable" is enabled.
064   * @param conf          Configuration object.
065   * @param onlineRegions A map containing online regions.
066   * @return True if the instance is instantiated successfully, false otherwise.
067   */
068  public static synchronized boolean instantiate(Configuration conf,
069    Map<String, HRegion> onlineRegions) {
070    if (isDataTieringFeatureEnabled(conf) && instance == null) {
071      instance = new DataTieringManager(onlineRegions);
072      LOG.info("DataTieringManager instantiated successfully.");
073      return true;
074    } else {
075      LOG.warn("DataTieringManager is already instantiated.");
076    }
077    return false;
078  }
079
080  /**
081   * Retrieves the instance of DataTieringManager.
082   * @return The instance of DataTieringManager, if instantiated, null otherwise.
083   */
084  public static synchronized DataTieringManager getInstance() {
085    return instance;
086  }
087
088  /**
089   * Determines whether data tiering is enabled for the given block cache key.
090   * @param key the block cache key
091   * @return {@code true} if data tiering is enabled for the HFile associated with the key,
092   *         {@code false} otherwise
093   * @throws DataTieringException if there is an error retrieving the HFile path or configuration
094   */
095  public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException {
096    Path hFilePath = key.getFilePath();
097    if (hFilePath == null) {
098      throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
099    }
100    return isDataTieringEnabled(hFilePath);
101  }
102
103  /**
104   * Determines whether data tiering is enabled for the given HFile path.
105   * @param hFilePath the path to the HFile
106   * @return {@code true} if data tiering is enabled, {@code false} otherwise
107   * @throws DataTieringException if there is an error retrieving the configuration
108   */
109  public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException {
110    Configuration configuration = getConfiguration(hFilePath);
111    DataTieringType dataTieringType = getDataTieringType(configuration);
112    return !dataTieringType.equals(DataTieringType.NONE);
113  }
114
115  /**
116   * Determines whether the data associated with the given block cache key is considered hot. If the
117   * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not
118   * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by
119   * default.
120   * @param key the block cache key
121   * @return {@code true} if the data is hot, {@code false} otherwise
122   * @throws DataTieringException if there is an error retrieving data tiering information
123   */
124  public boolean isHotData(BlockCacheKey key) throws DataTieringException {
125    Path hFilePath = key.getFilePath();
126    if (hFilePath == null) {
127      throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
128    }
129    return isHotData(hFilePath);
130  }
131
132  /**
133   * Determines whether the data associated with the given time range tracker is considered hot. If
134   * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum
135   * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers
136   * the data as hot by default.
137   * @param maxTimestamp the maximum timestamp associated with the data.
138   * @param conf         The configuration object to use for determining hot data criteria.
139   * @return {@code true} if the data is hot, {@code false} otherwise
140   */
141  public boolean isHotData(long maxTimestamp, Configuration conf) {
142    DataTieringType dataTieringType = getDataTieringType(conf);
143
144    if (
145      !dataTieringType.equals(DataTieringType.NONE)
146        && maxTimestamp != TimeRangeTracker.INITIAL_MAX_TIMESTAMP
147    ) {
148      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf));
149    }
150    // DataTieringType.NONE or other types are considered hot by default
151    return true;
152  }
153
154  /**
155   * Determines whether the data in the HFile at the given path is considered hot based on the
156   * configured data tiering type and hot data age. If the data tiering type is set to
157   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
158   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
159   * @param hFilePath the path to the HFile
160   * @return {@code true} if the data is hot, {@code false} otherwise
161   * @throws DataTieringException if there is an error retrieving data tiering information
162   */
163  public boolean isHotData(Path hFilePath) throws DataTieringException {
164    Configuration configuration = getConfiguration(hFilePath);
165    DataTieringType dataTieringType = getDataTieringType(configuration);
166
167    if (!dataTieringType.equals(DataTieringType.NONE)) {
168      HStoreFile hStoreFile = getHStoreFile(hFilePath);
169      if (hStoreFile == null) {
170        throw new DataTieringException(
171          "Store file corresponding to " + hFilePath + " doesn't exist");
172      }
173      return hotDataValidator(dataTieringType.getInstance().getTimestamp(getHStoreFile(hFilePath)),
174        getDataTieringHotDataAge(configuration));
175    }
176    // DataTieringType.NONE or other types are considered hot by default
177    return true;
178  }
179
180  /**
181   * Determines whether the data in the HFile being read is considered hot based on the configured
182   * data tiering type and hot data age. If the data tiering type is set to
183   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
184   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
185   * @param hFileInfo     Information about the HFile to determine if its data is hot.
186   * @param configuration The configuration object to use for determining hot data criteria.
187   * @return {@code true} if the data is hot, {@code false} otherwise
188   */
189  public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) {
190    DataTieringType dataTieringType = getDataTieringType(configuration);
191    if (hFileInfo != null && !dataTieringType.equals(DataTieringType.NONE)) {
192      return hotDataValidator(dataTieringType.getInstance().getTimestamp(hFileInfo),
193        getDataTieringHotDataAge(configuration));
194    }
195    // DataTieringType.NONE or other types are considered hot by default
196    return true;
197  }
198
199  private boolean hotDataValidator(long maxTimestamp, long hotDataAge) {
200    long currentTimestamp = getCurrentTimestamp();
201    long diff = currentTimestamp - maxTimestamp;
202    return diff <= hotDataAge;
203  }
204
205  private long getCurrentTimestamp() {
206    return EnvironmentEdgeManager.getDelegate().currentTime();
207  }
208
209  /**
210   * Returns a set of cold data filenames from the given set of cached blocks. Cold data is
211   * determined by the configured data tiering type and hot data age.
212   * @param allCachedBlocks a set of all cached block cache keys
213   * @return a set of cold data filenames
214   * @throws DataTieringException if there is an error determining whether a block is hot
215   */
216  public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks)
217    throws DataTieringException {
218    Set<String> coldHFiles = new HashSet<>();
219    for (BlockCacheKey key : allCachedBlocks) {
220      if (coldHFiles.contains(key.getHfileName())) {
221        continue;
222      }
223      if (!isHotData(key)) {
224        coldHFiles.add(key.getHfileName());
225      }
226    }
227    return coldHFiles;
228  }
229
230  private HRegion getHRegion(Path hFilePath) throws DataTieringException {
231    String regionId;
232    try {
233      regionId = HRegionFileSystem.getRegionId(hFilePath);
234    } catch (IOException e) {
235      throw new DataTieringException(e.getMessage());
236    }
237    HRegion hRegion = this.onlineRegions.get(regionId);
238    if (hRegion == null) {
239      throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist");
240    }
241    return hRegion;
242  }
243
244  private HStore getHStore(Path hFilePath) throws DataTieringException {
245    HRegion hRegion = getHRegion(hFilePath);
246    String columnFamily = hFilePath.getParent().getName();
247    HStore hStore = hRegion.getStore(Bytes.toBytes(columnFamily));
248    if (hStore == null) {
249      throw new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist");
250    }
251    return hStore;
252  }
253
254  private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException {
255    HStore hStore = getHStore(hFilePath);
256    for (HStoreFile file : hStore.getStorefiles()) {
257      if (file.getPath().toUri().getPath().toString().equals(hFilePath.toString())) {
258        return file;
259      }
260    }
261    return null;
262  }
263
264  private Configuration getConfiguration(Path hFilePath) throws DataTieringException {
265    HStore hStore = getHStore(hFilePath);
266    return hStore.getReadOnlyConfiguration();
267  }
268
269  private DataTieringType getDataTieringType(Configuration conf) {
270    return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name()));
271  }
272
273  private long getDataTieringHotDataAge(Configuration conf) {
274    return Long.parseLong(
275      conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
276  }
277
278  /*
279   * This API traverses through the list of online regions and returns a subset of these files-names
280   * that are cold.
281   * @return List of names of files with cold data as per data-tiering logic.
282   */
283  public Map<String, String> getColdFilesList() {
284    Map<String, String> coldFiles = new HashMap<>();
285    for (HRegion r : this.onlineRegions.values()) {
286      for (HStore hStore : r.getStores()) {
287        Configuration conf = hStore.getReadOnlyConfiguration();
288        DataTieringType dataTieringType = getDataTieringType(conf);
289        if (dataTieringType == DataTieringType.NONE) {
290          // Data-Tiering not enabled for the store. Just skip it.
291          continue;
292        }
293        Long hotDataAge = getDataTieringHotDataAge(conf);
294
295        for (HStoreFile hStoreFile : hStore.getStorefiles()) {
296          String hFileName =
297            hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
298          long maxTimeStamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
299          LOG.debug("Max TS for file {} is {}", hFileName, new Date(maxTimeStamp));
300          long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
301          long fileAge = currentTimestamp - maxTimeStamp;
302          if (fileAge > hotDataAge) {
303            // Values do not matter.
304            coldFiles.put(hFileName, null);
305          }
306        }
307      }
308    }
309    return coldFiles;
310  }
311
312  private static boolean isDataTieringFeatureEnabled(Configuration conf) {
313    return conf.getBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY,
314      DataTieringManager.DEFAULT_GLOBAL_DATA_TIERING_ENABLED);
315  }
316
317  // Resets the instance to null. To be used only for testing.
318  public static void resetForTestingOnly() {
319    instance = null;
320  }
321}