001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.apache.hadoop.hbase.io.hfile.HFileInfo.FILE_PATH;
021import static org.apache.hadoop.hbase.io.hfile.HFileInfo.FILE_SIZE;
022
023import java.util.ArrayList;
024import java.util.Collection;
025import java.util.Date;
026import java.util.HashMap;
027import java.util.HashSet;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import java.util.concurrent.ConcurrentHashMap;
032import org.apache.hadoop.conf.Configuration;
033import org.apache.hadoop.fs.Path;
034import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
035import org.apache.hadoop.hbase.io.hfile.HFileInfo;
036import org.apache.hadoop.hbase.util.Bytes;
037import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
038import org.apache.hadoop.hbase.util.Pair;
039import org.apache.yetus.audience.InterfaceAudience;
040import org.slf4j.Logger;
041import org.slf4j.LoggerFactory;
042
043/**
044 * The DataTieringManager class categorizes data into hot data and cold data based on the specified
045 * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with
046 * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType}
047 * determines the logic for distinguishing data into hot or cold. By default, all data is considered
048 * as hot.
049 */
050@InterfaceAudience.Private
051public class DataTieringManager {
  private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class);
  /** Configuration key of the global switch that {@code instantiate} checks before creating. */
  public static final String GLOBAL_DATA_TIERING_ENABLED_KEY =
    "hbase.regionserver.datatiering.enable";
  public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default
  /** Configuration key holding the name of the {@link DataTieringType} to apply. */
  public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type";
  /**
   * Configuration key of the grace period, in milliseconds. Data whose maximum timestamp is less
   * than this many milliseconds old is treated as hot; the check is skipped for values &lt;= 0.
   */
  public static final String HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY =
    "hbase.hstore.datatiering.grace.period.millis";
  /** Default grace period; 0 means the grace period check never applies. */
  public static final long DEFAULT_DATATIERING_GRACE_PERIOD = 0;
  /** Configuration key of the maximum age, in milliseconds, up to which data counts as hot. */
  public static final String DATATIERING_HOT_DATA_AGE_KEY =
    "hbase.hstore.datatiering.hot.age.millis";
  /** Tiering type used when {@link #DATATIERING_KEY} is not set. */
  public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE;
  public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days
  // Singleton; assigned by instantiate(...) and cleared by resetForTestingOnly().
  private static DataTieringManager instance;
  // Regions are looked up in this map by region name (see getHRegion). The map is supplied by the
  // caller of instantiate(...) and is not copied.
  private final Map<String, HRegion> onlineRegions;

  // Accounts for the total size of cold data in each region, together with a list of cold files in
  // that region.
  private final Map<String, Pair<List<String>, Long>> regionColdDataSize =
    new ConcurrentHashMap<>();

  // Private: instances are only created through instantiate(...).
  private DataTieringManager(Map<String, HRegion> onlineRegions) {
    this.onlineRegions = onlineRegions;
  }
075
076  /**
077   * Initializes the DataTieringManager instance with the provided map of online regions, only if
078   * the configuration "hbase.regionserver.datatiering.enable" is enabled.
079   * @param conf          Configuration object.
080   * @param onlineRegions A map containing online regions.
081   * @return True if the instance is instantiated successfully, false otherwise.
082   */
083  public static synchronized boolean instantiate(Configuration conf,
084    Map<String, HRegion> onlineRegions) {
085    if (!isDataTieringFeatureEnabled(conf)) {
086      LOG.debug("DataTiering feature is disabled (key: {}). Skipping instantiation.",
087        GLOBAL_DATA_TIERING_ENABLED_KEY);
088      return false;
089    }
090    if (instance != null) {
091      LOG.warn("DataTieringManager is already instantiated.");
092      return false;
093    }
094    instance = new DataTieringManager(onlineRegions);
095    LOG.info("DataTieringManager instantiated successfully.");
096    return true;
097  }
098
099  /**
100   * Retrieves the instance of DataTieringManager.
101   * @return The instance of DataTieringManager, if instantiated, null otherwise.
102   */
103  public static synchronized DataTieringManager getInstance() {
104    return instance;
105  }
106
107  /**
108   * Determines whether data tiering is enabled for the given block cache key.
109   * @param key the block cache key
110   * @return {@code true} if data tiering is enabled for the HFile associated with the key,
111   *         {@code false} otherwise
112   * @throws DataTieringException if there is an error retrieving the HFile path or configuration
113   */
114  public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException {
115    if (key.getCfName() == null || key.getRegionName() == null) {
116      throw new DataTieringException(
117        "BlockCacheKey doesn't contain Column Family Name or Region Name");
118    }
119    Configuration configuration =
120      getHStore(key.getRegionName(), key.getCfName()).getReadOnlyConfiguration();
121    DataTieringType dataTieringType = getDataTieringType(configuration);
122    return !dataTieringType.equals(DataTieringType.NONE);
123  }
124
125  /**
126   * Determines whether data tiering is enabled for the given HFile path.
127   * @param hFilePath the path to the HFile
128   * @return {@code true} if data tiering is enabled, {@code false} otherwise
129   * @throws DataTieringException if there is an error retrieving the configuration
130   */
131  public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException {
132    Configuration configuration = getConfiguration(hFilePath);
133    DataTieringType dataTieringType = getDataTieringType(configuration);
134    return !dataTieringType.equals(DataTieringType.NONE);
135  }
136
137  /**
138   * Determines whether the data associated with the given block cache key is considered hot. If the
139   * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not
140   * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by
141   * default.
142   * @param key the block cache key
143   * @return {@code true} if the data is hot, {@code false} otherwise
144   * @throws DataTieringException if there is an error retrieving data tiering information
145   */
146  public boolean isHotData(BlockCacheKey key) throws DataTieringException {
147    if (key.getRegionName() == null) {
148      throw new DataTieringException("BlockCacheKey doesn't contain Region Name");
149    }
150    if (key.getCfName() == null) {
151      throw new DataTieringException("BlockCacheKey doesn't contain CF Name");
152    }
153    if (key.getHfileName() == null) {
154      throw new DataTieringException("BlockCacheKey doesn't contain File Name");
155    }
156    return isHotData(key.getRegionName(), key.getCfName(), key.getHfileName());
157  }
158
159  /**
160   * Determines whether the data associated with the given time range tracker is considered hot. If
161   * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum
162   * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers
163   * the data as hot by default.
164   * @param maxTimestamp the maximum timestamp associated with the data.
165   * @param conf         The configuration object to use for determining hot data criteria.
166   * @return {@code true} if the data is hot, {@code false} otherwise
167   */
168  public boolean isHotData(long maxTimestamp, Configuration conf) {
169    if (isWithinGracePeriod(maxTimestamp, conf)) {
170      return true;
171    }
172    DataTieringType dataTieringType = getDataTieringType(conf);
173
174    if (
175      !dataTieringType.equals(DataTieringType.NONE)
176        && maxTimestamp != TimeRangeTracker.INITIAL_MAX_TIMESTAMP
177    ) {
178      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf));
179    }
180    // DataTieringType.NONE or other types are considered hot by default
181    return true;
182  }
183
184  private boolean isHotData(String region, String cf, String fileName) throws DataTieringException {
185    Configuration configuration = getHStore(region, cf).getReadOnlyConfiguration();
186    DataTieringType dataTieringType = getDataTieringType(configuration);
187    if (!dataTieringType.equals(DataTieringType.NONE)) {
188      HStoreFile hStoreFile = getHStoreFile(region, cf, fileName);
189      if (hStoreFile == null) {
190        throw new DataTieringException(
191          "Store file corresponding to " + region + "/" + cf + "/" + fileName + " doesn't exist");
192      }
193      long maxTimestamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
194      if (isWithinGracePeriod(maxTimestamp, configuration)) {
195        return true;
196      }
197      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
198    }
199    // DataTieringType.NONE or other types are considered hot by default
200    return true;
201  }
202
203  /**
204   * Determines whether the data in the HFile being read is considered hot based on the configured
205   * data tiering type and hot data age. If the data tiering type is set to
206   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
207   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
208   * @param hFileInfo     Information about the HFile to determine if its data is hot.
209   * @param configuration The configuration object to use for determining hot data criteria.
210   * @return {@code true} if the data is hot, {@code false} otherwise
211   */
212  public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) {
213    DataTieringType dataTieringType = getDataTieringType(configuration);
214    if (hFileInfo != null && !dataTieringType.equals(DataTieringType.NONE)) {
215      long maxTimestamp = dataTieringType.getInstance().getTimestamp(hFileInfo);
216      if (isWithinGracePeriod(maxTimestamp, configuration)) {
217        return true;
218      }
219      LOG.debug("Max TS: {} for file {}. Cutoff Age TS: {}", maxTimestamp,
220        hFileInfo.getHFileContext().getHFileName(), getDataTieringHotDataAge(configuration));
221      boolean isHot = hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
222      if (!isHot) {
223        Path path = new Path(Bytes.toString(hFileInfo.get(FILE_PATH)));
224        String regionName = path.getParent().getParent().getName();
225        regionColdDataSize.compute(regionName, (k, v) -> {
226          if (v == null) {
227            List<String> files = new ArrayList<>();
228            files.add(hFileInfo.getHFileContext().getHFileName());
229            LOG.debug("computing file {} with size {} as cold data for region {}",
230              hFileInfo.getHFileContext().getHFileName(), Bytes.toLong(hFileInfo.get(FILE_SIZE)),
231              regionName);
232            return new Pair<>(files, Bytes.toLong(hFileInfo.get(FILE_SIZE)));
233          } else {
234            if (!v.getFirst().contains(hFileInfo.getHFileContext().getHFileName())) {
235              v.getFirst().add(hFileInfo.getHFileContext().getHFileName());
236              v.setSecond(v.getSecond() + Bytes.toLong(hFileInfo.get(FILE_SIZE)));
237              LOG.debug(
238                "adding file {} with size {} as cold data for region {}. Total cold data size for the region is {}",
239                hFileInfo.getHFileContext().getHFileName(), Bytes.toLong(hFileInfo.get(FILE_SIZE)),
240                regionName, v.getSecond());
241            }
242            return v;
243          }
244        });
245      }
246      return isHot;
247    }
248    // DataTieringType.NONE or other types are considered hot by default
249    return true;
250  }
251
252  private boolean isWithinGracePeriod(long maxTimestamp, Configuration conf) {
253    long gracePeriod = getDataTieringGracePeriod(conf);
254    return gracePeriod > 0 && (getCurrentTimestamp() - maxTimestamp) < gracePeriod;
255  }
256
257  private boolean hotDataValidator(long maxTimestamp, long hotDataAge) {
258    long currentTimestamp = getCurrentTimestamp();
259    long diff = currentTimestamp - maxTimestamp;
260    return diff <= hotDataAge;
261  }
262
263  private long getCurrentTimestamp() {
264    return EnvironmentEdgeManager.getDelegate().currentTime();
265  }
266
267  /**
268   * Returns a set of cold data filenames from the given set of cached blocks. Cold data is
269   * determined by the configured data tiering type and hot data age.
270   * @param allCachedBlocks a set of all cached block cache keys
271   * @return a set of cold data filenames
272   * @throws DataTieringException if there is an error determining whether a block is hot
273   */
274  public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks)
275    throws DataTieringException {
276    Set<String> coldHFiles = new HashSet<>();
277    for (BlockCacheKey key : allCachedBlocks) {
278      if (coldHFiles.contains(key.getHfileName())) {
279        continue;
280      }
281      if (!isHotData(key)) {
282        coldHFiles.add(key.getHfileName());
283      }
284    }
285    return coldHFiles;
286  }
287
288  private HRegion getHRegion(String region) throws DataTieringException {
289    HRegion hRegion = this.onlineRegions.get(region);
290    if (hRegion == null) {
291      throw new DataTieringException("HRegion corresponding to " + region + " doesn't exist");
292    }
293    return hRegion;
294  }
295
296  private HStore getHStore(String region, String cf) throws DataTieringException {
297    HRegion hRegion = getHRegion(region);
298    HStore hStore = hRegion.getStore(Bytes.toBytes(cf));
299    if (hStore == null) {
300      throw new DataTieringException(
301        "HStore corresponding to " + region + "/" + cf + " doesn't exist");
302    }
303    return hStore;
304  }
305
306  private HStoreFile getHStoreFile(String region, String cf, String fileName)
307    throws DataTieringException {
308    HStore hStore = getHStore(region, cf);
309    for (HStoreFile file : hStore.getStorefiles()) {
310      if (file.getPath().getName().equals(fileName)) {
311        return file;
312      }
313    }
314    return null;
315  }
316
317  private Configuration getConfiguration(Path hFilePath) throws DataTieringException {
318    String regionName = null;
319    String cfName = null;
320    try {
321      regionName = hFilePath.getParent().getParent().getName();
322      cfName = hFilePath.getParent().getName();
323    } catch (Exception e) {
324      throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
325    }
326    if (regionName == null || cfName == null) {
327      throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
328    }
329    HStore hStore = getHStore(regionName, cfName);
330    return hStore.getReadOnlyConfiguration();
331  }
332
333  private DataTieringType getDataTieringType(Configuration conf) {
334    return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name()));
335  }
336
337  private long getDataTieringHotDataAge(Configuration conf) {
338    return Long.parseLong(
339      conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
340  }
341
342  private long getDataTieringGracePeriod(Configuration conf) {
343    return Long.parseLong(conf.get(HSTORE_DATATIERING_GRACE_PERIOD_MILLIS_KEY,
344      String.valueOf(DEFAULT_DATATIERING_GRACE_PERIOD)));
345  }
346
347  /*
348   * This API traverses through the list of online regions and returns a subset of these files-names
349   * that are cold.
350   * @return List of names of files with cold data as per data-tiering logic.
351   */
352  public Map<String, String> getColdFilesList() {
353    Map<String, String> coldFiles = new HashMap<>();
354    for (HRegion r : this.onlineRegions.values()) {
355      for (HStore hStore : r.getStores()) {
356        Configuration conf = hStore.getReadOnlyConfiguration();
357        DataTieringType dataTieringType = getDataTieringType(conf);
358        if (dataTieringType == DataTieringType.NONE) {
359          // Data-Tiering not enabled for the store. Just skip it.
360          continue;
361        }
362        Long hotDataAge = getDataTieringHotDataAge(conf);
363
364        for (HStoreFile hStoreFile : hStore.getStorefiles()) {
365          String hFileName =
366            hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
367          long maxTimeStamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
368          LOG.debug("Max TS for file {} is {}", hFileName, new Date(maxTimeStamp));
369          long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
370          long fileAge = currentTimestamp - maxTimeStamp;
371          if (fileAge > hotDataAge) {
372            // Values do not matter.
373            coldFiles.put(hFileName, null);
374          }
375        }
376      }
377    }
378    return coldFiles;
379  }
380
381  private static boolean isDataTieringFeatureEnabled(Configuration conf) {
382    return conf.getBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY,
383      DataTieringManager.DEFAULT_GLOBAL_DATA_TIERING_ENABLED);
384  }
385
386  // Resets the instance to null. To be used only for testing.
387  public static void resetForTestingOnly() {
388    instance = null;
389  }
390
391  public Map<String, Pair<List<String>, Long>> getRegionColdDataSize() {
392    return regionColdDataSize;
393  }
394
395  /**
396   * Updates regionColdData size for the region containing the passed compactedFiles.
397   */
398  public void updateRegionColdDataSize(String encodedRegionName,
399    Collection<HStoreFile> compactedFiles, Collection<HStoreFile> newFiles) {
400    regionColdDataSize.computeIfPresent(encodedRegionName, (k, v) -> {
401      for (HStoreFile file : compactedFiles) {
402        if (v.getFirst().contains(file.getPath().getName())) {
403          v.getFirst().remove(file.getPath().getName());
404          v.setSecond(v.getSecond() - Bytes.toLong(file.getMetadataValue(FILE_SIZE)));
405        }
406      }
407      for (HStoreFile file : newFiles) {
408        // call isHotData to account for the new file size in regionColdDataSize, if the new file is
409        // considered cold data as per data-tiering logic.
410        isHotData(file.getFileInfo().getHFileInfo(), file.getFileInfo().getConf());
411      }
412      return v;
413    });
414  }
415}