001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to you under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.hadoop.hbase.quotas;
018
019import java.util.HashSet;
020import java.util.Iterator;
021import java.util.Set;
022import java.util.concurrent.TimeUnit;
023
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.hbase.ScheduledChore;
026import org.apache.hadoop.hbase.client.RegionInfo;
027import org.apache.hadoop.hbase.regionserver.HRegion;
028import org.apache.hadoop.hbase.regionserver.HRegionServer;
029import org.apache.hadoop.hbase.regionserver.Region;
030import org.apache.hadoop.hbase.regionserver.Store;
031import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
032import org.apache.yetus.audience.InterfaceAudience;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035
036/**
037 * A chore which computes the size of each {@link HRegion} on the FileSystem hosted by the given
038 * {@link HRegionServer}. The results of this computation are stored in the
039 * {@link RegionServerSpaceQuotaManager}'s {@link RegionSizeStore} object.
040 */
041@InterfaceAudience.Private
042public class FileSystemUtilizationChore extends ScheduledChore {
043  private static final Logger LOG = LoggerFactory.getLogger(FileSystemUtilizationChore.class);
044  static final String FS_UTILIZATION_CHORE_PERIOD_KEY = "hbase.regionserver.quotas.fs.utilization.chore.period";
045  static final int FS_UTILIZATION_CHORE_PERIOD_DEFAULT = 1000 * 60 * 5; // 5 minutes in millis
046
047  static final String FS_UTILIZATION_CHORE_DELAY_KEY = "hbase.regionserver.quotas.fs.utilization.chore.delay";
048  static final long FS_UTILIZATION_CHORE_DELAY_DEFAULT = 1000L * 60L; // 1 minute
049
050  static final String FS_UTILIZATION_CHORE_TIMEUNIT_KEY = "hbase.regionserver.quotas.fs.utilization.chore.timeunit";
051  static final String FS_UTILIZATION_CHORE_TIMEUNIT_DEFAULT = TimeUnit.MILLISECONDS.name();
052
053  static final String FS_UTILIZATION_MAX_ITERATION_DURATION_KEY = "hbase.regionserver.quotas.fs.utilization.chore.max.iteration.millis";
054  static final long FS_UTILIZATION_MAX_ITERATION_DURATION_DEFAULT = 5000L;
055
056  private final HRegionServer rs;
057  private final long maxIterationMillis;
058  private Iterator<Region> leftoverRegions;
059
060  public FileSystemUtilizationChore(HRegionServer rs) {
061    super(FileSystemUtilizationChore.class.getSimpleName(), rs, getPeriod(rs.getConfiguration()),
062        getInitialDelay(rs.getConfiguration()), getTimeUnit(rs.getConfiguration()));
063    this.rs = rs;
064    this.maxIterationMillis = rs.getConfiguration().getLong(
065        FS_UTILIZATION_MAX_ITERATION_DURATION_KEY, FS_UTILIZATION_MAX_ITERATION_DURATION_DEFAULT);
066  }
067
068  @Override
069  protected void chore() {
070    final RegionSizeStore regionSizeStore = getRegionSizeStore();
071    final Set<Region> onlineRegions = new HashSet<>(rs.getRegions());
072    // Process the regions from the last run if we have any. If we are somehow having difficulty
073    // processing the Regions, we want to avoid creating a backlog in memory of Region objs.
074    Iterator<Region> oldRegionsToProcess = getLeftoverRegions();
075    final Iterator<Region> iterator;
076    final boolean processingLeftovers;
077    if (oldRegionsToProcess == null) {
078      iterator = onlineRegions.iterator();
079      processingLeftovers = false;
080    } else {
081      iterator = oldRegionsToProcess;
082      processingLeftovers = true;
083    }
084    // Reset the leftoverRegions and let the loop re-assign if necessary.
085    setLeftoverRegions(null);
086    long regionSizesCalculated = 0L;
087    long offlineRegionsSkipped = 0L;
088    long skippedSplitParents = 0L;
089    long skippedRegionReplicas = 0L;
090    final long start = EnvironmentEdgeManager.currentTime();
091    while (iterator.hasNext()) {
092      // Make sure this chore doesn't hog the thread.
093      long timeRunning = EnvironmentEdgeManager.currentTime() - start;
094      if (timeRunning > maxIterationMillis) {
095        LOG.debug("Preempting execution of FileSystemUtilizationChore because it exceeds the"
096            + " maximum iteration configuration value. Will process remaining Regions"
097            + " on a subsequent invocation.");
098        setLeftoverRegions(iterator);
099        break;
100      }
101
102      final Region region = iterator.next();
103      // If we're processing leftover regions, the region may no-longer be online.
104      // If so, we can skip it.
105      if (processingLeftovers && !onlineRegions.contains(region)) {
106        offlineRegionsSkipped++;
107        continue;
108      }
109      // Avoid computing the size of regions which are the parent of split.
110      if (region.getRegionInfo().isSplitParent()) {
111        skippedSplitParents++;
112        continue;
113      }
114      // Avoid computing the size of region replicas.
115      if (RegionInfo.DEFAULT_REPLICA_ID != region.getRegionInfo().getReplicaId()) {
116        skippedRegionReplicas++;
117        continue;
118      }
119      final long sizeInBytes = computeSize(region);
120      regionSizeStore.put(region.getRegionInfo(), sizeInBytes);
121      regionSizesCalculated++;
122    }
123    if (LOG.isTraceEnabled()) {
124      LOG.trace("Computed the size of " + regionSizesCalculated + " Regions. Skipped computation"
125          + " of " + offlineRegionsSkipped + " regions due to not being online on this RS, "
126          + skippedSplitParents + " regions due to being the parent of a split, and"
127          + skippedRegionReplicas + " regions due to being region replicas.");
128    }
129  }
130
131  /**
132   * Returns an {@link Iterator} over the Regions which were skipped last invocation of the chore.
133   *
134   * @return Regions from the previous invocation to process, or null.
135   */
136  Iterator<Region> getLeftoverRegions() {
137    return leftoverRegions;
138  }
139
140  /**
141   * Sets a new collection of Regions as leftovers.
142   */
143  void setLeftoverRegions(Iterator<Region> newLeftovers) {
144    this.leftoverRegions = newLeftovers;
145  }
146
147  /**
148   * Computes total FileSystem size for the given {@link Region}.
149   *
150   * @param r The region
151   * @return The size, in bytes, of the Region.
152   */
153  long computeSize(Region r) {
154    long regionSize = 0L;
155    for (Store store : r.getStores()) {
156      regionSize += store.getHFilesSize();
157    }
158    if (LOG.isTraceEnabled()) {
159      LOG.trace("Size of " + r + " is " + regionSize);
160    }
161    return regionSize;
162  }
163
164  // VisibleForTesting
165  RegionSizeStore getRegionSizeStore() {
166    return rs.getRegionServerSpaceQuotaManager().getRegionSizeStore();
167  }
168
169  /**
170   * Extracts the period for the chore from the configuration.
171   *
172   * @param conf The configuration object.
173   * @return The configured chore period or the default value.
174   */
175  static int getPeriod(Configuration conf) {
176    return conf.getInt(FS_UTILIZATION_CHORE_PERIOD_KEY, FS_UTILIZATION_CHORE_PERIOD_DEFAULT);
177  }
178
179  /**
180   * Extracts the initial delay for the chore from the configuration.
181   *
182   * @param conf The configuration object.
183   * @return The configured chore initial delay or the default value.
184   */
185  static long getInitialDelay(Configuration conf) {
186    return conf.getLong(FS_UTILIZATION_CHORE_DELAY_KEY, FS_UTILIZATION_CHORE_DELAY_DEFAULT);
187  }
188
189  /**
190   * Extracts the time unit for the chore period and initial delay from the configuration. The
191   * configuration value for {@link #FS_UTILIZATION_CHORE_TIMEUNIT_KEY} must correspond to a
192   * {@link TimeUnit} value.
193   *
194   * @param conf The configuration object.
195   * @return The configured time unit for the chore period and initial delay or the default value.
196   */
197  static TimeUnit getTimeUnit(Configuration conf) {
198    return TimeUnit.valueOf(conf.get(FS_UTILIZATION_CHORE_TIMEUNIT_KEY,
199        FS_UTILIZATION_CHORE_TIMEUNIT_DEFAULT));
200  }
201}