001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.master.normalizer;
020
021import java.sql.Timestamp;
022import java.util.ArrayList;
023import java.util.List;
024import java.util.concurrent.TimeUnit;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.hbase.HBaseConfiguration;
027import org.apache.hadoop.hbase.HBaseIOException;
028import org.apache.hadoop.hbase.TableName;
029import org.apache.hadoop.hbase.client.RegionInfo;
030import org.apache.yetus.audience.InterfaceAudience;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034/**
035 * Implementation of MergeNormalizer Logic in use:
036 * <ol>
037 * <li>get all regions of a given table
038 * <li>get avg size S of each region (by total size of store files reported in RegionLoad)
039 * <li>two regions R1 and its neighbour R2 are merged, if R1 + R2 &lt; S, and all such regions are
040 * returned to be merged
041 * <li>Otherwise, no action is performed
042 * </ol>
043 * <p>
044 * Considering the split policy takes care of splitting region we also want a way to merge when
045 * regions are too small. It is little different than what
046 * {@link org.apache.hadoop.hbase.master.normalizer.SimpleRegionNormalizer} does. Instead of doing
047 * splits and merge both to achieve average region size in cluster for a table. We only merge
048 * regions(older than defined age) and rely on Split policy for region splits. The goal of this
049 * normalizer is to merge small regions to make size of regions close to average size (which is
050 * either average size or depends on either target region size or count in that order). Consider
051 * region with size 1,2,3,4,10,10,10,5,4,3. If minimum merge age is set to 0 days this algorithm
052 * will find the average size as 7.2 assuming we haven't provided target region count or size. Now
053 * we will find all those adjacent region which if merged doesn't exceed the average size. so we
054 * will merge 1-2, 3-4, 4,3 in our first run. To get best results from this normalizer theoretically
055 * we should set target region size between 0.5 to 0.75 of configured maximum file size. If we set
056 * min merge age as 3 we create plan as above and see if we have a plan which has both regions as
057 * new(age less than 3) we discard such plans and we consider the regions even if one of the region
058 * is old enough to be merged.
059 * </p>
060 */
061
062@InterfaceAudience.Private
063public class MergeNormalizer extends AbstractRegionNormalizer {
064  private static final Logger LOG = LoggerFactory.getLogger(MergeNormalizer.class);
065
066  private int minRegionCount;
067  private int minRegionAge;
068  private static long[] skippedCount = new long[NormalizationPlan.PlanType.values().length];
069
070  public MergeNormalizer() {
071    Configuration conf = HBaseConfiguration.create();
072    minRegionCount = conf.getInt("hbase.normalizer.min.region.count", 3);
073    minRegionAge = conf.getInt("hbase.normalizer.min.region.merge.age", 3);
074  }
075
076  @Override
077  public void planSkipped(RegionInfo hri, NormalizationPlan.PlanType type) {
078    skippedCount[type.ordinal()]++;
079  }
080
081  @Override
082  public long getSkippedCount(NormalizationPlan.PlanType type) {
083    return skippedCount[type.ordinal()];
084  }
085
086  @Override
087  public List<NormalizationPlan> computePlanForTable(TableName table) throws HBaseIOException {
088    List<NormalizationPlan> plans = new ArrayList<>();
089    if (!shouldNormalize(table)) {
090      return null;
091    }
092    // at least one of the two regions should be older than MIN_REGION_AGE days
093    List<NormalizationPlan> normalizationPlans = getMergeNormalizationPlan(table);
094    for (NormalizationPlan plan : normalizationPlans) {
095      if (plan instanceof MergeNormalizationPlan) {
096        RegionInfo hri = ((MergeNormalizationPlan) plan).getFirstRegion();
097        RegionInfo hri2 = ((MergeNormalizationPlan) plan).getSecondRegion();
098        if (isOldEnoughToMerge(hri) || isOldEnoughToMerge(hri2)) {
099          plans.add(plan);
100        } else {
101          LOG.debug("Skipping region {} and {} as they are both new", hri.getEncodedName(),
102            hri2.getEncodedName());
103        }
104      }
105    }
106    if (plans.isEmpty()) {
107      LOG.debug("No normalization needed, regions look good for table: {}", table);
108      return null;
109    }
110    return plans;
111  }
112
113  private boolean isOldEnoughToMerge(RegionInfo hri) {
114    Timestamp currentTime = new Timestamp(System.currentTimeMillis());
115    Timestamp hriTime = new Timestamp(hri.getRegionId());
116    boolean isOld =
117      new Timestamp(hriTime.getTime() + TimeUnit.DAYS.toMillis(minRegionAge))
118        .before(currentTime);
119    return isOld;
120  }
121
122  private boolean shouldNormalize(TableName table) {
123    boolean normalize = false;
124    if (table == null || table.isSystemTable()) {
125      LOG.debug("Normalization of system table {} isn't allowed", table);
126    } else if (!isMergeEnabled()) {
127      LOG.debug("Merge disabled for table: {}", table);
128    } else {
129      List<RegionInfo> tableRegions =
130        masterServices.getAssignmentManager().getRegionStates().getRegionsOfTable(table);
131      if (tableRegions == null || tableRegions.size() < minRegionCount) {
132        int nrRegions = tableRegions == null ? 0 : tableRegions.size();
133        LOG.debug(
134          "Table {} has {} regions, required min number of regions for normalizer to run is {} , "
135            + "not running normalizer",
136          table, nrRegions, minRegionCount);
137      } else {
138        normalize = true;
139      }
140    }
141    return normalize;
142  }
143}