001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.master;
020
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.HashMap;
024import java.util.List;
025import java.util.Map;
026
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.ClusterMetrics;
029import org.apache.hadoop.hbase.HConstants;
030import org.apache.hadoop.hbase.RegionMetrics;
031import org.apache.hadoop.hbase.ScheduledChore;
032import org.apache.hadoop.hbase.ServerMetrics;
033import org.apache.hadoop.hbase.ServerName;
034import org.apache.hadoop.hbase.Stoppable;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
037import org.apache.hadoop.hbase.client.RegionInfo;
038import org.apache.yetus.audience.InterfaceAudience;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042import org.apache.hbase.thirdparty.org.apache.commons.collections4.MapUtils;
043
044/**
045 * This chore, every time it runs, will try to recover regions with high store ref count
046 * by reopening them
047 */
048@InterfaceAudience.Private
049public class RegionsRecoveryChore extends ScheduledChore {
050
051  private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryChore.class);
052
053  private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";
054
055  private static final String ERROR_REOPEN_REIONS_MSG =
056    "Error reopening regions with high storeRefCount. ";
057
058  private final HMaster hMaster;
059  private final int storeFileRefCountThreshold;
060
061  private static final PerClientRandomNonceGenerator NONCE_GENERATOR =
062    PerClientRandomNonceGenerator.get();
063
064  /**
065   * Construct RegionsRecoveryChore with provided params
066   *
067   * @param stopper When {@link Stoppable#isStopped()} is true, this chore will cancel and cleanup
068   * @param configuration The configuration params to be used
069   * @param hMaster HMaster instance to initiate RegionTableRegions
070   */
071  RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
072      final HMaster hMaster) {
073
074    super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(
075      HConstants.REGIONS_RECOVERY_INTERVAL, HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL));
076    this.hMaster = hMaster;
077    this.storeFileRefCountThreshold = configuration.getInt(
078      HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
079      HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
080
081  }
082
083  @Override
084  protected void chore() {
085    if (LOG.isTraceEnabled()) {
086      LOG.trace(
087        "Starting up Regions Recovery chore for reopening regions based on storeFileRefCount...");
088    }
089    try {
090      // only if storeFileRefCountThreshold > 0, consider the feature turned on
091      if (storeFileRefCountThreshold > 0) {
092        final ClusterMetrics clusterMetrics = hMaster.getClusterMetrics();
093        final Map<ServerName, ServerMetrics> serverMetricsMap =
094          clusterMetrics.getLiveServerMetrics();
095        final Map<TableName, List<byte[]>> tableToReopenRegionsMap =
096          getTableToRegionsByRefCount(serverMetricsMap);
097        if (MapUtils.isNotEmpty(tableToReopenRegionsMap)) {
098          tableToReopenRegionsMap.forEach((tableName, regionNames) -> {
099            try {
100              LOG.warn("Reopening regions due to high storeFileRefCount. " +
101                  "TableName: {} , noOfRegions: {}", tableName, regionNames.size());
102              hMaster.reopenRegions(tableName, regionNames, NONCE_GENERATOR.getNonceGroup(),
103                NONCE_GENERATOR.newNonce());
104            } catch (IOException e) {
105              LOG.error("{} tableName: {}, regionNames: {}", ERROR_REOPEN_REIONS_MSG,
106                tableName, regionNames, e);
107            }
108          });
109        }
110      } else {
111        if (LOG.isDebugEnabled()) {
112          LOG.debug("Reopening regions with very high storeFileRefCount is disabled. " +
113            "Provide threshold value > 0 for {} to enable it.",
114            HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
115        }
116      }
117    } catch (Exception e) {
118      LOG.error("Error while reopening regions based on storeRefCount threshold", e);
119    }
120    if (LOG.isTraceEnabled()) {
121      LOG.trace(
122        "Exiting Regions Recovery chore for reopening regions based on storeFileRefCount...");
123    }
124  }
125
126  private Map<TableName, List<byte[]>> getTableToRegionsByRefCount(
127      final Map<ServerName, ServerMetrics> serverMetricsMap) {
128
129    final Map<TableName, List<byte[]>> tableToReopenRegionsMap = new HashMap<>();
130    for (ServerMetrics serverMetrics : serverMetricsMap.values()) {
131      Map<byte[], RegionMetrics> regionMetricsMap = serverMetrics.getRegionMetrics();
132      for (RegionMetrics regionMetrics : regionMetricsMap.values()) {
133        // For each region, each compacted store file can have different ref counts
134        // We need to find maximum of all such ref counts and if that max count of compacted
135        // store files is beyond a threshold value, we should reopen the region.
136        // Here, we take max ref count of all compacted store files and not the cumulative
137        // count of all compacted store files
138        final int maxCompactedStoreFileRefCount = regionMetrics
139          .getMaxCompactedStoreFileRefCount();
140
141        if (maxCompactedStoreFileRefCount > storeFileRefCountThreshold) {
142          final byte[] regionName = regionMetrics.getRegionName();
143          prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionName,
144            maxCompactedStoreFileRefCount);
145        }
146      }
147    }
148    return tableToReopenRegionsMap;
149
150  }
151
152  private void prepareTableToReopenRegionsMap(
153      final Map<TableName, List<byte[]>> tableToReopenRegionsMap,
154      final byte[] regionName, final int regionStoreRefCount) {
155
156    final RegionInfo regionInfo = hMaster.getAssignmentManager().getRegionInfo(regionName);
157    final TableName tableName = regionInfo.getTable();
158    if (TableName.isMetaTableName(tableName)) {
159      // Do not reopen regions of meta table even if it has
160      // high store file reference count
161      return;
162    }
163    LOG.warn("Region {} for Table {} has high storeFileRefCount {}, considering it for reopen..",
164      regionInfo.getRegionNameAsString(), tableName, regionStoreRefCount);
165    tableToReopenRegionsMap.putIfAbsent(tableName, new ArrayList<>());
166    tableToReopenRegionsMap.get(tableName).add(regionName);
167
168  }
169
170  // hashcode/equals implementation to ensure at-most one object of RegionsRecoveryChore
171  // is scheduled at a time - RegionsRecoveryConfigManager
172
173  @Override
174  public boolean equals(Object o) {
175    if (this == o) {
176      return true;
177    }
178    return o != null && getClass() == o.getClass();
179  }
180
181  @Override
182  public int hashCode() {
183    return 31;
184  }
185
186}