001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase.master; 020 021import java.io.IOException; 022import java.util.ArrayList; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Map; 026 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.hbase.ClusterMetrics; 029import org.apache.hadoop.hbase.HConstants; 030import org.apache.hadoop.hbase.RegionMetrics; 031import org.apache.hadoop.hbase.ScheduledChore; 032import org.apache.hadoop.hbase.ServerMetrics; 033import org.apache.hadoop.hbase.ServerName; 034import org.apache.hadoop.hbase.Stoppable; 035import org.apache.hadoop.hbase.TableName; 036import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator; 037import org.apache.hadoop.hbase.client.RegionInfo; 038import org.apache.yetus.audience.InterfaceAudience; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041 042import org.apache.hbase.thirdparty.org.apache.commons.collections4.MapUtils; 043 044/** 045 * This chore, every time it runs, will try to recover regions with high store ref count 046 * by reopening them 047 */ 048@InterfaceAudience.Private 049public class RegionsRecoveryChore extends ScheduledChore { 050 051 private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryChore.class); 052 053 private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore"; 054 055 private static final String ERROR_REOPEN_REIONS_MSG = 056 "Error reopening regions with high storeRefCount. "; 057 058 private final HMaster hMaster; 059 private final int storeFileRefCountThreshold; 060 061 private static final PerClientRandomNonceGenerator NONCE_GENERATOR = 062 PerClientRandomNonceGenerator.get(); 063 064 /** 065 * Construct RegionsRecoveryChore with provided params 066 * 067 * @param stopper When {@link Stoppable#isStopped()} is true, this chore will cancel and cleanup 068 * @param configuration The configuration params to be used 069 * @param hMaster HMaster instance to initiate RegionTableRegions 070 */ 071 RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration, 072 final HMaster hMaster) { 073 074 super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt( 075 HConstants.REGIONS_RECOVERY_INTERVAL, HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL)); 076 this.hMaster = hMaster; 077 this.storeFileRefCountThreshold = configuration.getInt( 078 HConstants.STORE_FILE_REF_COUNT_THRESHOLD, 079 HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD); 080 081 } 082 083 @Override 084 protected void chore() { 085 if (LOG.isTraceEnabled()) { 086 LOG.trace( 087 "Starting up Regions Recovery chore for reopening regions based on storeFileRefCount..."); 088 } 089 try { 090 // only if storeFileRefCountThreshold > 0, consider the feature turned on 091 if (storeFileRefCountThreshold > 0) { 092 final ClusterMetrics clusterMetrics = hMaster.getClusterMetrics(); 093 final Map<ServerName, ServerMetrics> serverMetricsMap = 094 clusterMetrics.getLiveServerMetrics(); 095 final Map<TableName, List<byte[]>> tableToReopenRegionsMap = 096 getTableToRegionsByRefCount(serverMetricsMap); 097 if (MapUtils.isNotEmpty(tableToReopenRegionsMap)) { 098 tableToReopenRegionsMap.forEach((tableName, regionNames) -> { 099 try { 100 LOG.warn("Reopening regions due to high storeFileRefCount. " + 101 "TableName: {} , noOfRegions: {}", tableName, regionNames.size()); 102 hMaster.reopenRegions(tableName, regionNames, NONCE_GENERATOR.getNonceGroup(), 103 NONCE_GENERATOR.newNonce()); 104 } catch (IOException e) { 105 LOG.error("{} tableName: {}, regionNames: {}", ERROR_REOPEN_REIONS_MSG, 106 tableName, regionNames, e); 107 } 108 }); 109 } 110 } else { 111 if (LOG.isDebugEnabled()) { 112 LOG.debug("Reopening regions with very high storeFileRefCount is disabled. " + 113 "Provide threshold value > 0 for {} to enable it.", 114 HConstants.STORE_FILE_REF_COUNT_THRESHOLD); 115 } 116 } 117 } catch (Exception e) { 118 LOG.error("Error while reopening regions based on storeRefCount threshold", e); 119 } 120 if (LOG.isTraceEnabled()) { 121 LOG.trace( 122 "Exiting Regions Recovery chore for reopening regions based on storeFileRefCount..."); 123 } 124 } 125 126 private Map<TableName, List<byte[]>> getTableToRegionsByRefCount( 127 final Map<ServerName, ServerMetrics> serverMetricsMap) { 128 129 final Map<TableName, List<byte[]>> tableToReopenRegionsMap = new HashMap<>(); 130 for (ServerMetrics serverMetrics : serverMetricsMap.values()) { 131 Map<byte[], RegionMetrics> regionMetricsMap = serverMetrics.getRegionMetrics(); 132 for (RegionMetrics regionMetrics : regionMetricsMap.values()) { 133 // For each region, each compacted store file can have different ref counts 134 // We need to find maximum of all such ref counts and if that max count of compacted 135 // store files is beyond a threshold value, we should reopen the region. 136 // Here, we take max ref count of all compacted store files and not the cumulative 137 // count of all compacted store files 138 final int maxCompactedStoreFileRefCount = regionMetrics 139 .getMaxCompactedStoreFileRefCount(); 140 141 if (maxCompactedStoreFileRefCount > storeFileRefCountThreshold) { 142 final byte[] regionName = regionMetrics.getRegionName(); 143 prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionName, 144 maxCompactedStoreFileRefCount); 145 } 146 } 147 } 148 return tableToReopenRegionsMap; 149 150 } 151 152 private void prepareTableToReopenRegionsMap( 153 final Map<TableName, List<byte[]>> tableToReopenRegionsMap, 154 final byte[] regionName, final int regionStoreRefCount) { 155 156 final RegionInfo regionInfo = hMaster.getAssignmentManager().getRegionInfo(regionName); 157 final TableName tableName = regionInfo.getTable(); 158 if (TableName.isMetaTableName(tableName)) { 159 // Do not reopen regions of meta table even if it has 160 // high store file reference count 161 return; 162 } 163 LOG.warn("Region {} for Table {} has high storeFileRefCount {}, considering it for reopen..", 164 regionInfo.getRegionNameAsString(), tableName, regionStoreRefCount); 165 tableToReopenRegionsMap.putIfAbsent(tableName, new ArrayList<>()); 166 tableToReopenRegionsMap.get(tableName).add(regionName); 167 168 } 169 170 // hashcode/equals implementation to ensure at-most one object of RegionsRecoveryChore 171 // is scheduled at a time - RegionsRecoveryConfigManager 172 173 @Override 174 public boolean equals(Object o) { 175 if (this == o) { 176 return true; 177 } 178 return o != null && getClass() == o.getClass(); 179 } 180 181 @Override 182 public int hashCode() { 183 return 31; 184 } 185 186}