/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.RegionMetrics;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.org.apache.commons.collections4.MapUtils;

/**
 * This chore, every time it runs, will try to recover regions with high store ref count by
 * reopening them.
 * <p>
 * A region is a candidate when the maximum reference count over its compacted store files exceeds
 * the threshold configured under {@link HConstants#STORE_FILE_REF_COUNT_THRESHOLD}. A threshold of
 * zero or less disables the feature. Regions of the meta table are never reopened by this chore.
 */
@InterfaceAudience.Private
public class RegionsRecoveryChore extends ScheduledChore {

  private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryChore.class);

  private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";

  private static final String ERROR_REOPEN_REGIONS_MSG =
    "Error reopening regions with high storeRefCount. ";

  private final HMaster hMaster;
  private final int storeFileRefCountThreshold;

  private static final PerClientRandomNonceGenerator NONCE_GENERATOR =
    PerClientRandomNonceGenerator.get();

  /**
   * Construct RegionsRecoveryChore with provided params
   * @param stopper       When {@link Stoppable#isStopped()} is true, this chore will cancel and
   *                      cleanup
   * @param configuration The configuration params to be used; supplies the chore period
   *                      ({@link HConstants#REGIONS_RECOVERY_INTERVAL}) and the ref count
   *                      threshold ({@link HConstants#STORE_FILE_REF_COUNT_THRESHOLD})
   * @param hMaster       HMaster instance used to read cluster metrics, resolve region info and
   *                      initiate the reopening of regions
   */
  RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
    final HMaster hMaster) {
    super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration
      .getInt(HConstants.REGIONS_RECOVERY_INTERVAL, HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL));
    this.hMaster = hMaster;
    this.storeFileRefCountThreshold = configuration.getInt(
      HConstants.STORE_FILE_REF_COUNT_THRESHOLD, HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
  }

  /**
   * Runs one recovery pass: collects, per table, the regions whose max compacted store file ref
   * count exceeds the threshold and asks the master to reopen them. Any exception is logged and
   * swallowed here rather than propagated out of this method.
   */
  @Override
  protected void chore() {
    LOG.trace(
      "Starting up Regions Recovery chore for reopening regions based on storeFileRefCount...");
    try {
      // only if storeFileRefCountThreshold > 0, consider the feature turned on
      if (storeFileRefCountThreshold > 0) {
        final ClusterMetrics clusterMetrics = hMaster.getClusterMetrics();
        final Map<ServerName, ServerMetrics> serverMetricsMap =
          clusterMetrics.getLiveServerMetrics();
        final Map<TableName, List<byte[]>> tableToReopenRegionsMap =
          getTableToRegionsByRefCount(serverMetricsMap);
        if (MapUtils.isNotEmpty(tableToReopenRegionsMap)) {
          tableToReopenRegionsMap.forEach(this::reopenRegionsOfTable);
        }
      } else {
        LOG.debug(
          "Reopening regions with very high storeFileRefCount is disabled. "
            + "Provide threshold value > 0 for {} to enable it.",
          HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
      }
    } catch (Exception e) {
      LOG.error("Error while reopening regions based on storeRefCount threshold", e);
    }
    LOG.trace(
      "Exiting Regions Recovery chore for reopening regions based on storeFileRefCount...");
  }

  /**
   * Asks the master to reopen the given regions of one table. An {@link IOException} is logged
   * and not rethrown, so a failure for one table does not prevent the remaining tables from being
   * processed.
   * @param tableName   table owning the regions
   * @param regionNames names of the regions to reopen
   */
  private void reopenRegionsOfTable(final TableName tableName, final List<byte[]> regionNames) {
    try {
      LOG.warn("Reopening regions due to high storeFileRefCount. "
        + "TableName: {} , noOfRegions: {}", tableName, regionNames.size());
      hMaster.reopenRegions(tableName, regionNames, NONCE_GENERATOR.getNonceGroup(),
        NONCE_GENERATOR.newNonce());
    } catch (IOException e) {
      LOG.error("{} tableName: {}, regionNames: {}", ERROR_REOPEN_REGIONS_MSG, tableName,
        regionNames, e);
    }
  }

  /**
   * Scans the region metrics of every given server and groups, by table, the names of the regions
   * whose max compacted store file ref count is strictly greater than the threshold.
   * @param serverMetricsMap metrics of the live servers, keyed by server name
   * @return map of table name to names of regions to reopen; empty when nothing qualifies
   */
  private Map<TableName, List<byte[]>>
    getTableToRegionsByRefCount(final Map<ServerName, ServerMetrics> serverMetricsMap) {
    final Map<TableName, List<byte[]>> tableToReopenRegionsMap = new HashMap<>();
    for (ServerMetrics serverMetrics : serverMetricsMap.values()) {
      Map<byte[], RegionMetrics> regionMetricsMap = serverMetrics.getRegionMetrics();
      for (RegionMetrics regionMetrics : regionMetricsMap.values()) {
        // For each region, each compacted store file can have different ref counts
        // We need to find maximum of all such ref counts and if that max count of compacted
        // store files is beyond a threshold value, we should reopen the region.
        // Here, we take max ref count of all compacted store files and not the cumulative
        // count of all compacted store files
        final int maxCompactedStoreFileRefCount = regionMetrics.getMaxCompactedStoreFileRefCount();

        if (maxCompactedStoreFileRefCount > storeFileRefCountThreshold) {
          final byte[] regionName = regionMetrics.getRegionName();
          prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionName,
            maxCompactedStoreFileRefCount);
        }
      }
    }
    return tableToReopenRegionsMap;
  }

  /**
   * Adds the region to the list of regions to reopen for its table, unless the region belongs to
   * the meta table or cannot be resolved by the assignment manager.
   * @param tableToReopenRegionsMap accumulator of table name to region names; mutated in place
   * @param regionName              name of the candidate region
   * @param regionStoreRefCount     the ref count that made the region a candidate (logged only)
   */
  private void prepareTableToReopenRegionsMap(
    final Map<TableName, List<byte[]>> tableToReopenRegionsMap, final byte[] regionName,
    final int regionStoreRefCount) {
    final RegionInfo regionInfo = hMaster.getAssignmentManager().getRegionInfo(regionName);
    if (regionInfo == null) {
      // The metrics may name a region the assignment manager has no record of. Skip it instead of
      // failing with a NullPointerException, which would abort the whole pass and leave every
      // other candidate region un-reopened.
      LOG.warn("Region {} has high storeFileRefCount {} but no RegionInfo was found for it, "
        + "skipping reopen", new String(regionName, StandardCharsets.UTF_8), regionStoreRefCount);
      return;
    }
    final TableName tableName = regionInfo.getTable();
    if (TableName.isMetaTableName(tableName)) {
      // Do not reopen regions of meta table even if it has
      // high store file reference count
      return;
    }
    LOG.warn("Region {} for Table {} has high storeFileRefCount {}, considering it for reopen..",
      regionInfo.getRegionNameAsString(), tableName, regionStoreRefCount);
    // computeIfAbsent only allocates a list the first time a table is seen
    tableToReopenRegionsMap.computeIfAbsent(tableName, unused -> new ArrayList<>())
      .add(regionName);
  }
}