001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver.handler; 019 020import static org.apache.hadoop.hbase.io.hfile.CacheConfig.DEFAULT_EVICT_ON_CLOSE; 021import static org.apache.hadoop.hbase.io.hfile.CacheConfig.DEFAULT_EVICT_ON_SPLIT; 022import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_CLOSE_KEY; 023import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_SPLIT_KEY; 024 025import edu.umd.cs.findbugs.annotations.Nullable; 026import java.io.IOException; 027import java.util.concurrent.TimeUnit; 028import org.apache.hadoop.hbase.HConstants; 029import org.apache.hadoop.hbase.ServerName; 030import org.apache.hadoop.hbase.executor.EventHandler; 031import org.apache.hadoop.hbase.executor.EventType; 032import org.apache.hadoop.hbase.regionserver.HRegion; 033import org.apache.hadoop.hbase.regionserver.HRegionServer; 034import org.apache.hadoop.hbase.regionserver.Region; 035import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext; 036import org.apache.hadoop.hbase.util.Bytes; 037import org.apache.hadoop.hbase.util.RetryCounter; 038import org.apache.yetus.audience.InterfaceAudience; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041import org.slf4j.MDC; 042 043import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 044 045/** 046 * Handles closing of a region on a region server. 047 * <p/> 048 * Just done the same thing with the old {@link CloseRegionHandler}, with some modifications on 049 * fencing and retrying. But we need to keep the {@link CloseRegionHandler} as is to keep compatible 050 * with the zk less assignment for 1.x, otherwise it is not possible to do rolling upgrade. 051 */ 052@InterfaceAudience.Private 053public class UnassignRegionHandler extends EventHandler { 054 055 private static final Logger LOG = LoggerFactory.getLogger(UnassignRegionHandler.class); 056 057 private final String encodedName; 058 059 private final long closeProcId; 060 // If true, the hosting server is aborting. Region close process is different 061 // when we are aborting. 062 // TODO: not used yet, we still use the old CloseRegionHandler when aborting 063 private final boolean abort; 064 065 private final ServerName destination; 066 067 private final RetryCounter retryCounter; 068 069 private boolean isSplit; 070 071 // active time of the master that sent this unassign request, used for fencing 072 private final long initiatingMasterActiveTime; 073 074 public UnassignRegionHandler(HRegionServer server, String encodedName, long closeProcId, 075 boolean abort, @Nullable ServerName destination, EventType eventType, 076 long initiatingMasterActiveTime, boolean isSplit) { 077 super(server, eventType); 078 this.encodedName = encodedName; 079 this.closeProcId = closeProcId; 080 this.abort = abort; 081 this.destination = destination; 082 this.retryCounter = HandlerUtil.getRetryCounter(); 083 this.isSplit = isSplit; 084 this.initiatingMasterActiveTime = initiatingMasterActiveTime; 085 } 086 087 private HRegionServer getServer() { 088 return (HRegionServer) server; 089 } 090 091 @Override 092 public void process() throws IOException { 093 MDC.put("pid", Long.toString(closeProcId)); 094 HRegionServer rs = getServer(); 095 byte[] encodedNameBytes = Bytes.toBytes(encodedName); 096 Boolean previous = rs.getRegionsInTransitionInRS().putIfAbsent(encodedNameBytes, Boolean.FALSE); 097 if (previous != null) { 098 if (previous) { 099 // This could happen as we will update the region state to OPEN when calling 100 // reportRegionStateTransition, so the HMaster will think the region is online, before we 101 // actually open the region, as reportRegionStateTransition is part of the opening process. 102 long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); 103 LOG.warn( 104 "Received CLOSE for {} which we are already " + "trying to OPEN; try again after {}ms", 105 encodedName, backoff); 106 rs.getExecutorService().delayedSubmit(this, backoff, TimeUnit.MILLISECONDS); 107 } else { 108 LOG.info( 109 "Received CLOSE for {} which we are already trying to CLOSE," + " but not completed yet", 110 encodedName); 111 } 112 return; 113 } 114 HRegion region = rs.getRegion(encodedName); 115 if (region == null) { 116 LOG.debug("Received CLOSE for {} which is not ONLINE and we're not opening/closing.", 117 encodedName); 118 rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE); 119 return; 120 } 121 String regionName = region.getRegionInfo().getEncodedName(); 122 LOG.info("Close {}", regionName); 123 if (region.getCoprocessorHost() != null) { 124 // XXX: The behavior is a bit broken. At master side there is no FAILED_CLOSE state, so if 125 // there are exception thrown from the CP, we can not report the error to master, and if 126 // here we just return without calling reportRegionStateTransition, the TRSP at master side 127 // will hang there for ever. So here if the CP throws an exception out, the only way is to 128 // abort the RS... 129 region.getCoprocessorHost().preClose(abort); 130 } 131 // This should be true only in the case of splits/merges closing the parent regions, as 132 // there's no point on keep blocks for those region files. 133 final boolean evictCacheOnClose = isSplit 134 ? server.getConfiguration().getBoolean(EVICT_BLOCKS_ON_SPLIT_KEY, DEFAULT_EVICT_ON_SPLIT) 135 : server.getConfiguration().getBoolean(EVICT_BLOCKS_ON_CLOSE_KEY, DEFAULT_EVICT_ON_CLOSE); 136 LOG.debug("Unassign region: split region: {}: evictCache: {}", isSplit, evictCacheOnClose); 137 region.getStores().forEach(s -> s.getCacheConfig().setEvictOnClose(evictCacheOnClose)); 138 139 if (region.close(abort) == null) { 140 // XXX: Is this still possible? The old comment says about split, but now split is done at 141 // master side, so... 142 LOG.warn("Can't close {}, already closed during close()", regionName); 143 rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE); 144 return; 145 } 146 147 rs.removeRegion(region, destination); 148 if ( 149 !rs.reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.CLOSED, 150 HConstants.NO_SEQNUM, closeProcId, -1, region.getRegionInfo(), initiatingMasterActiveTime)) 151 ) { 152 throw new IOException("Failed to report close to master: " + regionName); 153 } 154 // Cache the close region procedure id after report region transition succeed. 155 rs.finishRegionProcedure(closeProcId); 156 rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE); 157 LOG.info("Closed {}", regionName); 158 } 159 160 @Override 161 protected void handleException(Throwable t) { 162 LOG.warn("Fatal error occurred while closing region {}, aborting...", encodedName, t); 163 // Clear any reference in getServer().getRegionsInTransitionInRS() otherwise can hold up 164 // regionserver abort on cluster shutdown. HBASE-23984. 165 getServer().getRegionsInTransitionInRS().remove(Bytes.toBytes(this.encodedName)); 166 getServer().abort("Failed to close region " + encodedName + " and can not recover", t); 167 } 168 169 public static UnassignRegionHandler create(HRegionServer server, String encodedName, 170 long closeProcId, boolean abort, @Nullable ServerName destination, boolean evictCache, 171 long initiatingMasterActiveTime) { 172 // Just try our best to determine whether it is for closing meta. It is not the end of the world 173 // if we put the handler into a wrong executor. 174 Region region = server.getRegion(encodedName); 175 EventType eventType = region != null && region.getRegionInfo().isMetaRegion() 176 ? EventType.M_RS_CLOSE_META 177 : EventType.M_RS_CLOSE_REGION; 178 return new UnassignRegionHandler(server, encodedName, closeProcId, abort, destination, 179 eventType, initiatingMasterActiveTime, evictCache); 180 } 181}