001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver.handler;
019
020import static org.apache.hadoop.hbase.io.hfile.CacheConfig.DEFAULT_EVICT_ON_CLOSE;
021import static org.apache.hadoop.hbase.io.hfile.CacheConfig.DEFAULT_EVICT_ON_SPLIT;
022import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_CLOSE_KEY;
023import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_SPLIT_KEY;
024
025import edu.umd.cs.findbugs.annotations.Nullable;
026import java.io.IOException;
027import java.util.concurrent.TimeUnit;
028import org.apache.hadoop.hbase.HConstants;
029import org.apache.hadoop.hbase.ServerName;
030import org.apache.hadoop.hbase.executor.EventHandler;
031import org.apache.hadoop.hbase.executor.EventType;
032import org.apache.hadoop.hbase.regionserver.HRegion;
033import org.apache.hadoop.hbase.regionserver.HRegionServer;
034import org.apache.hadoop.hbase.regionserver.Region;
035import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
036import org.apache.hadoop.hbase.util.Bytes;
037import org.apache.hadoop.hbase.util.RetryCounter;
038import org.apache.yetus.audience.InterfaceAudience;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041import org.slf4j.MDC;
042
043import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
044
045/**
046 * Handles closing of a region on a region server.
047 * <p/>
048 * Just done the same thing with the old {@link CloseRegionHandler}, with some modifications on
049 * fencing and retrying. But we need to keep the {@link CloseRegionHandler} as is to keep compatible
050 * with the zk less assignment for 1.x, otherwise it is not possible to do rolling upgrade.
051 */
052@InterfaceAudience.Private
053public class UnassignRegionHandler extends EventHandler {
054
055  private static final Logger LOG = LoggerFactory.getLogger(UnassignRegionHandler.class);
056
057  private final String encodedName;
058
059  private final long closeProcId;
060  // If true, the hosting server is aborting. Region close process is different
061  // when we are aborting.
062  // TODO: not used yet, we still use the old CloseRegionHandler when aborting
063  private final boolean abort;
064
065  private final ServerName destination;
066
067  private final RetryCounter retryCounter;
068
069  private boolean isSplit;
070
071  // active time of the master that sent this unassign request, used for fencing
072  private final long initiatingMasterActiveTime;
073
074  public UnassignRegionHandler(HRegionServer server, String encodedName, long closeProcId,
075    boolean abort, @Nullable ServerName destination, EventType eventType,
076    long initiatingMasterActiveTime, boolean isSplit) {
077    super(server, eventType);
078    this.encodedName = encodedName;
079    this.closeProcId = closeProcId;
080    this.abort = abort;
081    this.destination = destination;
082    this.retryCounter = HandlerUtil.getRetryCounter();
083    this.isSplit = isSplit;
084    this.initiatingMasterActiveTime = initiatingMasterActiveTime;
085  }
086
087  private HRegionServer getServer() {
088    return (HRegionServer) server;
089  }
090
091  @Override
092  public void process() throws IOException {
093    MDC.put("pid", Long.toString(closeProcId));
094    HRegionServer rs = getServer();
095    byte[] encodedNameBytes = Bytes.toBytes(encodedName);
096    Boolean previous = rs.getRegionsInTransitionInRS().putIfAbsent(encodedNameBytes, Boolean.FALSE);
097    if (previous != null) {
098      if (previous) {
099        // This could happen as we will update the region state to OPEN when calling
100        // reportRegionStateTransition, so the HMaster will think the region is online, before we
101        // actually open the region, as reportRegionStateTransition is part of the opening process.
102        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
103        LOG.warn(
104          "Received CLOSE for {} which we are already " + "trying to OPEN; try again after {}ms",
105          encodedName, backoff);
106        rs.getExecutorService().delayedSubmit(this, backoff, TimeUnit.MILLISECONDS);
107      } else {
108        LOG.info(
109          "Received CLOSE for {} which we are already trying to CLOSE," + " but not completed yet",
110          encodedName);
111      }
112      return;
113    }
114    HRegion region = rs.getRegion(encodedName);
115    if (region == null) {
116      LOG.debug("Received CLOSE for {} which is not ONLINE and we're not opening/closing.",
117        encodedName);
118      rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE);
119      return;
120    }
121    String regionName = region.getRegionInfo().getEncodedName();
122    LOG.info("Close {}", regionName);
123    if (region.getCoprocessorHost() != null) {
124      // XXX: The behavior is a bit broken. At master side there is no FAILED_CLOSE state, so if
125      // there are exception thrown from the CP, we can not report the error to master, and if
126      // here we just return without calling reportRegionStateTransition, the TRSP at master side
127      // will hang there for ever. So here if the CP throws an exception out, the only way is to
128      // abort the RS...
129      region.getCoprocessorHost().preClose(abort);
130    }
131    // This should be true only in the case of splits/merges closing the parent regions, as
132    // there's no point on keep blocks for those region files.
133    final boolean evictCacheOnClose = isSplit
134      ? server.getConfiguration().getBoolean(EVICT_BLOCKS_ON_SPLIT_KEY, DEFAULT_EVICT_ON_SPLIT)
135      : server.getConfiguration().getBoolean(EVICT_BLOCKS_ON_CLOSE_KEY, DEFAULT_EVICT_ON_CLOSE);
136    LOG.debug("Unassign region: split region: {}: evictCache: {}", isSplit, evictCacheOnClose);
137    region.getStores().forEach(s -> s.getCacheConfig().setEvictOnClose(evictCacheOnClose));
138
139    if (region.close(abort) == null) {
140      // XXX: Is this still possible? The old comment says about split, but now split is done at
141      // master side, so...
142      LOG.warn("Can't close {}, already closed during close()", regionName);
143      rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE);
144      return;
145    }
146
147    rs.removeRegion(region, destination);
148    if (
149      !rs.reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.CLOSED,
150        HConstants.NO_SEQNUM, closeProcId, -1, region.getRegionInfo(), initiatingMasterActiveTime))
151    ) {
152      throw new IOException("Failed to report close to master: " + regionName);
153    }
154    // Cache the close region procedure id after report region transition succeed.
155    rs.finishRegionProcedure(closeProcId);
156    rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE);
157    LOG.info("Closed {}", regionName);
158  }
159
160  @Override
161  protected void handleException(Throwable t) {
162    LOG.warn("Fatal error occurred while closing region {}, aborting...", encodedName, t);
163    // Clear any reference in getServer().getRegionsInTransitionInRS() otherwise can hold up
164    // regionserver abort on cluster shutdown. HBASE-23984.
165    getServer().getRegionsInTransitionInRS().remove(Bytes.toBytes(this.encodedName));
166    getServer().abort("Failed to close region " + encodedName + " and can not recover", t);
167  }
168
169  public static UnassignRegionHandler create(HRegionServer server, String encodedName,
170    long closeProcId, boolean abort, @Nullable ServerName destination, boolean evictCache,
171    long initiatingMasterActiveTime) {
172    // Just try our best to determine whether it is for closing meta. It is not the end of the world
173    // if we put the handler into a wrong executor.
174    Region region = server.getRegion(encodedName);
175    EventType eventType = region != null && region.getRegionInfo().isMetaRegion()
176      ? EventType.M_RS_CLOSE_META
177      : EventType.M_RS_CLOSE_REGION;
178    return new UnassignRegionHandler(server, encodedName, closeProcId, abort, destination,
179      eventType, initiatingMasterActiveTime, evictCache);
180  }
181}