001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver.handler;
019
020import edu.umd.cs.findbugs.annotations.Nullable;
021import java.io.IOException;
022import java.util.concurrent.TimeUnit;
023import org.apache.hadoop.hbase.HConstants;
024import org.apache.hadoop.hbase.ServerName;
025import org.apache.hadoop.hbase.executor.EventHandler;
026import org.apache.hadoop.hbase.executor.EventType;
027import org.apache.hadoop.hbase.regionserver.HRegion;
028import org.apache.hadoop.hbase.regionserver.HRegionServer;
029import org.apache.hadoop.hbase.regionserver.Region;
030import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
031import org.apache.hadoop.hbase.util.Bytes;
032import org.apache.hadoop.hbase.util.RetryCounter;
033import org.apache.yetus.audience.InterfaceAudience;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
037
038/**
039 * Handles closing of a region on a region server.
040 * <p/>
041 * Just done the same thing with the old {@link CloseRegionHandler}, with some modifications on
042 * fencing and retrying. But we need to keep the {@link CloseRegionHandler} as is to keep compatible
043 * with the zk less assignment for 1.x, otherwise it is not possible to do rolling upgrade.
044 */
045@InterfaceAudience.Private
046public class UnassignRegionHandler extends EventHandler {
047
048  private static final Logger LOG = LoggerFactory.getLogger(UnassignRegionHandler.class);
049
050  private final String encodedName;
051
052  private final long closeProcId;
053  // If true, the hosting server is aborting. Region close process is different
054  // when we are aborting.
055  // TODO: not used yet, we still use the old CloseRegionHandler when aborting
056  private final boolean abort;
057
058  private final ServerName destination;
059
060  private final RetryCounter retryCounter;
061
062  public UnassignRegionHandler(HRegionServer server, String encodedName, long closeProcId,
063      boolean abort, @Nullable ServerName destination, EventType eventType) {
064    super(server, eventType);
065    this.encodedName = encodedName;
066    this.closeProcId = closeProcId;
067    this.abort = abort;
068    this.destination = destination;
069    this.retryCounter = HandlerUtil.getRetryCounter();
070  }
071
072  private HRegionServer getServer() {
073    return (HRegionServer) server;
074  }
075
076  @Override
077  public void process() throws IOException {
078    HRegionServer rs = getServer();
079    byte[] encodedNameBytes = Bytes.toBytes(encodedName);
080    Boolean previous = rs.getRegionsInTransitionInRS().putIfAbsent(encodedNameBytes, Boolean.FALSE);
081    if (previous != null) {
082      if (previous) {
083        // This could happen as we will update the region state to OPEN when calling
084        // reportRegionStateTransition, so the HMaster will think the region is online, before we
085        // actually open the region, as reportRegionStateTransition is part of the opening process.
086        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
087        LOG.warn("Received CLOSE for the region: {}, which we are already " +
088          "trying to OPEN. try again after {}ms", encodedName, backoff);
089        rs.getExecutorService().delayedSubmit(this, backoff, TimeUnit.MILLISECONDS);
090      } else {
091        LOG.info("Received CLOSE for the region: {}, which we are already trying to CLOSE," +
092          " but not completed yet", encodedName);
093      }
094      return;
095    }
096    HRegion region = rs.getRegion(encodedName);
097    if (region == null) {
098      LOG.debug(
099        "Received CLOSE for a region {} which is not online, and we're not opening/closing.",
100        encodedName);
101      rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE);
102      return;
103    }
104    String regionName = region.getRegionInfo().getEncodedName();
105    LOG.info("Close {}", regionName);
106    if (region.getCoprocessorHost() != null) {
107      // XXX: The behavior is a bit broken. At master side there is no FAILED_CLOSE state, so if
108      // there are exception thrown from the CP, we can not report the error to master, and if
109      // here we just return without calling reportRegionStateTransition, the TRSP at master side
110      // will hang there for ever. So here if the CP throws an exception out, the only way is to
111      // abort the RS...
112      region.getCoprocessorHost().preClose(abort);
113    }
114    if (region.close(abort) == null) {
115      // XXX: Is this still possible? The old comment says about split, but now split is done at
116      // master side, so...
117      LOG.warn("Can't close region {}, was already closed during close()", regionName);
118      rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE);
119      return;
120    }
121    rs.removeRegion(region, destination);
122    if (!rs.reportRegionStateTransition(
123      new RegionStateTransitionContext(TransitionCode.CLOSED, HConstants.NO_SEQNUM, closeProcId,
124        -1, region.getRegionInfo()))) {
125      throw new IOException("Failed to report close to master: " + regionName);
126    }
127    // Cache the close region procedure id after report region transition succeed.
128    rs.finishRegionProcedure(closeProcId);
129    rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE);
130    LOG.info("Closed {}", regionName);
131  }
132
133  @Override
134  protected void handleException(Throwable t) {
135    LOG.warn("Fatal error occurred while closing region {}, aborting...", encodedName, t);
136    // Clear any reference in getServer().getRegionsInTransitionInRS() otherwise can hold up
137    // regionserver abort on cluster shutdown. HBASE-23984.
138    getServer().getRegionsInTransitionInRS().remove(Bytes.toBytes(this.encodedName));
139    getServer().abort("Failed to close region " + encodedName + " and can not recover", t);
140  }
141
142  public static UnassignRegionHandler create(HRegionServer server, String encodedName,
143      long closeProcId, boolean abort, @Nullable ServerName destination) {
144    // Just try our best to determine whether it is for closing meta. It is not the end of the world
145    // if we put the handler into a wrong executor.
146    Region region = server.getRegion(encodedName);
147    EventType eventType =
148      region != null && region.getRegionInfo().isMetaRegion() ? EventType.M_RS_CLOSE_META
149        : EventType.M_RS_CLOSE_REGION;
150    return new UnassignRegionHandler(server, encodedName, closeProcId, abort, destination,
151      eventType);
152  }
153}