001/**
002 * Copyright 2010 The Apache Software Foundation
003 *
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020package org.apache.hadoop.hbase.regionserver.handler;
021
022import java.io.IOException;
023
024import org.apache.commons.logging.Log;
025import org.apache.commons.logging.LogFactory;
026import org.apache.hadoop.hbase.HRegionInfo;
027import org.apache.hadoop.hbase.Server;
028import org.apache.hadoop.hbase.executor.EventHandler;
029import org.apache.hadoop.hbase.regionserver.HRegion;
030import org.apache.hadoop.hbase.regionserver.RegionServerServices;
031import org.apache.hadoop.hbase.zookeeper.ZKAssign;
032import org.apache.zookeeper.KeeperException;
033
034/**
035 * Handles closing of a region on a region server.
036 */
037public class CloseRegionHandler extends EventHandler {
038  // NOTE on priorities shutting down.  There are none for close. There are some
039  // for open.  I think that is right.  On shutdown, we want the meta to close
040  // before root and both to close after the user regions have closed.  What
041  // about the case where master tells us to shutdown a catalog region and we
042  // have a running queue of user regions to close?
043  private static final Log LOG = LogFactory.getLog(CloseRegionHandler.class);
044
045  private final int FAILED = -1;
046  int expectedVersion = FAILED;
047
048  private final RegionServerServices rsServices;
049
050  private final HRegionInfo regionInfo;
051
052  // If true, the hosting server is aborting.  Region close process is different
053  // when we are aborting.
054  private final boolean abort;
055
056  // Update zk on closing transitions. Usually true.  Its false if cluster
057  // is going down.  In this case, its the rs that initiates the region
058  // close -- not the master process so state up in zk will unlikely be
059  // CLOSING.
060  private final boolean zk;
061
062  // This is executed after receiving an CLOSE RPC from the master.
063  public CloseRegionHandler(final Server server,
064      final RegionServerServices rsServices, HRegionInfo regionInfo) {
065    this(server, rsServices, regionInfo, false, true, -1);
066  }
067
068  /**
069   * This method used internally by the RegionServer to close out regions.
070   * @param server
071   * @param rsServices
072   * @param regionInfo
073   * @param abort If the regionserver is aborting.
074   * @param zk If the close should be noted out in zookeeper.
075   */
076  public CloseRegionHandler(final Server server,
077      final RegionServerServices rsServices,
078      final HRegionInfo regionInfo, final boolean abort, final boolean zk,
079      final int versionOfClosingNode) {
080    this(server, rsServices,  regionInfo, abort, zk, versionOfClosingNode,
081      EventType.M_RS_CLOSE_REGION);
082  }
083
084  protected CloseRegionHandler(final Server server,
085      final RegionServerServices rsServices, HRegionInfo regionInfo,
086      boolean abort, final boolean zk, final int versionOfClosingNode,
087      EventType eventType) {
088    super(server, eventType);
089    this.server = server;
090    this.rsServices = rsServices;
091    this.regionInfo = regionInfo;
092    this.abort = abort;
093    this.zk = zk;
094    this.expectedVersion = versionOfClosingNode;
095  }
096
097  public HRegionInfo getRegionInfo() {
098    return regionInfo;
099  }
100
101  @Override
102  public void process() {
103    try {
104      String name = regionInfo.getRegionNameAsString();
105      LOG.debug("Processing close of " + name);
106      String encodedRegionName = regionInfo.getEncodedName();
107      // Check that this region is being served here
108      HRegion region = this.rsServices.getFromOnlineRegions(encodedRegionName);
109      if (region == null) {
110        LOG.warn("Received CLOSE for region " + name +
111            " but currently not serving");
112        return;
113      }
114
115      // Close the region
116      try {
117        // TODO: If we need to keep updating CLOSING stamp to prevent against
118        // a timeout if this is long-running, need to spin up a thread?
119        if (region.close(abort) == null) {
120          // This region got closed.  Most likely due to a split. So instead
121          // of doing the setClosedState() below, let's just ignore cont
122          // The split message will clean up the master state.
123          LOG.warn("Can't close region: was already closed during close(): " +
124            regionInfo.getRegionNameAsString());
125          return;
126        }
127      } catch (Throwable t) {
128        // A throwable here indicates that we couldn't successfully flush the
129        // memstore before closing. So, we need to abort the server and allow
130        // the master to split our logs in order to recover the data.
131        server.abort("Unrecoverable exception while closing region " +
132          regionInfo.getRegionNameAsString() + ", still finishing close", t);
133        throw new RuntimeException(t);
134      }
135
136      this.rsServices.removeFromOnlineRegions(regionInfo.getEncodedName());
137
138      if (this.zk) {
139        if (setClosedState(this.expectedVersion, region)) {
140          LOG.debug("set region closed state in zk successfully for region " +
141            name + " sn name: " + this.server.getServerName());
142        } else {
143          LOG.debug("set region closed state in zk unsuccessfully for region " +
144            name + " sn name: " + this.server.getServerName());
145        }
146      }
147
148      // Done!  Region is closed on this RS
149      LOG.debug("Closed region " + region.getRegionNameAsString());
150    } finally {
151      this.rsServices.removeFromRegionsInTransition(this.regionInfo);
152    }
153  }
154
155  /**
156   * Transition ZK node to CLOSED
157   * @param expectedVersion
158   * @return If the state is set successfully
159   */
160  private boolean setClosedState(final int expectedVersion, final HRegion region) {
161    try {
162      if (ZKAssign.transitionNodeClosed(server.getZooKeeper(), regionInfo,
163          server.getServerName(), expectedVersion) == FAILED) {
164        LOG.warn("Completed the CLOSE of a region but when transitioning from " +
165            " CLOSING to CLOSED got a version mismatch, someone else clashed " +
166            "so now unassigning");
167        region.close();
168        return false;
169      }
170    } catch (NullPointerException e) {
171      // I've seen NPE when table was deleted while close was running in unit tests.
172      LOG.warn("NPE during close -- catching and continuing...", e);
173      return false;
174    } catch (KeeperException e) {
175      LOG.error("Failed transitioning node from CLOSING to CLOSED", e);
176      return false;
177    } catch (IOException e) {
178      LOG.error("Failed to close region after failing to transition", e);
179      return false;
180    }
181    return true;
182  }
183
184}