View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.regionserver.handler;
21  
22  import java.io.IOException;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.Server;
28  import org.apache.hadoop.hbase.executor.EventHandler;
29  import org.apache.hadoop.hbase.regionserver.HRegion;
30  import org.apache.hadoop.hbase.regionserver.RegionServerServices;
31  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
32  import org.apache.zookeeper.KeeperException;
33  
34  /**
35   * Handles closing of a region on a region server.
36   */
37  public class CloseRegionHandler extends EventHandler {
38    // NOTE on priorities shutting down.  There are none for close. There are some
39    // for open.  I think that is right.  On shutdown, we want the meta to close
40    // before root and both to close after the user regions have closed.  What
41    // about the case where master tells us to shutdown a catalog region and we
42    // have a running queue of user regions to close?
43    private static final Log LOG = LogFactory.getLog(CloseRegionHandler.class);
44  
45    private final int FAILED = -1;
46    int expectedVersion = FAILED;
47  
48    private final RegionServerServices rsServices;
49  
50    private final HRegionInfo regionInfo;
51  
52    // If true, the hosting server is aborting.  Region close process is different
53    // when we are aborting.
54    private final boolean abort;
55  
56    // Update zk on closing transitions. Usually true.  Its false if cluster
57    // is going down.  In this case, its the rs that initiates the region
58    // close -- not the master process so state up in zk will unlikely be
59    // CLOSING.
60    private final boolean zk;
61  
62    // This is executed after receiving an CLOSE RPC from the master.
63    public CloseRegionHandler(final Server server,
64        final RegionServerServices rsServices, HRegionInfo regionInfo) {
65      this(server, rsServices, regionInfo, false, true, -1);
66    }
67  
68    /**
69     * This method used internally by the RegionServer to close out regions.
70     * @param server
71     * @param rsServices
72     * @param regionInfo
73     * @param abort If the regionserver is aborting.
74     * @param zk If the close should be noted out in zookeeper.
75     */
76    public CloseRegionHandler(final Server server,
77        final RegionServerServices rsServices,
78        final HRegionInfo regionInfo, final boolean abort, final boolean zk,
79        final int versionOfClosingNode) {
80      this(server, rsServices,  regionInfo, abort, zk, versionOfClosingNode,
81        EventType.M_RS_CLOSE_REGION);
82    }
83  
84    protected CloseRegionHandler(final Server server,
85        final RegionServerServices rsServices, HRegionInfo regionInfo,
86        boolean abort, final boolean zk, final int versionOfClosingNode,
87        EventType eventType) {
88      super(server, eventType);
89      this.server = server;
90      this.rsServices = rsServices;
91      this.regionInfo = regionInfo;
92      this.abort = abort;
93      this.zk = zk;
94      this.expectedVersion = versionOfClosingNode;
95    }
96  
97    public HRegionInfo getRegionInfo() {
98      return regionInfo;
99    }
100 
101   @Override
102   public void process() {
103     try {
104       String name = regionInfo.getRegionNameAsString();
105       LOG.debug("Processing close of " + name);
106       String encodedRegionName = regionInfo.getEncodedName();
107       // Check that this region is being served here
108       HRegion region = this.rsServices.getFromOnlineRegions(encodedRegionName);
109       if (region == null) {
110         LOG.warn("Received CLOSE for region " + name +
111             " but currently not serving");
112         return;
113       }
114 
115       // Close the region
116       try {
117         // TODO: If we need to keep updating CLOSING stamp to prevent against
118         // a timeout if this is long-running, need to spin up a thread?
119         if (region.close(abort) == null) {
120           // This region got closed.  Most likely due to a split. So instead
121           // of doing the setClosedState() below, let's just ignore cont
122           // The split message will clean up the master state.
123           LOG.warn("Can't close region: was already closed during close(): " +
124             regionInfo.getRegionNameAsString());
125           return;
126         }
127       } catch (Throwable t) {
128         // A throwable here indicates that we couldn't successfully flush the
129         // memstore before closing. So, we need to abort the server and allow
130         // the master to split our logs in order to recover the data.
131         server.abort("Unrecoverable exception while closing region " +
132           regionInfo.getRegionNameAsString() + ", still finishing close", t);
133         throw new RuntimeException(t);
134       }
135 
136       this.rsServices.removeFromOnlineRegions(regionInfo.getEncodedName());
137 
138       if (this.zk) {
139         if (setClosedState(this.expectedVersion, region)) {
140           LOG.debug("set region closed state in zk successfully for region " +
141             name + " sn name: " + this.server.getServerName());
142         } else {
143           LOG.debug("set region closed state in zk unsuccessfully for region " +
144             name + " sn name: " + this.server.getServerName());
145         }
146       }
147 
148       // Done!  Region is closed on this RS
149       LOG.debug("Closed region " + region.getRegionNameAsString());
150     } finally {
151       this.rsServices.removeFromRegionsInTransition(this.regionInfo);
152     }
153   }
154 
155   /**
156    * Transition ZK node to CLOSED
157    * @param expectedVersion
158    * @return If the state is set successfully
159    */
160   private boolean setClosedState(final int expectedVersion, final HRegion region) {
161     try {
162       if (ZKAssign.transitionNodeClosed(server.getZooKeeper(), regionInfo,
163           server.getServerName(), expectedVersion) == FAILED) {
164         LOG.warn("Completed the CLOSE of a region but when transitioning from " +
165             " CLOSING to CLOSED got a version mismatch, someone else clashed " +
166             "so now unassigning");
167         region.close();
168         return false;
169       }
170     } catch (NullPointerException e) {
171       // I've seen NPE when table was deleted while close was running in unit tests.
172       LOG.warn("NPE during close -- catching and continuing...", e);
173       return false;
174     } catch (KeeperException e) {
175       LOG.error("Failed transitioning node from CLOSING to CLOSED", e);
176       return false;
177     } catch (IOException e) {
178       LOG.error("Failed to close region after failing to transition", e);
179       return false;
180     }
181     return true;
182   }
183 
184 }