/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.handler;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
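
/*
 * Usage sketch (illustrative only, not code from this file): on a region server this
 * handler is normally queued on the server's event executor in response to an open
 * request rather than having process() called inline. The executorService variable
 * and the submit() call below are assumptions made for the sketch.
 *
 *   OpenRegionHandler handler =
 *     new OpenRegionHandler(server, rsServices, regionInfo, htd, masterSystemTime);
 *   executorService.submit(handler); // eventually invokes handler.process()
 */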
/**
 * Handles opening of a region on a region server.
 * <p>
 * This is executed after receiving an OPEN RPC from the master or client.
 */
@InterfaceAudience.Private
public class OpenRegionHandler extends EventHandler {
  private static final Logger LOG = LoggerFactory.getLogger(OpenRegionHandler.class);

  protected final RegionServerServices rsServices;

  private final RegionInfo regionInfo;
  private final TableDescriptor htd;
  private final long masterSystemTime;

  public OpenRegionHandler(final Server server,
      final RegionServerServices rsServices, RegionInfo regionInfo,
      TableDescriptor htd, long masterSystemTime) {
    this(server, rsServices, regionInfo, htd, masterSystemTime, EventType.M_RS_OPEN_REGION);
  }

  protected OpenRegionHandler(final Server server,
      final RegionServerServices rsServices, final RegionInfo regionInfo,
      final TableDescriptor htd, long masterSystemTime, EventType eventType) {
    super(server, eventType);
    this.rsServices = rsServices;
    this.regionInfo = regionInfo;
    this.htd = htd;
    this.masterSystemTime = masterSystemTime;
  }

  public RegionInfo getRegionInfo() {
    return regionInfo;
  }

  @Override
  public void process() throws IOException {
    boolean openSuccessful = false;
    final String regionName = regionInfo.getRegionNameAsString();
    HRegion region = null;

    try {
      if (this.server.isStopped() || this.rsServices.isStopping()) {
        return;
      }
      final String encodedName = regionInfo.getEncodedName();

      // Two different difficult situations can occur:
      // 1) The opening was cancelled. This is an expected situation.
      // 2) The region is already marked as online while we're supposed to open it.
      //    This would be a bug.

      // Check that this region is not already online
      if (this.rsServices.getRegion(encodedName) != null) {
        LOG.error("Region " + encodedName +
            " was already online when we started processing the opening. " +
            "Marking this new attempt as failed");
        return;
      }

      // Check that we're still supposed to open the region.
      // If this check fails, just return: someone stole the region from under us.
      if (!isRegionStillOpening()) {
        LOG.error("Region " + encodedName + " opening cancelled");
        return;
      }

      // Open region. After a successful open, failures in subsequent
      // processing need to do a close as part of cleanup.
      region = openRegion();
      if (region == null) {
        return;
      }

      if (!updateMeta(region, masterSystemTime) || this.server.isStopped() ||
          this.rsServices.isStopping()) {
        return;
      }

      if (!isRegionStillOpening()) {
        return;
      }

      // Successful region open: add it to the online regions (MutableOnlineRegions).
      this.rsServices.addRegion(region);
      openSuccessful = true;

      // Done! Successful region open
      LOG.debug("Opened " + regionName + " on " + this.server.getServerName());
    } finally {
      // Do all clean up here
      if (!openSuccessful) {
        doCleanUpOnFailedOpen(region);
      }
      final Boolean current = this.rsServices.getRegionsInTransitionInRS().
          remove(this.regionInfo.getEncodedNameAsBytes());

      // Let's check if we have met a race condition on open cancellation....
      // A better solution would be to not have any race condition.
      // this.rsServices.getRegionsInTransitionInRS().remove(
      //   this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
      // would help.
      if (openSuccessful) {
        if (current == null) { // Should NEVER happen, but let's be paranoid.
          LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
              + regionName);
        } else if (Boolean.FALSE.equals(current)) { // Can happen if we're really unlucky.
          LOG.error("Race condition: we've finished opening a region while a close was requested"
              + " on region=" + regionName + ". It can be a critical error, as a region that"
              + " should be closed is now opened. Closing it now");
          cleanupFailedOpen(region);
        }
      }
    }
  }

  private void doCleanUpOnFailedOpen(HRegion region) throws IOException {
    try {
      if (region != null) {
        cleanupFailedOpen(region);
      }
    } finally {
      // Report the failed open to the master so the region can be re-assigned.
      rsServices.reportRegionStateTransition(new RegionStateTransitionContext(
        TransitionCode.FAILED_OPEN, HConstants.NO_SEQNUM, -1, regionInfo));
    }
  }

  /**
   * Update ZK or META. This can take a while if for example the
   * hbase:meta is not available -- if the server hosting hbase:meta crashed and we are
   * waiting on it to come back -- so run in a thread and keep updating znode
   * state meantime so the master doesn't time out our region-in-transition.
   * Caller must cleanup region if this fails.
   * (A minimal sketch of the signalling handshake used here follows the
   * {@link PostOpenDeployTasksThread} class below.)
   */
  boolean updateMeta(final HRegion r, long masterSystemTime) {
    if (this.server.isStopped() || this.rsServices.isStopping()) {
      return false;
    }
    // Object we do wait/notify on. Make it boolean. If set, we're done.
    // Else, wait.
    final AtomicBoolean signaller = new AtomicBoolean(false);
    PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r,
      this.server, this.rsServices, signaller, masterSystemTime);
    t.start();
    // Post open deploy task:
    //   meta         => update meta location in ZK
    //   other region => update meta
    while (!signaller.get() && t.isAlive() && !this.server.isStopped() &&
        !this.rsServices.isStopping() && isRegionStillOpening()) {
      synchronized (signaller) {
        try {
          // Wait for 10 seconds, so that server shutdown
          // won't take too long if this thread happens to run.
          if (!signaller.get()) signaller.wait(10000);
        } catch (InterruptedException e) {
          // Go to the loop check.
        }
      }
    }
    // Is the thread still alive? We may have left the above loop because the server is
    // stopping or we timed out the edit. If so, interrupt it.
    if (t.isAlive()) {
      if (!signaller.get()) {
        // Thread still running; interrupt
        LOG.debug("Interrupting thread " + t);
        t.interrupt();
      }
      try {
        t.join();
      } catch (InterruptedException ie) {
        LOG.warn("Interrupted joining " +
          r.getRegionInfo().getRegionNameAsString(), ie);
        Thread.currentThread().interrupt();
      }
    }

    // Was there an exception opening the region? This should trigger on
    // InterruptedException too. If so, we failed.
    return (!Thread.interrupted() && t.getException() == null);
  }

  /**
   * Thread to run region post-open tasks. Call {@link #getException()} after the thread finishes
   * to check for exceptions running
   * {@link RegionServerServices#postOpenDeployTasks(PostOpenDeployContext)}.
   */
  static class PostOpenDeployTasksThread extends Thread {
    private Throwable exception = null;
    private final Server server;
    private final RegionServerServices services;
    private final HRegion region;
    private final AtomicBoolean signaller;
    private final long masterSystemTime;

    PostOpenDeployTasksThread(final HRegion region, final Server server,
        final RegionServerServices services, final AtomicBoolean signaller,
        long masterSystemTime) {
      super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
      this.setDaemon(true);
      this.server = server;
      this.services = services;
      this.region = region;
      this.signaller = signaller;
      this.masterSystemTime = masterSystemTime;
    }

    @Override
    public void run() {
      try {
        this.services.postOpenDeployTasks(new PostOpenDeployContext(region, masterSystemTime));
      } catch (Throwable e) {
        String msg = "Exception running postOpenDeployTasks; region=" +
          this.region.getRegionInfo().getEncodedName();
        this.exception = e;
        if (e instanceof IOException
            && isRegionStillOpening(region.getRegionInfo(), services)) {
          server.abort(msg, e);
        } else {
          LOG.warn(msg, e);
        }
      }
      // We're done. Set the flag, then wake up anyone waiting on the thread to complete.
      this.signaller.set(true);
      synchronized (this.signaller) {
        this.signaller.notify();
      }
    }

    /**
     * @return Null or the run exception; call this method after the thread is done.
     */
    Throwable getException() {
      return this.exception;
    }
  }
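
  /*
   * The updateMeta()/PostOpenDeployTasksThread pair above uses a simple handshake: an
   * AtomicBoolean flag plus wait/notify, with a bounded wait so a missed notification
   * costs at most one timeout before the flag is re-checked. A minimal, self-contained
   * sketch of the same idiom (variable names are illustrative, not from this class):
   *
   *   final AtomicBoolean done = new AtomicBoolean(false);
   *   Thread worker = new Thread(() -> {
   *     // ... do the slow work ...
   *     done.set(true);                        // publish completion first
   *     synchronized (done) { done.notify(); } // then wake the waiter
   *   });
   *   worker.start();
   *   while (!done.get() && worker.isAlive()) {
   *     synchronized (done) {
   *       try {
   *         if (!done.get()) {
   *           done.wait(10000); // bounded wait; the loop re-checks the flag on wake-up
   *         }
   *       } catch (InterruptedException e) {
   *         Thread.currentThread().interrupt();
   *         break;
   *       }
   *     }
   *   }
   */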

  /**
   * @return Instance of HRegion if the open succeeded, else null.
   */
  HRegion openRegion() {
    HRegion region = null;
    try {
      // Instantiate the region. This also periodically tickles OPENING
      // state so the master doesn't time out this region in transition.
      region = HRegion.openHRegion(this.regionInfo, this.htd,
        this.rsServices.getWAL(this.regionInfo),
        this.server.getConfiguration(),
        this.rsServices,
        new CancelableProgressable() {
          @Override
          public boolean progress() {
            if (!isRegionStillOpening()) {
              LOG.warn("Open region aborted since it isn't opening any more");
              return false;
            }
            return true;
          }
        });
    } catch (Throwable t) {
      // The open failed. Our caller will see the 'null' return value
      // and transition the node back to FAILED_OPEN. If that fails,
      // we rely on the Timeout Monitor in the master to reassign.
      LOG.error(
        "Failed open of region=" + this.regionInfo.getRegionNameAsString(), t);
    }
    return region;
  }

  void cleanupFailedOpen(final HRegion region) throws IOException {
    if (region != null) {
      this.rsServices.removeRegion(region, null);
      region.close();
    }
  }

  private static boolean isRegionStillOpening(
      RegionInfo regionInfo, RegionServerServices rsServices) {
    byte[] encodedName = regionInfo.getEncodedNameAsBytes();
    Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
    // Boolean.TRUE in the regions-in-transition map means the region is opening.
    return Boolean.TRUE.equals(action);
  }

  private boolean isRegionStillOpening() {
    return isRegionStillOpening(regionInfo, rsServices);
  }
}