/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.handler;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;

/**
 * Handles opening of a region on a region server.
 * <p>
 * This is executed after receiving an OPEN RPC from the master or client.
 * @deprecated Kept only for compatibility.
 * @see AssignRegionHandler
 */
@Deprecated
@InterfaceAudience.Private
public class OpenRegionHandler extends EventHandler {
  private static final Logger LOG = LoggerFactory.getLogger(OpenRegionHandler.class);

  protected final RegionServerServices rsServices;

  private final RegionInfo regionInfo;
  private final TableDescriptor htd;
  private final long masterSystemTime;

  /**
   * Creates a handler that is registered under {@link EventType#M_RS_OPEN_REGION}.
   * @param server           server this handler runs on
   * @param rsServices       region server services used to open, register and report the region
   * @param regionInfo       region to open
   * @param htd              descriptor of the table the region belongs to
   * @param masterSystemTime value handed through to the post-open deploy tasks
   */
  public OpenRegionHandler(final Server server, final RegionServerServices rsServices,
    RegionInfo regionInfo, TableDescriptor htd, long masterSystemTime) {
    this(server, rsServices, regionInfo, htd, masterSystemTime, EventType.M_RS_OPEN_REGION);
  }

  /**
   * Same as the public constructor, but lets subclasses choose the event type.
   * @param eventType event type passed to {@link EventHandler}
   */
  protected OpenRegionHandler(final Server server, final RegionServerServices rsServices,
    final RegionInfo regionInfo, final TableDescriptor htd, long masterSystemTime,
    EventType eventType) {
    super(server, eventType);
    this.rsServices = rsServices;
    this.regionInfo = regionInfo;
    this.htd = htd;
    this.masterSystemTime = masterSystemTime;
  }

  /** Returns the region this handler was created to open. */
  public RegionInfo getRegionInfo() {
    return regionInfo;
  }

  /**
   * Opens the region, runs the post-open deploy tasks and registers the region as online.
   * <p>
   * On any failure or early exit the partially opened region (if any) is closed and a
   * {@code FAILED_OPEN} transition is reported. In every case the region's entry is removed from
   * the regions-in-transition map before this method returns or throws.
   * @throws IOException if closing the region during cleanup fails
   */
  @Override
  public void process() throws IOException {
    boolean openSuccessful = false;
    final String regionName = regionInfo.getRegionNameAsString();
    HRegion region = null;

    try {
      if (this.server.isStopped() || this.rsServices.isStopping()) {
        return;
      }
      final String encodedName = regionInfo.getEncodedName();

      // 2 different difficult situations can occur
      // 1) The opening was cancelled. This is an expected situation
      // 2) The region is now marked as online while we're suppose to open. This would be a bug.

      // Check that this region is not already online
      if (this.rsServices.getRegion(encodedName) != null) {
        LOG.error("Region {} was already online when we started processing the opening. "
          + "Marking this new attempt as failed", encodedName);
        return;
      }

      // Check that we're still supposed to open the region.
      // If fails, just return. Someone stole the region from under us.
      if (!isRegionStillOpening()) {
        LOG.error("Region {} opening cancelled", encodedName);
        return;
      }

      // Open region. After a successful open, failures in subsequent
      // processing needs to do a close as part of cleanup.
      region = openRegion();
      if (region == null) {
        return;
      }

      if (
        !updateMeta(region, masterSystemTime) || this.server.isStopped()
          || this.rsServices.isStopping()
      ) {
        return;
      }

      if (!isRegionStillOpening()) {
        return;
      }

      // Successful region open, and add it to MutableOnlineRegions
      this.rsServices.addRegion(region);
      openSuccessful = true;

      // Done! Successful region open
      LOG.debug("Opened {} on {}", regionName, this.server.getServerName());
    } finally {
      try {
        // Do all clean up here
        if (!openSuccessful) {
          doCleanUpOnFailedOpen(region);
        }
      } finally {
        // Nested finally: the region must leave the regions-in-transition map even when the
        // failed-open cleanup above throws, otherwise it would stay marked as opening here.
        final Boolean current = this.rsServices.getRegionsInTransitionInRS()
          .remove(this.regionInfo.getEncodedNameAsBytes());

        // Let's check if we have met a race condition on open cancellation....
        // A better solution would be to not have any race condition.
        // this.rsServices.getRegionsInTransitionInRS().remove(
        // this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
        // would help.
        if (openSuccessful) {
          if (current == null) { // Should NEVER happen, but let's be paranoid.
            LOG.error(
              "Bad state: we've just opened a region that was NOT in transition. Region={}",
              regionName);
          } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're
            // really unlucky.
            LOG.error(
              "Race condition: we've finished to open a region, while a close was requested "
                + " on region={}. It can be a critical error, as a region that"
                + " should be closed is now opened. Closing it now",
              regionName);
            cleanupFailedOpen(region);
          }
        }
      }
    }
  }

  /**
   * Closes the region if one was opened, then reports a {@code FAILED_OPEN} state transition. The
   * report is sent even if closing the region throws.
   * @param region the region that was opened, or null if the open itself did not succeed
   * @throws IOException if closing the region fails
   */
  private void doCleanUpOnFailedOpen(HRegion region) throws IOException {
    try {
      if (region != null) {
        cleanupFailedOpen(region);
      }
    } finally {
      rsServices.reportRegionStateTransition(new RegionStateTransitionContext(
        TransitionCode.FAILED_OPEN, HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo));
    }
  }

  /**
   * Runs the post-open deploy tasks for the region in a separate daemon thread and waits for them
   * to finish. This can take a while, for example if hbase:meta is not available, so the wait is
   * done in 10 second slices and is given up as soon as the server is stopping or the region is no
   * longer marked as opening; in that case the task thread is interrupted and joined. Caller must
   * cleanup region if this fails.
   * @param r                the freshly opened region
   * @param masterSystemTime value handed through to the post-open deploy tasks
   * @return true if the task thread recorded no exception and the calling thread was not
   *         interrupted; false otherwise, including when the server is already stopping
   */
  private boolean updateMeta(final HRegion r, long masterSystemTime) {
    if (this.server.isStopped() || this.rsServices.isStopping()) {
      return false;
    }
    // Object we do wait/notify on. Make it boolean. If set, we're done.
    // Else, wait.
    final AtomicBoolean signaller = new AtomicBoolean(false);
    PostOpenDeployTasksThread t =
      new PostOpenDeployTasksThread(r, this.server, this.rsServices, signaller, masterSystemTime);
    t.start();
    // Post open deploy task:
    // meta => update meta location in ZK
    // other region => update meta
    while (
      !signaller.get() && t.isAlive() && !this.server.isStopped() && !this.rsServices.isStopping()
        && isRegionStillOpening()
    ) {
      synchronized (signaller) {
        try {
          // Wait for 10 seconds, so that server shutdown
          // won't take too long if this thread happens to run.
          if (!signaller.get()) {
            signaller.wait(10000);
          }
        } catch (InterruptedException e) {
          // Go to the loop check.
        }
      }
    }
    // Is thread still alive? We may have left above loop because server is
    // stopping or we timed out the edit. Is so, interrupt it.
    if (t.isAlive()) {
      if (!signaller.get()) {
        // Thread still running; interrupt
        LOG.debug("Interrupting thread {}", t);
        t.interrupt();
      }
      try {
        t.join();
      } catch (InterruptedException ie) {
        LOG.warn("Interrupted joining {}", r.getRegionInfo().getRegionNameAsString(), ie);
        Thread.currentThread().interrupt();
      }
    }

    // Was there an exception opening the region? This should trigger on
    // InterruptedException too. If so, we failed.
    // Note: Thread.interrupted() also clears the calling thread's interrupt status.
    return (!Thread.interrupted() && t.getException() == null);
  }

  /**
   * Thread to run region post open tasks. Call {@link #getException()} after the thread finishes to
   * check for exceptions running
   * {@link RegionServerServices#postOpenDeployTasks(PostOpenDeployContext)}
   */
  static class PostOpenDeployTasksThread extends Thread {
    private Throwable exception = null;
    private final Server server;
    private final RegionServerServices services;
    private final HRegion region;
    private final AtomicBoolean signaller;
    private final long masterSystemTime;

    PostOpenDeployTasksThread(final HRegion region, final Server server,
      final RegionServerServices services, final AtomicBoolean signaller, long masterSystemTime) {
      super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
      this.setDaemon(true);
      this.server = server;
      this.services = services;
      this.region = region;
      this.signaller = signaller;
      this.masterSystemTime = masterSystemTime;
    }

    @Override
    public void run() {
      try {
        this.services.postOpenDeployTasks(
          new PostOpenDeployContext(region, Procedure.NO_PROC_ID, masterSystemTime));
      } catch (Throwable e) {
        String msg = "Exception running postOpenDeployTasks; region="
          + this.region.getRegionInfo().getEncodedName();
        this.exception = e;
        // An IOException while the region is still marked as opening aborts the server;
        // anything else is only logged and left for the caller to pick up via getException().
        if (e instanceof IOException && isRegionStillOpening(region.getRegionInfo(), services)) {
          server.abort(msg, e);
        } else {
          LOG.warn(msg, e);
        }
      }
      // We're done. Set flag then wake up anyone waiting on thread to complete.
      this.signaller.set(true);
      synchronized (this.signaller) {
        this.signaller.notify();
      }
    }

    /** Returns Null or the run exception; call this method after thread is done. */
    Throwable getException() {
      return this.exception;
    }
  }

  /** Returns Instance of HRegion if successful open else null. */
  private HRegion openRegion() {
    HRegion region = null;
    try {
      // Instantiate the region. The progress callback below cancels the open
      // as soon as the region is no longer marked as opening.
      region =
        HRegion.openHRegion(this.regionInfo, this.htd, this.rsServices.getWAL(this.regionInfo),
          this.server.getConfiguration(), this.rsServices, new CancelableProgressable() {
            @Override
            public boolean progress() {
              if (!isRegionStillOpening()) {
                LOG.warn("Open region aborted since it isn't opening any more");
                return false;
              }
              return true;
            }
          });
    } catch (Throwable t) {
      // We failed open. Our caller will see the 'null' return value
      // and report FAILED_OPEN as part of its cleanup.
      LOG.error("Failed open of region={}", this.regionInfo.getRegionNameAsString(), t);
    }
    return region;
  }

  /**
   * Removes the region from the region server's online regions and closes it. Does nothing when
   * {@code region} is null.
   * @throws IOException if closing the region fails
   */
  private void cleanupFailedOpen(final HRegion region) throws IOException {
    if (region != null) {
      this.rsServices.removeRegion(region, null);
      region.close();
    }
  }

  /**
   * Returns true only if the regions-in-transition map of {@code rsServices} holds
   * {@link Boolean#TRUE} for the region, i.e. it is still marked as opening.
   */
  private static boolean isRegionStillOpening(RegionInfo regionInfo,
    RegionServerServices rsServices) {
    byte[] encodedName = regionInfo.getEncodedNameAsBytes();
    Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
    return Boolean.TRUE.equals(action); // true means opening for RIT
  }

  /** Returns true if this handler's region is still marked as opening on this region server. */
  private boolean isRegionStillOpening() {
    return isRegionStillOpening(regionInfo, rsServices);
  }
}