001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.regionserver.handler; 020 021import java.io.IOException; 022import java.util.concurrent.atomic.AtomicBoolean; 023 024import org.apache.hadoop.hbase.HConstants; 025import org.apache.hadoop.hbase.Server; 026import org.apache.hadoop.hbase.client.RegionInfo; 027import org.apache.hadoop.hbase.client.TableDescriptor; 028import org.apache.hadoop.hbase.executor.EventHandler; 029import org.apache.hadoop.hbase.executor.EventType; 030import org.apache.hadoop.hbase.procedure2.Procedure; 031import org.apache.hadoop.hbase.regionserver.HRegion; 032import org.apache.hadoop.hbase.regionserver.RegionServerServices; 033import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext; 034import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext; 035import org.apache.hadoop.hbase.util.CancelableProgressable; 036import org.apache.yetus.audience.InterfaceAudience; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 040/** 041 * Handles opening of a region on a region server. 042 * <p> 043 * This is executed after receiving an OPEN RPC from the master or client. 044 * @deprecated Keep it here only for compatible 045 * @see AssignRegionHandler 046 */ 047@Deprecated 048@InterfaceAudience.Private 049public class OpenRegionHandler extends EventHandler { 050 private static final Logger LOG = LoggerFactory.getLogger(OpenRegionHandler.class); 051 052 protected final RegionServerServices rsServices; 053 054 private final RegionInfo regionInfo; 055 private final TableDescriptor htd; 056 private final long masterSystemTime; 057 058 public OpenRegionHandler(final Server server, 059 final RegionServerServices rsServices, RegionInfo regionInfo, 060 TableDescriptor htd, long masterSystemTime) { 061 this(server, rsServices, regionInfo, htd, masterSystemTime, EventType.M_RS_OPEN_REGION); 062 } 063 064 protected OpenRegionHandler(final Server server, 065 final RegionServerServices rsServices, final RegionInfo regionInfo, 066 final TableDescriptor htd, long masterSystemTime, EventType eventType) { 067 super(server, eventType); 068 this.rsServices = rsServices; 069 this.regionInfo = regionInfo; 070 this.htd = htd; 071 this.masterSystemTime = masterSystemTime; 072 } 073 074 public RegionInfo getRegionInfo() { 075 return regionInfo; 076 } 077 078 @Override 079 public void process() throws IOException { 080 boolean openSuccessful = false; 081 final String regionName = regionInfo.getRegionNameAsString(); 082 HRegion region = null; 083 084 try { 085 if (this.server.isStopped() || this.rsServices.isStopping()) { 086 return; 087 } 088 final String encodedName = regionInfo.getEncodedName(); 089 090 // 2 different difficult situations can occur 091 // 1) The opening was cancelled. This is an expected situation 092 // 2) The region is now marked as online while we're suppose to open. This would be a bug. 093 094 // Check that this region is not already online 095 if (this.rsServices.getRegion(encodedName) != null) { 096 LOG.error("Region " + encodedName + 097 " was already online when we started processing the opening. " + 098 "Marking this new attempt as failed"); 099 return; 100 } 101 102 // Check that we're still supposed to open the region. 103 // If fails, just return. Someone stole the region from under us. 104 if (!isRegionStillOpening()){ 105 LOG.error("Region " + encodedName + " opening cancelled"); 106 return; 107 } 108 109 // Open region. After a successful open, failures in subsequent 110 // processing needs to do a close as part of cleanup. 111 region = openRegion(); 112 if (region == null) { 113 return; 114 } 115 116 if (!updateMeta(region, masterSystemTime) || this.server.isStopped() || 117 this.rsServices.isStopping()) { 118 return; 119 } 120 121 if (!isRegionStillOpening()) { 122 return; 123 } 124 125 // Successful region open, and add it to MutableOnlineRegions 126 this.rsServices.addRegion(region); 127 openSuccessful = true; 128 129 // Done! Successful region open 130 LOG.debug("Opened " + regionName + " on " + this.server.getServerName()); 131 } finally { 132 // Do all clean up here 133 if (!openSuccessful) { 134 doCleanUpOnFailedOpen(region); 135 } 136 final Boolean current = this.rsServices.getRegionsInTransitionInRS(). 137 remove(this.regionInfo.getEncodedNameAsBytes()); 138 139 // Let's check if we have met a race condition on open cancellation.... 140 // A better solution would be to not have any race condition. 141 // this.rsServices.getRegionsInTransitionInRS().remove( 142 // this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE); 143 // would help. 144 if (openSuccessful) { 145 if (current == null) { // Should NEVER happen, but let's be paranoid. 146 LOG.error("Bad state: we've just opened a region that was NOT in transition. Region=" 147 + regionName); 148 } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're 149 // really unlucky. 150 LOG.error("Race condition: we've finished to open a region, while a close was requested " 151 + " on region=" + regionName + ". It can be a critical error, as a region that" 152 + " should be closed is now opened. Closing it now"); 153 cleanupFailedOpen(region); 154 } 155 } 156 } 157 } 158 159 private void doCleanUpOnFailedOpen(HRegion region) throws IOException { 160 try { 161 if (region != null) { 162 cleanupFailedOpen(region); 163 } 164 } finally { 165 rsServices.reportRegionStateTransition(new RegionStateTransitionContext( 166 TransitionCode.FAILED_OPEN, HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo)); 167 } 168 } 169 170 /** 171 * Update ZK or META. This can take a while if for example the 172 * hbase:meta is not available -- if server hosting hbase:meta crashed and we are 173 * waiting on it to come back -- so run in a thread and keep updating znode 174 * state meantime so master doesn't timeout our region-in-transition. 175 * Caller must cleanup region if this fails. 176 */ 177 private boolean updateMeta(final HRegion r, long masterSystemTime) { 178 if (this.server.isStopped() || this.rsServices.isStopping()) { 179 return false; 180 } 181 // Object we do wait/notify on. Make it boolean. If set, we're done. 182 // Else, wait. 183 final AtomicBoolean signaller = new AtomicBoolean(false); 184 PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r, 185 this.server, this.rsServices, signaller, masterSystemTime); 186 t.start(); 187 // Post open deploy task: 188 // meta => update meta location in ZK 189 // other region => update meta 190 while (!signaller.get() && t.isAlive() && !this.server.isStopped() && 191 !this.rsServices.isStopping() && isRegionStillOpening()) { 192 synchronized (signaller) { 193 try { 194 // Wait for 10 seconds, so that server shutdown 195 // won't take too long if this thread happens to run. 196 if (!signaller.get()) signaller.wait(10000); 197 } catch (InterruptedException e) { 198 // Go to the loop check. 199 } 200 } 201 } 202 // Is thread still alive? We may have left above loop because server is 203 // stopping or we timed out the edit. Is so, interrupt it. 204 if (t.isAlive()) { 205 if (!signaller.get()) { 206 // Thread still running; interrupt 207 LOG.debug("Interrupting thread " + t); 208 t.interrupt(); 209 } 210 try { 211 t.join(); 212 } catch (InterruptedException ie) { 213 LOG.warn("Interrupted joining " + 214 r.getRegionInfo().getRegionNameAsString(), ie); 215 Thread.currentThread().interrupt(); 216 } 217 } 218 219 // Was there an exception opening the region? This should trigger on 220 // InterruptedException too. If so, we failed. 221 return (!Thread.interrupted() && t.getException() == null); 222 } 223 224 /** 225 * Thread to run region post open tasks. Call {@link #getException()} after the thread finishes 226 * to check for exceptions running 227 * {@link RegionServerServices#postOpenDeployTasks(PostOpenDeployContext)} 228 */ 229 static class PostOpenDeployTasksThread extends Thread { 230 private Throwable exception = null; 231 private final Server server; 232 private final RegionServerServices services; 233 private final HRegion region; 234 private final AtomicBoolean signaller; 235 private final long masterSystemTime; 236 237 PostOpenDeployTasksThread(final HRegion region, final Server server, 238 final RegionServerServices services, final AtomicBoolean signaller, long masterSystemTime) { 239 super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName()); 240 this.setDaemon(true); 241 this.server = server; 242 this.services = services; 243 this.region = region; 244 this.signaller = signaller; 245 this.masterSystemTime = masterSystemTime; 246 } 247 248 @Override 249 public void run() { 250 try { 251 this.services.postOpenDeployTasks( 252 new PostOpenDeployContext(region, Procedure.NO_PROC_ID, masterSystemTime)); 253 } catch (Throwable e) { 254 String msg = "Exception running postOpenDeployTasks; region=" + 255 this.region.getRegionInfo().getEncodedName(); 256 this.exception = e; 257 if (e instanceof IOException && isRegionStillOpening(region.getRegionInfo(), services)) { 258 server.abort(msg, e); 259 } else { 260 LOG.warn(msg, e); 261 } 262 } 263 // We're done. Set flag then wake up anyone waiting on thread to complete. 264 this.signaller.set(true); 265 synchronized (this.signaller) { 266 this.signaller.notify(); 267 } 268 } 269 270 /** 271 * @return Null or the run exception; call this method after thread is done. 272 */ 273 Throwable getException() { 274 return this.exception; 275 } 276 } 277 278 /** 279 * @return Instance of HRegion if successful open else null. 280 */ 281 private HRegion openRegion() { 282 HRegion region = null; 283 try { 284 // Instantiate the region. This also periodically tickles OPENING 285 // state so master doesn't timeout this region in transition. 286 region = HRegion.openHRegion(this.regionInfo, this.htd, 287 this.rsServices.getWAL(this.regionInfo), 288 this.server.getConfiguration(), 289 this.rsServices, 290 new CancelableProgressable() { 291 @Override 292 public boolean progress() { 293 if (!isRegionStillOpening()) { 294 LOG.warn("Open region aborted since it isn't opening any more"); 295 return false; 296 } 297 return true; 298 } 299 }); 300 } catch (Throwable t) { 301 // We failed open. Our caller will see the 'null' return value 302 // and transition the node back to FAILED_OPEN. If that fails, 303 // we rely on the Timeout Monitor in the master to reassign. 304 LOG.error( 305 "Failed open of region=" + this.regionInfo.getRegionNameAsString(), t); 306 } 307 return region; 308 } 309 310 private void cleanupFailedOpen(final HRegion region) throws IOException { 311 if (region != null) { 312 this.rsServices.removeRegion(region, null); 313 region.close(); 314 } 315 } 316 317 private static boolean isRegionStillOpening( 318 RegionInfo regionInfo, RegionServerServices rsServices) { 319 byte[] encodedName = regionInfo.getEncodedNameAsBytes(); 320 Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName); 321 return Boolean.TRUE.equals(action); // true means opening for RIT 322 } 323 324 private boolean isRegionStillOpening() { 325 return isRegionStillOpening(regionInfo, rsServices); 326 } 327}