/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.handler;

import static org.apache.hadoop.hbase.regionserver.CompactSplit.HBASE_REGION_SERVER_ENABLE_COMPACTION;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;

/**
 * Handles opening of a region on a region server.
 * <p>
 * This is executed after receiving an OPEN RPC from the master or client.
 * <p>
 * The handler cooperates with the "regions in transition" map exposed by
 * {@link RegionServerServices#getRegionsInTransitionInRS()}: an entry of {@link Boolean#TRUE} for
 * the region's encoded name means the region is being opened, {@link Boolean#FALSE} means a close
 * was requested. The entry is removed when {@link #process()} finishes, whatever the outcome.
 * @deprecated Kept only for compatibility.
 * @see AssignRegionHandler
 */
@Deprecated
@InterfaceAudience.Private
public class OpenRegionHandler extends EventHandler {
  private static final Logger LOG = LoggerFactory.getLogger(OpenRegionHandler.class);

  /**
   * How long, in milliseconds, {@link #updateMeta(HRegion, long)} sleeps between two checks of the
   * server/region state. Kept short so that a server shutdown is noticed quickly.
   */
  private static final long POST_OPEN_WAIT_INTERVAL_MS = 10000;

  protected final RegionServerServices rsServices;

  private final RegionInfo regionInfo;
  private final TableDescriptor htd;
  private final long masterSystemTime;

  /**
   * Creates a handler that is processed as an {@link EventType#M_RS_OPEN_REGION} event.
   * @param server           the server this handler runs on
   * @param rsServices       region server services used to open, register and report the region
   * @param regionInfo       the region to open
   * @param htd              descriptor of the table the region belongs to
   * @param masterSystemTime value forwarded to the post-open deploy tasks
   */
  public OpenRegionHandler(final Server server, final RegionServerServices rsServices,
    RegionInfo regionInfo, TableDescriptor htd, long masterSystemTime) {
    this(server, rsServices, regionInfo, htd, masterSystemTime, EventType.M_RS_OPEN_REGION);
  }

  /**
   * Creates a handler with an explicit event type; intended for subclasses.
   * @param server           the server this handler runs on
   * @param rsServices       region server services used to open, register and report the region
   * @param regionInfo       the region to open
   * @param htd              descriptor of the table the region belongs to
   * @param masterSystemTime value forwarded to the post-open deploy tasks
   * @param eventType        the event type this handler is registered under
   */
  protected OpenRegionHandler(final Server server, final RegionServerServices rsServices,
    final RegionInfo regionInfo, final TableDescriptor htd, long masterSystemTime,
    EventType eventType) {
    super(server, eventType);
    this.rsServices = rsServices;
    this.regionInfo = regionInfo;
    this.htd = htd;
    this.masterSystemTime = masterSystemTime;
  }

  /** Returns the region this handler is opening. */
  public RegionInfo getRegionInfo() {
    return regionInfo;
  }

  /**
   * Opens the region, runs the post-open deploy tasks and registers the region as online.
   * <p>
   * Every early exit (server stopping, region already online, opening cancelled, failed open or
   * failed post-open tasks) is treated as a failed open: the region, if it was opened, is closed
   * again and a {@code FAILED_OPEN} transition is reported. In all cases the region is removed from
   * the regions-in-transition map before this method returns or throws.
   * @throws IOException if closing the region during cleanup fails
   */
  @Override
  public void process() throws IOException {
    boolean openSuccessful = false;
    final String regionName = regionInfo.getRegionNameAsString();
    HRegion region = null;

    try {
      if (this.server.isStopped() || this.rsServices.isStopping()) {
        return;
      }
      final String encodedName = regionInfo.getEncodedName();

      // 2 different difficult situations can occur
      // 1) The opening was cancelled. This is an expected situation
      // 2) The region is now marked as online while we're supposed to open. This would be a bug.

      // Check that this region is not already online
      if (this.rsServices.getRegion(encodedName) != null) {
        LOG.error("Region {} was already online when we started processing the opening. "
          + "Marking this new attempt as failed", encodedName);
        return;
      }

      // Check that we're still supposed to open the region.
      // If fails, just return. Someone stole the region from under us.
      if (!isRegionStillOpening()) {
        LOG.error("Region {} opening cancelled", encodedName);
        return;
      }

      // Open region. After a successful open, failures in subsequent
      // processing needs to do a close as part of cleanup.
      region = openRegion();
      if (region == null) {
        return;
      }

      if (
        !updateMeta(region, masterSystemTime) || this.server.isStopped()
          || this.rsServices.isStopping()
      ) {
        return;
      }

      if (!isRegionStillOpening()) {
        return;
      }

      // Successful region open, and add it to MutableOnlineRegions
      this.rsServices.addRegion(region);
      openSuccessful = true;

      // Done! Successful region open
      LOG.debug("Opened {} on {}", regionName, this.server.getServerName());
    } finally {
      // Do all clean up here. The nested try/finally guarantees that the region leaves the
      // regions-in-transition map even when the failed-open cleanup itself throws; otherwise a
      // stale entry would be left behind for this region.
      try {
        if (!openSuccessful) {
          doCleanUpOnFailedOpen(region);
        }
      } finally {
        removeFromRegionsInTransition(openSuccessful, region, regionName);
      }
    }
  }

  /**
   * Removes the region from the regions-in-transition map and, for a successful open, checks
   * whether the open raced with a close request.
   * @param openSuccessful whether the region was opened and added to the online regions
   * @param region         the opened region, or null if the open did not get that far
   * @param regionName     full region name, used for logging only
   * @throws IOException if the region has to be closed again and closing it fails
   */
  private void removeFromRegionsInTransition(boolean openSuccessful, HRegion region,
    String regionName) throws IOException {
    final Boolean current =
      this.rsServices.getRegionsInTransitionInRS().remove(this.regionInfo.getEncodedNameAsBytes());
    if (!openSuccessful) {
      return;
    }

    // Let's check if we have met a race condition on open cancellation....
    // A better solution would be to not have any race condition; a conditional
    // remove(encodedNameAsBytes, Boolean.TRUE) on the map would help.
    if (current == null) { // Should NEVER happen, but let's be paranoid.
      LOG.error("Bad state: we've just opened a region that was NOT in transition. Region={}",
        regionName);
    } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're really unlucky.
      LOG.error("Race condition: we've finished to open a region, while a close was requested "
        + " on region={}. It can be a critical error, as a region that"
        + " should be closed is now opened. Closing it now", regionName);
      cleanupFailedOpen(region);
    }
  }

  /**
   * Closes the region (if one was opened) and always reports a {@code FAILED_OPEN} transition,
   * even when closing throws.
   * @param region the opened region, or null if the open failed before a region was created
   * @throws IOException if closing the region fails
   */
  private void doCleanUpOnFailedOpen(HRegion region) throws IOException {
    try {
      if (region != null) {
        cleanupFailedOpen(region);
      }
    } finally {
      rsServices
        .reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.FAILED_OPEN,
          HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo, -1));
    }
  }

  /**
   * Runs the post-open deploy tasks for the region in a separate
   * {@link PostOpenDeployTasksThread} and waits for them to finish. This can take a while, for
   * example if hbase:meta is not available, so the wait is interrupted every
   * {@value #POST_OPEN_WAIT_INTERVAL_MS} ms to re-check whether the server is stopping or the
   * region is no longer supposed to be opening; in that case the worker thread is interrupted.
   * Caller must cleanup region if this fails.
   * @param r                the freshly opened region
   * @param masterSystemTime value forwarded to the post-open deploy tasks
   * @return true if the post-open tasks completed without an exception and the current thread was
   *         not interrupted; false otherwise
   */
  private boolean updateMeta(final HRegion r, long masterSystemTime) {
    if (this.server.isStopped() || this.rsServices.isStopping()) {
      return false;
    }
    // Object we do wait/notify on. Make it boolean. If set, we're done.
    // Else, wait.
    final AtomicBoolean signaller = new AtomicBoolean(false);
    PostOpenDeployTasksThread t =
      new PostOpenDeployTasksThread(r, this.server, this.rsServices, signaller, masterSystemTime);
    t.start();
    // Post open deploy task:
    // meta => update meta location in ZK
    // other region => update meta
    while (
      !signaller.get() && t.isAlive() && !this.server.isStopped() && !this.rsServices.isStopping()
        && isRegionStillOpening()
    ) {
      synchronized (signaller) {
        try {
          // Wait for a bounded time, so that server shutdown
          // won't take too long if this thread happens to run.
          if (!signaller.get()) {
            signaller.wait(POST_OPEN_WAIT_INTERVAL_MS);
          }
        } catch (InterruptedException e) {
          // Deliberately ignored: go to the loop check, which decides whether to keep waiting.
        }
      }
    }
    // Is thread still alive? We may have left above loop because server is
    // stopping or we timed out the edit. If so, interrupt it.
    if (t.isAlive()) {
      if (!signaller.get()) {
        // Thread still running; interrupt
        LOG.debug("Interrupting thread {}", t);
        t.interrupt();
      }
      try {
        t.join();
      } catch (InterruptedException ie) {
        LOG.warn("Interrupted joining {}", r.getRegionInfo().getRegionNameAsString(), ie);
        Thread.currentThread().interrupt();
      }
    }

    // Was there an exception opening the region? This should trigger on
    // InterruptedException too. If so, we failed.
    // Note: Thread.interrupted() also clears the current thread's interrupt status.
    return (!Thread.interrupted() && t.getException() == null);
  }

  /**
   * Thread to run region post open tasks. Call {@link #getException()} after the thread finishes to
   * check for exceptions running
   * {@link RegionServerServices#postOpenDeployTasks(PostOpenDeployContext)}
   */
  static class PostOpenDeployTasksThread extends Thread {
    private Throwable exception = null;
    private final Server server;
    private final RegionServerServices services;
    private final HRegion region;
    private final AtomicBoolean signaller;
    private final long masterSystemTime;

    /**
     * Creates a daemon thread named after the region's encoded name.
     * @param region           the region whose post-open tasks are run
     * @param server           server to abort if the tasks fail with an {@link IOException} while
     *                         the region is still opening
     * @param services         region server services that run the post-open tasks
     * @param signaller        set to true and notified once {@link #run()} is done
     * @param masterSystemTime value passed on in the {@link PostOpenDeployContext}
     */
    PostOpenDeployTasksThread(final HRegion region, final Server server,
      final RegionServerServices services, final AtomicBoolean signaller, long masterSystemTime) {
      super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
      this.setDaemon(true);
      this.server = server;
      this.services = services;
      this.region = region;
      this.signaller = signaller;
      this.masterSystemTime = masterSystemTime;
    }

    /**
     * Runs the post-open deploy tasks, records any failure for {@link #getException()} and then
     * signals the waiting handler.
     */
    @Override
    public void run() {
      try {
        this.services.postOpenDeployTasks(
          new PostOpenDeployContext(region, Procedure.NO_PROC_ID, masterSystemTime, -1));
      } catch (Throwable e) {
        String msg = "Exception running postOpenDeployTasks; region="
          + this.region.getRegionInfo().getEncodedName();
        this.exception = e;
        // An IOException while the region is still meant to be opening aborts the server;
        // anything else (including a cancelled open) is only logged.
        if (e instanceof IOException && isRegionStillOpening(region.getRegionInfo(), services)) {
          server.abort(msg, e);
        } else {
          LOG.warn(msg, e);
        }
      }
      // We're done. Set flag then wake up anyone waiting on thread to complete.
      this.signaller.set(true);
      synchronized (this.signaller) {
        this.signaller.notify();
      }
    }

    /** Returns Null or the run exception; call this method after thread is done. */
    Throwable getException() {
      return this.exception;
    }
  }

  /**
   * Opens the region, aborting the open as soon as the region is no longer marked as opening.
   * Before opening, the current compaction-enabled state of the compact/split thread is copied
   * into the server configuration.
   * @return Instance of HRegion if successful open else null.
   */
  private HRegion openRegion() {
    HRegion region = null;
    boolean compactionEnabled =
      ((HRegionServer) server).getCompactSplitThread().isCompactionsEnabled();
    this.server.getConfiguration().setBoolean(HBASE_REGION_SERVER_ENABLE_COMPACTION,
      compactionEnabled);
    try {
      // Instantiate the region. The progress callback lets the open be cancelled
      // when the region is no longer supposed to be opening.
      region =
        HRegion.openHRegion(this.regionInfo, this.htd, this.rsServices.getWAL(this.regionInfo),
          this.server.getConfiguration(), this.rsServices, new CancelableProgressable() {
            @Override
            public boolean progress() {
              if (!isRegionStillOpening()) {
                LOG.warn("Open region aborted since it isn't opening any more");
                return false;
              }
              return true;
            }
          });
    } catch (Throwable t) {
      // We failed open. Our caller will see the 'null' return value
      // and report FAILED_OPEN as part of its cleanup.
      LOG.error("Failed open of region={}", this.regionInfo.getRegionNameAsString(), t);
    }
    return region;
  }

  /**
   * Removes the region from the server's online regions and closes it. Does nothing for null.
   * @param region the region to take offline again, may be null
   * @throws IOException if closing the region fails
   */
  private void cleanupFailedOpen(final HRegion region) throws IOException {
    if (region != null) {
      this.rsServices.removeRegion(region, null);
      region.close();
    }
  }

  /**
   * Checks the regions-in-transition map of the given services for the region.
   * @param regionInfo the region to look up
   * @param rsServices services holding the regions-in-transition map
   * @return true only if the map holds {@link Boolean#TRUE} for the region, i.e. it is opening
   */
  private static boolean isRegionStillOpening(RegionInfo regionInfo,
    RegionServerServices rsServices) {
    byte[] encodedName = regionInfo.getEncodedNameAsBytes();
    Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
    return Boolean.TRUE.equals(action); // true means opening for RIT
  }

  /** Returns true if this handler's region is still marked as opening. */
  private boolean isRegionStillOpening() {
    return isRegionStillOpening(regionInfo, rsServices);
  }
}