001/* 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020package org.apache.hadoop.hbase.master.assignment; 021 022import java.io.IOException; 023import java.util.Comparator; 024 025import org.apache.hadoop.hbase.ServerName; 026import org.apache.hadoop.hbase.TableName; 027import org.apache.hadoop.hbase.client.RegionInfo; 028import org.apache.hadoop.hbase.client.RetriesExhaustedException; 029import org.apache.hadoop.hbase.client.TableState; 030import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; 031import org.apache.hadoop.hbase.master.MasterServices; 032import org.apache.hadoop.hbase.master.RegionState.State; 033import org.apache.hadoop.hbase.master.TableStateManager; 034import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; 035import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 036import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionOpenOperation; 037import org.apache.hadoop.hbase.master.procedure.ServerCrashException; 038import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 039import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 040import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; 041import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation; 042import org.apache.yetus.audience.InterfaceAudience; 043import org.slf4j.Logger; 044import org.slf4j.LoggerFactory; 045import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 046import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.AssignRegionStateData; 047import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState; 048import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 049 050/** 051 * Procedure that describe the assignment of a single region. 052 * There can only be one RegionTransitionProcedure per region running at a time 053 * since each procedure takes a lock on the region. 054 * 055 * <p>The Assign starts by pushing the "assign" operation to the AssignmentManager 056 * and then will go in a "waiting" state. 057 * The AM will batch the "assign" requests and ask the Balancer where to put 058 * the region (the various policies will be respected: retain, round-robin, random). 059 * Once the AM and the balancer have found a place for the region the procedure 060 * will be resumed and an "open region" request will be placed in the Remote Dispatcher 061 * queue, and the procedure once again will go in a "waiting state". 062 * The Remote Dispatcher will batch the various requests for that server and 063 * they will be sent to the RS for execution. 064 * The RS will complete the open operation by calling master.reportRegionStateTransition(). 065 * The AM will intercept the transition report, and notify the procedure. 066 * The procedure will finish the assignment by publishing to new state on meta 067 * or it will retry the assignment. 068 * 069 * <p>This procedure does not rollback when beyond the first 070 * REGION_TRANSITION_QUEUE step; it will press on trying to assign in the face of 071 * failure. Should we ignore rollback calls to Assign/Unassign then? Or just 072 * remove rollback here? 073 */ 074// TODO: Add being able to assign a region to open read-only. 075@InterfaceAudience.Private 076public class AssignProcedure extends RegionTransitionProcedure { 077 private static final Logger LOG = LoggerFactory.getLogger(AssignProcedure.class); 078 079 /** 080 * Set to true when we need recalibrate -- choose a new target -- because original assign failed. 081 */ 082 private boolean forceNewPlan = false; 083 084 /** 085 * Gets set as desired target on move, merge, etc., when we want to go to a particular server. 086 * We may not be able to respect this request but will try. When it is NOT set, then we ask 087 * the balancer to assign. This value is used below in startTransition to set regionLocation if 088 * non-null. Setting regionLocation in regionServerNode is how we override balancer setting 089 * destination. 090 */ 091 protected volatile ServerName targetServer; 092 093 /** 094 * Comparator that will sort AssignProcedures so meta assigns come first, then system table 095 * assigns and finally user space assigns. 096 */ 097 public static final CompareAssignProcedure COMPARATOR = new CompareAssignProcedure(); 098 099 public AssignProcedure() { 100 // Required by the Procedure framework to create the procedure on replay 101 super(); 102 } 103 104 public AssignProcedure(final RegionInfo regionInfo) { 105 this(regionInfo, null); 106 } 107 108 public AssignProcedure(final RegionInfo regionInfo, final ServerName destinationServer) { 109 this(regionInfo, destinationServer, false); 110 } 111 112 public AssignProcedure(final RegionInfo regionInfo, final ServerName destinationServer, 113 boolean override) { 114 super(regionInfo, override); 115 this.targetServer = destinationServer; 116 } 117 118 @Override 119 public TableOperationType getTableOperationType() { 120 return TableOperationType.REGION_ASSIGN; 121 } 122 123 @Override 124 protected boolean isRollbackSupported(final RegionTransitionState state) { 125 switch (state) { 126 case REGION_TRANSITION_QUEUE: 127 return true; 128 default: 129 return false; 130 } 131 } 132 133 @Override 134 protected void serializeStateData(ProcedureStateSerializer serializer) 135 throws IOException { 136 final AssignRegionStateData.Builder state = AssignRegionStateData.newBuilder() 137 .setTransitionState(getTransitionState()) 138 .setRegionInfo(ProtobufUtil.toRegionInfo(getRegionInfo())); 139 if (forceNewPlan) { 140 state.setForceNewPlan(true); 141 } 142 if (this.targetServer != null) { 143 state.setTargetServer(ProtobufUtil.toServerName(this.targetServer)); 144 } 145 if (getAttempt() > 0) { 146 state.setAttempt(getAttempt()); 147 } 148 if (isOverride()) { 149 state.setOverride(isOverride()); 150 } 151 serializer.serialize(state.build()); 152 } 153 154 @Override 155 protected void deserializeStateData(ProcedureStateSerializer serializer) 156 throws IOException { 157 final AssignRegionStateData state = serializer.deserialize(AssignRegionStateData.class); 158 setTransitionState(state.getTransitionState()); 159 setRegionInfo(ProtobufUtil.toRegionInfo(state.getRegionInfo())); 160 forceNewPlan = state.getForceNewPlan(); 161 setOverride(state.getOverride()); 162 if (state.hasTargetServer()) { 163 this.targetServer = ProtobufUtil.toServerName(state.getTargetServer()); 164 } 165 if (state.hasAttempt()) { 166 setAttempt(state.getAttempt()); 167 } 168 } 169 170 /** 171 * Used by ServerCrashProcedure too skip creating Assigns if not needed. 172 * @return Skip out on the assign; returns 'true'/assign if exception. 173 */ 174 public static boolean assign(MasterServices masterServices, RegionInfo ri) { 175 try { 176 return assign(masterServices, 177 masterServices.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(ri)); 178 } catch (IOException e) { 179 LOG.warn("Letting assign proceed", e); 180 } 181 return true; 182 } 183 184 protected static boolean assign(MasterServices masterServices, final RegionStateNode regionNode) 185 throws IOException { 186 // If the region is already open we can't do much... 187 if (regionNode.isInState(State.OPEN) && 188 masterServices.getServerManager().isServerOnline(regionNode.getRegionLocation())) { 189 LOG.info("Assigned, not reassigning {}", regionNode.toShortString()); 190 return false; 191 } 192 // Don't assign if table is in disabling or disabled state. 193 TableStateManager tsm = masterServices.getTableStateManager(); 194 TableName tn = regionNode.getRegionInfo().getTable(); 195 TableState ts = tsm.getTableState(tn); 196 if (ts.isDisabledOrDisabling()) { 197 LOG.info("{} so SKIPPING assign of {}", ts, regionNode.getRegionInfo().getEncodedName()); 198 return false; 199 } 200 return true; 201 } 202 203 @Override 204 protected boolean startTransition(final MasterProcedureEnv env, final RegionStateNode regionNode) 205 throws IOException { 206 if (!assign(env.getMasterServices(), regionNode)) { 207 return false; 208 } 209 // If the region is SPLIT, we can't assign it. But state might be CLOSED, rather than 210 // SPLIT which is what a region gets set to when unassigned as part of SPLIT. FIX. 211 if (regionNode.isInState(State.SPLIT) || 212 (regionNode.getRegionInfo().isOffline() && regionNode.getRegionInfo().isSplit())) { 213 LOG.info("SPLIT, cannot be assigned; " + this + "; " + regionNode + 214 "; hri=" + regionNode.getRegionInfo()); 215 return false; 216 } 217 218 // If we haven't started the operation yet, we can abort 219 if (aborted.get() && regionNode.isInState(State.CLOSED, State.OFFLINE)) { 220 if (incrementAndCheckMaxAttempts(env, regionNode)) { 221 regionNode.setState(State.FAILED_OPEN); 222 setFailure(getClass().getSimpleName(), 223 new RetriesExhaustedException("Max attempts exceeded")); 224 } else { 225 setAbortFailure(getClass().getSimpleName(), "Abort requested"); 226 } 227 return false; 228 } 229 230 // Send assign (add into assign-pool). We call regionNode.offline below to set state to 231 // OFFLINE and to clear the region location. Setting a new regionLocation here is how we retain 232 // old assignment or specify target server if a move or merge. See 233 // AssignmentManager#processAssignQueue. Otherwise, balancer gives us location. 234 // TODO: Region will be set into OFFLINE state below regardless of what its previous state was 235 // This is dangerous? Wrong? What if region was in an unexpected state? 236 ServerName lastRegionLocation = regionNode.offline(); 237 boolean retain = false; 238 if (!forceNewPlan) { 239 if (this.targetServer != null) { 240 retain = targetServer.equals(lastRegionLocation); 241 regionNode.setRegionLocation(targetServer); 242 } else { 243 if (lastRegionLocation != null) { 244 // Try and keep the location we had before we offlined. 245 retain = true; 246 regionNode.setRegionLocation(lastRegionLocation); 247 } else if (regionNode.getLastHost() != null) { 248 retain = true; 249 LOG.info("Setting lastHost as the region location " + regionNode.getLastHost()); 250 regionNode.setRegionLocation(regionNode.getLastHost()); 251 } 252 } 253 } 254 LOG.info("Starting " + this + "; " + regionNode.toShortString() + 255 "; forceNewPlan=" + this.forceNewPlan + 256 ", retain=" + retain); 257 env.getAssignmentManager().queueAssign(regionNode); 258 return true; 259 } 260 261 @Override 262 protected boolean updateTransition(final MasterProcedureEnv env, final RegionStateNode regionNode) 263 throws IOException, ProcedureSuspendedException { 264 // TODO: crash if destinationServer is specified and not online 265 // which is also the case when the balancer provided us with a different location. 266 if (LOG.isTraceEnabled()) { 267 LOG.trace("Update " + this + "; " + regionNode.toShortString()); 268 } 269 if (regionNode.getRegionLocation() == null) { 270 setTransitionState(RegionTransitionState.REGION_TRANSITION_QUEUE); 271 return true; 272 } 273 274 if (!isServerOnline(env, regionNode)) { 275 // TODO: is this correct? should we wait the chore/ssh? 276 LOG.info("Server not online, re-queuing " + this + "; " + regionNode.toShortString()); 277 setTransitionState(RegionTransitionState.REGION_TRANSITION_QUEUE); 278 return true; 279 } 280 281 if (env.getAssignmentManager().waitServerReportEvent(regionNode.getRegionLocation(), this)) { 282 LOG.info("Early suspend! " + this + "; " + regionNode.toShortString()); 283 throw new ProcedureSuspendedException(); 284 } 285 286 if (regionNode.isInState(State.OPEN)) { 287 LOG.info("Already assigned: " + this + "; " + regionNode.toShortString()); 288 return false; 289 } 290 291 // Transition regionNode State. Set it to OPENING. Update hbase:meta, and add 292 // region to list of regions on the target regionserver. Need to UNDO if failure! 293 env.getAssignmentManager().markRegionAsOpening(regionNode); 294 295 // TODO: Requires a migration to be open by the RS? 296 // regionNode.getFormatVersion() 297 298 if (!addToRemoteDispatcher(env, regionNode.getRegionLocation())) { 299 // Failed the dispatch BUT addToRemoteDispatcher internally does 300 // cleanup on failure -- even the undoing of markRegionAsOpening above -- 301 // so nothing more to do here; in fact we need to get out of here 302 // fast since we've been put back on the scheduler. 303 } 304 305 // We always return true, even if we fail dispatch because addToRemoteDispatcher 306 // failure processing sets state back to REGION_TRANSITION_QUEUE so we try again; 307 // i.e. return true to keep the Procedure running; it has been reset to startover. 308 return true; 309 } 310 311 @Override 312 protected void finishTransition(final MasterProcedureEnv env, final RegionStateNode regionNode) 313 throws IOException { 314 env.getAssignmentManager().markRegionAsOpened(regionNode); 315 // This success may have been after we failed open a few times. Be sure to cleanup any 316 // failed open references. See #incrementAndCheckMaxAttempts and where it is called. 317 env.getAssignmentManager().getRegionStates().removeFromFailedOpen(regionNode.getRegionInfo()); 318 } 319 320 @Override 321 protected void reportTransition(final MasterProcedureEnv env, final RegionStateNode regionNode, 322 final TransitionCode code, final long openSeqNum) throws UnexpectedStateException { 323 switch (code) { 324 case OPENED: 325 if (openSeqNum < 0) { 326 throw new UnexpectedStateException("Received report unexpected " + code + 327 " transition openSeqNum=" + openSeqNum + ", " + regionNode); 328 } 329 if (openSeqNum < regionNode.getOpenSeqNum()) { 330 // Don't bother logging if openSeqNum == 0 331 if (openSeqNum != 0) { 332 LOG.warn("Skipping update of open seqnum with " + openSeqNum + 333 " because current seqnum=" + regionNode.getOpenSeqNum()); 334 } 335 } else { 336 regionNode.setOpenSeqNum(openSeqNum); 337 } 338 // Leave the state here as OPENING for now. We set it to OPEN in 339 // REGION_TRANSITION_FINISH section where we do a bunch of checks. 340 // regionNode.setState(RegionState.State.OPEN, RegionState.State.OPENING); 341 setTransitionState(RegionTransitionState.REGION_TRANSITION_FINISH); 342 break; 343 case FAILED_OPEN: 344 handleFailure(env, regionNode); 345 break; 346 default: 347 throw new UnexpectedStateException("Received report unexpected " + code + 348 " transition openSeqNum=" + openSeqNum + ", " + regionNode.toShortString() + 349 ", " + this + ", expected OPENED or FAILED_OPEN."); 350 } 351 } 352 353 /** 354 * Called when dispatch or subsequent OPEN request fail. Can be run by the 355 * inline dispatch call or later by the ServerCrashProcedure. Our state is 356 * generally OPENING. Cleanup and reset to OFFLINE and put our Procedure 357 * State back to REGION_TRANSITION_QUEUE so the Assign starts over. 358 */ 359 private void handleFailure(final MasterProcedureEnv env, final RegionStateNode regionNode) { 360 if (incrementAndCheckMaxAttempts(env, regionNode)) { 361 aborted.set(true); 362 } 363 this.forceNewPlan = true; 364 this.targetServer = null; 365 regionNode.offline(); 366 // We were moved to OPENING state before dispatch. Undo. It is safe to call 367 // this method because it checks for OPENING first. 368 env.getAssignmentManager().undoRegionAsOpening(regionNode); 369 setTransitionState(RegionTransitionState.REGION_TRANSITION_QUEUE); 370 } 371 372 private boolean incrementAndCheckMaxAttempts(final MasterProcedureEnv env, 373 final RegionStateNode regionNode) { 374 final int retries = env.getAssignmentManager().getRegionStates(). 375 addToFailedOpen(regionNode).incrementAndGetRetries(); 376 int max = env.getAssignmentManager().getAssignMaxAttempts(); 377 LOG.info("Retry=" + retries + " of max=" + max + "; " + 378 this + "; " + regionNode.toShortString()); 379 return retries >= max; 380 } 381 382 @Override 383 public RemoteOperation remoteCallBuild(final MasterProcedureEnv env, final ServerName serverName) { 384 assert serverName.equals(getRegionState(env).getRegionLocation()); 385 return new RegionOpenOperation(this, getRegionInfo(), 386 env.getAssignmentManager().getFavoredNodes(getRegionInfo()), false); 387 } 388 389 @Override 390 protected boolean remoteCallFailed(final MasterProcedureEnv env, final RegionStateNode regionNode, 391 final IOException exception) { 392 RegionTransitionState tState = getTransitionState(); 393 if (tState == RegionTransitionState.REGION_TRANSITION_FINISH 394 && exception instanceof ServerCrashException) { 395 // if we found that AssignProcedure is at this stage, then ServerCerash handling may/may not 396 // have any effect 397 // depending upon the race between handling of the failure and execution at 398 // REGION_TRANSITION_FINISH state 399 LOG.warn("Assign Procedure is at state:" + tState 400 + ", so Handling of Server Crash may not have any affect"); 401 return false; 402 } 403 handleFailure(env, regionNode); 404 return true; 405 } 406 407 @Override 408 public void toStringClassDetails(StringBuilder sb) { 409 super.toStringClassDetails(sb); 410 if (this.targetServer != null) sb.append(", target=").append(this.targetServer); 411 } 412 413 @Override 414 public ServerName getServer(final MasterProcedureEnv env) { 415 RegionStateNode node = 416 env.getAssignmentManager().getRegionStates().getRegionStateNode(this.getRegionInfo()); 417 if (node == null) return null; 418 return node.getRegionLocation(); 419 } 420 421 @Override 422 protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { 423 return env.getAssignmentManager().getAssignmentManagerMetrics().getAssignProcMetrics(); 424 } 425 426 /** 427 * Sort AssignProcedures such that meta and system assigns come first before user-space assigns. 428 * Have to do it this way w/ distinct Comparator because Procedure is already Comparable on 429 * 'Env'(?). 430 */ 431 public static class CompareAssignProcedure implements Comparator<AssignProcedure> { 432 @Override 433 public int compare(AssignProcedure left, AssignProcedure right) { 434 if (left.getRegionInfo().isMetaRegion()) { 435 if (right.getRegionInfo().isMetaRegion()) { 436 return RegionInfo.COMPARATOR.compare(left.getRegionInfo(), right.getRegionInfo()); 437 } 438 return -1; 439 } else if (right.getRegionInfo().isMetaRegion()) { 440 return +1; 441 } 442 if (left.getRegionInfo().getTable().isSystemTable()) { 443 if (right.getRegionInfo().getTable().isSystemTable()) { 444 return RegionInfo.COMPARATOR.compare(left.getRegionInfo(), right.getRegionInfo()); 445 } 446 return -1; 447 } else if (right.getRegionInfo().getTable().isSystemTable()) { 448 return +1; 449 } 450 return RegionInfo.COMPARATOR.compare(left.getRegionInfo(), right.getRegionInfo()); 451 } 452 } 453}