001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase.master.assignment; 020 021import java.io.IOException; 022import java.io.InterruptedIOException; 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.Collection; 026import java.util.Collections; 027import java.util.HashMap; 028import java.util.List; 029import java.util.Map; 030import java.util.concurrent.Callable; 031import java.util.concurrent.ExecutionException; 032import java.util.concurrent.ExecutorService; 033import java.util.concurrent.Executors; 034import java.util.concurrent.Future; 035import java.util.concurrent.TimeUnit; 036import java.util.stream.Collectors; 037import org.apache.hadoop.conf.Configuration; 038import org.apache.hadoop.fs.FileSystem; 039import org.apache.hadoop.fs.Path; 040import org.apache.hadoop.hbase.DoNotRetryIOException; 041import org.apache.hadoop.hbase.HConstants; 042import org.apache.hadoop.hbase.ServerName; 043import org.apache.hadoop.hbase.TableName; 044import org.apache.hadoop.hbase.UnknownRegionException; 045import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 046import org.apache.hadoop.hbase.client.MasterSwitchType; 047import org.apache.hadoop.hbase.client.Mutation; 048import org.apache.hadoop.hbase.client.RegionInfo; 049import org.apache.hadoop.hbase.client.RegionInfoBuilder; 050import org.apache.hadoop.hbase.client.RegionReplicaUtil; 051import org.apache.hadoop.hbase.client.TableDescriptor; 052import org.apache.hadoop.hbase.io.hfile.CacheConfig; 053import org.apache.hadoop.hbase.master.MasterCoprocessorHost; 054import org.apache.hadoop.hbase.master.MasterFileSystem; 055import org.apache.hadoop.hbase.master.RegionState.State; 056import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; 057import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan; 058import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; 059import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure; 060import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 061import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; 062import org.apache.hadoop.hbase.master.procedure.TableQueue; 063import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 064import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 065import org.apache.hadoop.hbase.quotas.QuotaExceededException; 066import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 067import org.apache.hadoop.hbase.regionserver.HStore; 068import org.apache.hadoop.hbase.regionserver.HStoreFile; 069import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy; 070import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 071import org.apache.hadoop.hbase.util.Bytes; 072import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 073import org.apache.hadoop.hbase.util.FSUtils; 074import org.apache.hadoop.hbase.util.Pair; 075import org.apache.hadoop.hbase.util.Threads; 076import org.apache.hadoop.hbase.wal.WALSplitter; 077import org.apache.hadoop.util.ReflectionUtils; 078import org.apache.yetus.audience.InterfaceAudience; 079import org.slf4j.Logger; 080import org.slf4j.LoggerFactory; 081 082import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 083 084import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 085import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; 086import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 087import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState; 088 089/** 090 * The procedure to split a region in a table. 091 * Takes lock on the parent region. 092 * It holds the lock for the life of the procedure. 093 * <p>Throws exception on construction if determines context hostile to spllt (cluster going 094 * down or master is shutting down or table is disabled).</p> 095 */ 096@InterfaceAudience.Private 097public class SplitTableRegionProcedure 098 extends AbstractStateMachineRegionProcedure<SplitTableRegionState> { 099 private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class); 100 private RegionInfo daughter_1_RI; 101 private RegionInfo daughter_2_RI; 102 private byte[] bestSplitRow; 103 private RegionSplitPolicy splitPolicy; 104 105 public SplitTableRegionProcedure() { 106 // Required by the Procedure framework to create the procedure on replay 107 } 108 109 public SplitTableRegionProcedure(final MasterProcedureEnv env, 110 final RegionInfo regionToSplit, final byte[] splitRow) throws IOException { 111 super(env, regionToSplit); 112 preflightChecks(env, true); 113 // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here 114 // we fail-fast on construction. There it skips the split with just a warning. 115 checkOnline(env, regionToSplit); 116 this.bestSplitRow = splitRow; 117 checkSplittable(env, regionToSplit, bestSplitRow); 118 final TableName table = regionToSplit.getTable(); 119 final long rid = getDaughterRegionIdTimestamp(regionToSplit); 120 this.daughter_1_RI = RegionInfoBuilder.newBuilder(table) 121 .setStartKey(regionToSplit.getStartKey()) 122 .setEndKey(bestSplitRow) 123 .setSplit(false) 124 .setRegionId(rid) 125 .build(); 126 this.daughter_2_RI = RegionInfoBuilder.newBuilder(table) 127 .setStartKey(bestSplitRow) 128 .setEndKey(regionToSplit.getEndKey()) 129 .setSplit(false) 130 .setRegionId(rid) 131 .build(); 132 TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 133 if(htd.getRegionSplitPolicyClassName() != null) { 134 // Since we don't have region reference here, creating the split policy instance without it. 135 // This can be used to invoke methods which don't require Region reference. This instantiation 136 // of a class on Master-side though it only makes sense on the RegionServer-side is 137 // for Phoenix Local Indexing. Refer HBASE-12583 for more information. 138 Class<? extends RegionSplitPolicy> clazz = 139 RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration()); 140 this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration()); 141 } 142 } 143 144 /** 145 * Check whether there are recovered.edits in the parent closed region. 146 * @param env master env 147 * @throws IOException IOException 148 */ 149 static boolean hasRecoveredEdits(MasterProcedureEnv env, RegionInfo ri) throws IOException { 150 return WALSplitter.hasRecoveredEdits(env.getMasterConfiguration(), ri); 151 } 152 153 /** 154 * Check whether the region is splittable 155 * @param env MasterProcedureEnv 156 * @param regionToSplit parent Region to be split 157 * @param splitRow if splitRow is not specified, will first try to get bestSplitRow from RS 158 * @throws IOException 159 */ 160 private void checkSplittable(final MasterProcedureEnv env, 161 final RegionInfo regionToSplit, final byte[] splitRow) throws IOException { 162 // Ask the remote RS if this region is splittable. 163 // If we get an IOE, report it along w/ the failure so can see why we are not splittable at this time. 164 if(regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 165 throw new IllegalArgumentException ("Can't invoke split on non-default regions directly"); 166 } 167 RegionStateNode node = 168 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 169 IOException splittableCheckIOE = null; 170 boolean splittable = false; 171 if (node != null) { 172 try { 173 if (bestSplitRow == null || bestSplitRow.length == 0) { 174 LOG.info("splitKey isn't explicitly specified, " + " will try to find a best split key from RS"); 175 } 176 // Always set bestSplitRow request as true here, 177 // need to call Region#checkSplit to check it splittable or not 178 GetRegionInfoResponse response = 179 Util.getRegionInfoResponse(env, node.getRegionLocation(), node.getRegionInfo(), true); 180 if(bestSplitRow == null || bestSplitRow.length == 0) { 181 bestSplitRow = response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null; 182 } 183 splittable = response.hasSplittable() && response.getSplittable(); 184 185 if (LOG.isDebugEnabled()) { 186 LOG.debug("Splittable=" + splittable + " " + node.toShortString()); 187 } 188 } catch (IOException e) { 189 splittableCheckIOE = e; 190 } 191 } 192 193 if (!splittable) { 194 IOException e = new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable"); 195 if (splittableCheckIOE != null) e.initCause(splittableCheckIOE); 196 throw e; 197 } 198 199 if(bestSplitRow == null || bestSplitRow.length == 0) { 200 throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " 201 + "maybe table is too small for auto split. For force split, try specifying split row"); 202 } 203 204 if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) { 205 throw new DoNotRetryIOException( 206 "Split row is equal to startkey: " + Bytes.toStringBinary(splitRow)); 207 } 208 209 if (!regionToSplit.containsRow(bestSplitRow)) { 210 throw new DoNotRetryIOException( 211 "Split row is not inside region key range splitKey:" + Bytes.toStringBinary(splitRow) + 212 " region: " + regionToSplit); 213 } 214 } 215 216 /** 217 * Calculate daughter regionid to use. 218 * @param hri Parent {@link RegionInfo} 219 * @return Daughter region id (timestamp) to use. 220 */ 221 private static long getDaughterRegionIdTimestamp(final RegionInfo hri) { 222 long rid = EnvironmentEdgeManager.currentTime(); 223 // Regionid is timestamp. Can't be less than that of parent else will insert 224 // at wrong location in hbase:meta (See HBASE-710). 225 if (rid < hri.getRegionId()) { 226 LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() + 227 " but current time here is " + rid); 228 rid = hri.getRegionId() + 1; 229 } 230 return rid; 231 } 232 233 @Override 234 protected Flow executeFromState(final MasterProcedureEnv env, final SplitTableRegionState state) 235 throws InterruptedException { 236 LOG.trace("{} execute state={}", this, state); 237 238 try { 239 switch (state) { 240 case SPLIT_TABLE_REGION_PREPARE: 241 if (prepareSplitRegion(env)) { 242 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION); 243 break; 244 } else { 245 return Flow.NO_MORE_STATE; 246 } 247 case SPLIT_TABLE_REGION_PRE_OPERATION: 248 preSplitRegion(env); 249 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION); 250 break; 251 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 252 addChildProcedure(createUnassignProcedures(env, getRegionReplication(env))); 253 setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS); 254 break; 255 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 256 if (hasRecoveredEdits(env, getRegion())) { 257 // If recovered edits, reopen parent region and then re-run the close by going back to 258 // SPLIT_TABLE_REGION_CLOSE_PARENT_REGION. We might have to cycle here a few times 259 // (TODO: Add being able to open a region in read-only mode). Open the primary replica 260 // in this case only where we just want to pickup the left-out replicated.edits. 261 LOG.info("Found recovered.edits under {}, reopen so we pickup these missed edits!", 262 getRegion().getEncodedName()); 263 addChildProcedure(env.getAssignmentManager().createAssignProcedure(getParentRegion())); 264 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION); 265 } else { 266 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS); 267 } 268 break; 269 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 270 createDaughterRegions(env); 271 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE); 272 break; 273 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 274 writeMaxSequenceIdFile(env); 275 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META); 276 break; 277 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 278 preSplitRegionBeforeMETA(env); 279 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META); 280 break; 281 case SPLIT_TABLE_REGION_UPDATE_META: 282 updateMeta(env); 283 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META); 284 break; 285 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 286 preSplitRegionAfterMETA(env); 287 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS); 288 break; 289 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 290 addChildProcedure(createAssignProcedures(env, getRegionReplication(env))); 291 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION); 292 break; 293 case SPLIT_TABLE_REGION_POST_OPERATION: 294 postSplitRegion(env); 295 return Flow.NO_MORE_STATE; 296 default: 297 throw new UnsupportedOperationException(this + " unhandled state=" + state); 298 } 299 } catch (IOException e) { 300 String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this; 301 if (!isRollbackSupported(state)) { 302 // We reach a state that cannot be rolled back. We just need to keep retrying. 303 LOG.warn(msg, e); 304 } else { 305 LOG.error(msg, e); 306 setFailure("master-split-regions", e); 307 } 308 } 309 // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split 310 return Flow.HAS_MORE_STATE; 311 } 312 313 /** 314 * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously 315 * submitted for parent region to be split (rollback doesn't wait on the completion of the 316 * AssignProcedure) . This can be improved by changing rollback() to support sub-procedures. 317 * See HBASE-19851 for details. 318 */ 319 @Override 320 protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state) 321 throws IOException, InterruptedException { 322 LOG.trace("{} rollback state={}", this, state); 323 324 try { 325 switch (state) { 326 case SPLIT_TABLE_REGION_POST_OPERATION: 327 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 328 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 329 case SPLIT_TABLE_REGION_UPDATE_META: 330 // PONR 331 throw new UnsupportedOperationException(this + " unhandled state=" + state); 332 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 333 break; 334 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 335 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 336 // Doing nothing, as re-open parent region would clean up daughter region directories. 337 break; 338 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 339 // Doing nothing, in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, 340 // we will bring parent region online 341 break; 342 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 343 openParentRegion(env); 344 break; 345 case SPLIT_TABLE_REGION_PRE_OPERATION: 346 postRollBackSplitRegion(env); 347 break; 348 case SPLIT_TABLE_REGION_PREPARE: 349 break; // nothing to do 350 default: 351 throw new UnsupportedOperationException(this + " unhandled state=" + state); 352 } 353 } catch (IOException e) { 354 // This will be retried. Unless there is a bug in the code, 355 // this should be just a "temporary error" (e.g. network down) 356 LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state + 357 " for splitting the region " 358 + getParentRegion().getEncodedName() + " in table " + getTableName(), e); 359 throw e; 360 } 361 } 362 363 /* 364 * Check whether we are in the state that can be rollback 365 */ 366 @Override 367 protected boolean isRollbackSupported(final SplitTableRegionState state) { 368 switch (state) { 369 case SPLIT_TABLE_REGION_POST_OPERATION: 370 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 371 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 372 case SPLIT_TABLE_REGION_UPDATE_META: 373 // It is not safe to rollback if we reach to these states. 374 return false; 375 default: 376 break; 377 } 378 return true; 379 } 380 381 @Override 382 protected SplitTableRegionState getState(final int stateId) { 383 return SplitTableRegionState.forNumber(stateId); 384 } 385 386 @Override 387 protected int getStateId(final SplitTableRegionState state) { 388 return state.getNumber(); 389 } 390 391 @Override 392 protected SplitTableRegionState getInitialState() { 393 return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE; 394 } 395 396 @Override 397 protected void serializeStateData(ProcedureStateSerializer serializer) 398 throws IOException { 399 super.serializeStateData(serializer); 400 401 final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg = 402 MasterProcedureProtos.SplitTableRegionStateData.newBuilder() 403 .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser())) 404 .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion())) 405 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_1_RI)) 406 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_2_RI)); 407 serializer.serialize(splitTableRegionMsg.build()); 408 } 409 410 @Override 411 protected void deserializeStateData(ProcedureStateSerializer serializer) 412 throws IOException { 413 super.deserializeStateData(serializer); 414 415 final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg = 416 serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class); 417 setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo())); 418 setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo())); 419 assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2); 420 daughter_1_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0)); 421 daughter_2_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1)); 422 } 423 424 @Override 425 public void toStringClassDetails(StringBuilder sb) { 426 sb.append(getClass().getSimpleName()); 427 sb.append(" table="); 428 sb.append(getTableName()); 429 sb.append(", parent="); 430 sb.append(getParentRegion().getShortNameToLog()); 431 sb.append(", daughterA="); 432 sb.append(daughter_1_RI.getShortNameToLog()); 433 sb.append(", daughterB="); 434 sb.append(daughter_2_RI.getShortNameToLog()); 435 } 436 437 private RegionInfo getParentRegion() { 438 return getRegion(); 439 } 440 441 @Override 442 public TableOperationType getTableOperationType() { 443 return TableOperationType.REGION_SPLIT; 444 } 445 446 @Override 447 protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { 448 return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics(); 449 } 450 451 private byte[] getSplitRow() { 452 return daughter_2_RI.getStartKey(); 453 } 454 455 private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED }; 456 457 /** 458 * Prepare to Split region. 459 * @param env MasterProcedureEnv 460 */ 461 @VisibleForTesting 462 public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { 463 // Fail if we are taking snapshot for the given table 464 if (env.getMasterServices().getSnapshotManager() 465 .isTakingSnapshot(getParentRegion().getTable())) { 466 setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() + 467 ", because we are taking snapshot for the table " + getParentRegion().getTable())); 468 return false; 469 } 470 // Check whether the region is splittable 471 RegionStateNode node = 472 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 473 474 if (node == null) { 475 throw new UnknownRegionException(getParentRegion().getRegionNameAsString()); 476 } 477 478 RegionInfo parentHRI = node.getRegionInfo(); 479 if (parentHRI == null) { 480 LOG.info("Unsplittable; parent region is null; node={}", node); 481 return false; 482 } 483 // Lookup the parent HRI state from the AM, which has the latest updated info. 484 // Protect against the case where concurrent SPLIT requests came in and succeeded 485 // just before us. 486 if (node.isInState(State.SPLIT)) { 487 LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT"); 488 return false; 489 } 490 if (parentHRI.isSplit() || parentHRI.isOffline()) { 491 LOG.info("Split of " + parentHRI + " skipped because offline/split."); 492 return false; 493 } 494 495 // expected parent to be online or closed 496 if (!node.isInState(EXPECTED_SPLIT_STATES)) { 497 // We may have SPLIT already? 498 setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() + 499 " FAILED because state=" + node.getState() + "; expected " + 500 Arrays.toString(EXPECTED_SPLIT_STATES))); 501 return false; 502 } 503 504 // Since we have the lock and the master is coordinating the operation 505 // we are always able to split the region 506 if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { 507 LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); 508 setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() + 509 " failed due to split switch off")); 510 return false; 511 } 512 513 // See HBASE-21395, for 2.0.x and 2.1.x only. 514 // A safe fence here, if there is a table procedure going on, abort the split. 515 // There some cases that may lead to table procedure roll back (more serious 516 // than roll back the split procedure here), or the split parent was brought online 517 // by the table procedure because of the race between split procedure and table procedure 518 List<AbstractStateMachineTableProcedure> tableProcedures = env 519 .getMasterServices().getProcedures().stream() 520 .filter(p -> p instanceof AbstractStateMachineTableProcedure) 521 .map(p -> (AbstractStateMachineTableProcedure) p) 522 .filter(p -> p.getTableName().equals(getParentRegion().getTable()) && 523 !p.isFinished() && TableQueue.requireTableExclusiveLock(p)) 524 .collect(Collectors.toList()); 525 if (tableProcedures != null && tableProcedures.size() > 0) { 526 throw new DoNotRetryIOException(tableProcedures.get(0).toString() 527 + " is going on against the same table, abort the split of " + this 528 .toString()); 529 } 530 531 // set node state as SPLITTING 532 node.setState(State.SPLITTING); 533 534 return true; 535 } 536 537 /** 538 * Action before splitting region in a table. 539 * @param env MasterProcedureEnv 540 */ 541 private void preSplitRegion(final MasterProcedureEnv env) 542 throws IOException, InterruptedException { 543 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 544 if (cpHost != null) { 545 cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser()); 546 } 547 548 // TODO: Clean up split and merge. Currently all over the place. 549 // Notify QuotaManager and RegionNormalizer 550 try { 551 env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion()); 552 } catch (QuotaExceededException e) { 553 env.getMasterServices().getRegionNormalizer().planSkipped(this.getParentRegion(), 554 NormalizationPlan.PlanType.SPLIT); 555 throw e; 556 } 557 } 558 559 /** 560 * Action after rollback a split table region action. 561 * @param env MasterProcedureEnv 562 */ 563 private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException { 564 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 565 if (cpHost != null) { 566 cpHost.postRollBackSplitRegionAction(getUser()); 567 } 568 } 569 570 /** 571 * Rollback close parent region 572 * @param env MasterProcedureEnv 573 */ 574 private void openParentRegion(final MasterProcedureEnv env) throws IOException { 575 // Check whether the region is closed; if so, open it in the same server 576 final int regionReplication = getRegionReplication(env); 577 final ServerName serverName = getParentRegionServerName(env); 578 579 final AssignProcedure[] procs = createAssignProcedures(regionReplication, env, 580 Collections.singletonList(getParentRegion()), serverName); 581 env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs); 582 } 583 584 /** 585 * Create daughter regions 586 * @param env MasterProcedureEnv 587 */ 588 @VisibleForTesting 589 public void createDaughterRegions(final MasterProcedureEnv env) throws IOException { 590 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 591 final Path tabledir = FSUtils.getTableDir(mfs.getRootDir(), getTableName()); 592 final FileSystem fs = mfs.getFileSystem(); 593 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( 594 env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false); 595 regionFs.createSplitsDir(daughter_1_RI, daughter_2_RI); 596 597 Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs); 598 599 assertReferenceFileCount(fs, expectedReferences.getFirst(), 600 regionFs.getSplitsDir(daughter_1_RI)); 601 //Move the files from the temporary .splits to the final /table/region directory 602 regionFs.commitDaughterRegion(daughter_1_RI); 603 assertReferenceFileCount(fs, expectedReferences.getFirst(), 604 new Path(tabledir, daughter_1_RI.getEncodedName())); 605 606 assertReferenceFileCount(fs, expectedReferences.getSecond(), 607 regionFs.getSplitsDir(daughter_2_RI)); 608 regionFs.commitDaughterRegion(daughter_2_RI); 609 assertReferenceFileCount(fs, expectedReferences.getSecond(), 610 new Path(tabledir, daughter_2_RI.getEncodedName())); 611 } 612 613 /** 614 * Create Split directory 615 * @param env MasterProcedureEnv 616 */ 617 private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, 618 final HRegionFileSystem regionFs) throws IOException { 619 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 620 final Configuration conf = env.getMasterConfiguration(); 621 TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 622 // The following code sets up a thread pool executor with as many slots as 623 // there's files to split. It then fires up everything, waits for 624 // completion and finally checks for any exception 625 // 626 // Note: splitStoreFiles creates daughter region dirs under the parent splits dir 627 // Nothing to unroll here if failure -- re-run createSplitsDir will 628 // clean this up. 629 int nbFiles = 0; 630 final Map<String, Collection<StoreFileInfo>> files = 631 new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount()); 632 for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) { 633 String family = cfd.getNameAsString(); 634 Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family); 635 if (sfis == null) { 636 continue; 637 } 638 Collection<StoreFileInfo> filteredSfis = null; 639 for (StoreFileInfo sfi : sfis) { 640 // Filter. There is a lag cleaning up compacted reference files. They get cleared 641 // after a delay in case outstanding Scanners still have references. Because of this, 642 // the listing of the Store content may have straggler reference files. Skip these. 643 // It should be safe to skip references at this point because we checked above with 644 // the region if it thinks it is splittable and if we are here, it thinks it is 645 // splitable. 646 if (sfi.isReference()) { 647 LOG.info("Skipping split of " + sfi + "; presuming ready for archiving."); 648 continue; 649 } 650 if (filteredSfis == null) { 651 filteredSfis = new ArrayList<StoreFileInfo>(sfis.size()); 652 files.put(family, filteredSfis); 653 } 654 filteredSfis.add(sfi); 655 nbFiles++; 656 } 657 } 658 if (nbFiles == 0) { 659 // no file needs to be splitted. 660 return new Pair<Integer, Integer>(0, 0); 661 } 662 // Max #threads is the smaller of the number of storefiles or the default max determined above. 663 int maxThreads = Math.min( 664 conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, 665 conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)), 666 nbFiles); 667 LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" + 668 getParentRegion().getShortNameToLog() + ", threads=" + maxThreads); 669 final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads, 670 Threads.newDaemonThreadFactory("StoreFileSplitter-%1$d")); 671 final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles); 672 673 // Split each store file. 674 for (Map.Entry<String, Collection<StoreFileInfo>>e: files.entrySet()) { 675 byte [] familyName = Bytes.toBytes(e.getKey()); 676 final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName); 677 final Collection<StoreFileInfo> storeFiles = e.getValue(); 678 if (storeFiles != null && storeFiles.size() > 0) { 679 final CacheConfig cacheConf = new CacheConfig(conf, hcd); 680 for (StoreFileInfo storeFileInfo: storeFiles) { 681 StoreFileSplitter sfs = 682 new StoreFileSplitter(regionFs, familyName, new HStoreFile(mfs.getFileSystem(), 683 storeFileInfo, conf, cacheConf, hcd.getBloomFilterType(), true)); 684 futures.add(threadPool.submit(sfs)); 685 } 686 } 687 } 688 // Shutdown the pool 689 threadPool.shutdown(); 690 691 // Wait for all the tasks to finish. 692 // When splits ran on the RegionServer, how-long-to-wait-configuration was named 693 // hbase.regionserver.fileSplitTimeout. If set, use its value. 694 long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 695 conf.getLong("hbase.regionserver.fileSplitTimeout", 600000)); 696 try { 697 boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS); 698 if (stillRunning) { 699 threadPool.shutdownNow(); 700 // wait for the thread to shutdown completely. 701 while (!threadPool.isTerminated()) { 702 Thread.sleep(50); 703 } 704 throw new IOException( 705 "Took too long to split the" + " files and create the references, aborting split"); 706 } 707 } catch (InterruptedException e) { 708 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 709 } 710 711 int daughterA = 0; 712 int daughterB = 0; 713 // Look for any exception 714 for (Future<Pair<Path, Path>> future : futures) { 715 try { 716 Pair<Path, Path> p = future.get(); 717 daughterA += p.getFirst() != null ? 1 : 0; 718 daughterB += p.getSecond() != null ? 1 : 0; 719 } catch (InterruptedException e) { 720 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 721 } catch (ExecutionException e) { 722 throw new IOException(e); 723 } 724 } 725 726 if (LOG.isDebugEnabled()) { 727 LOG.debug("pid=" + getProcId() + " split storefiles for region " + 728 getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA + 729 " storefiles, Daughter B: " + daughterB + " storefiles."); 730 } 731 return new Pair<Integer, Integer>(daughterA, daughterB); 732 } 733 734 private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount, 735 final Path dir) throws IOException { 736 if (expectedReferenceFileCount != 0 && 737 expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) { 738 throw new IOException("Failing split. Expected reference file count isn't equal."); 739 } 740 } 741 742 private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) 743 throws IOException { 744 if (LOG.isDebugEnabled()) { 745 LOG.debug("pid=" + getProcId() + " splitting started for store file: " + 746 sf.getPath() + " for region: " + getParentRegion().getShortNameToLog()); 747 } 748 749 final byte[] splitRow = getSplitRow(); 750 final String familyName = Bytes.toString(family); 751 final Path path_first = regionFs.splitStoreFile(this.daughter_1_RI, familyName, sf, splitRow, 752 false, splitPolicy); 753 final Path path_second = regionFs.splitStoreFile(this.daughter_2_RI, familyName, sf, splitRow, 754 true, splitPolicy); 755 if (LOG.isDebugEnabled()) { 756 LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + 757 sf.getPath() + " for region: " + getParentRegion().getShortNameToLog()); 758 } 759 return new Pair<Path,Path>(path_first, path_second); 760 } 761 762 /** 763 * Utility class used to do the file splitting / reference writing 764 * in parallel instead of sequentially. 765 */ 766 private class StoreFileSplitter implements Callable<Pair<Path,Path>> { 767 private final HRegionFileSystem regionFs; 768 private final byte[] family; 769 private final HStoreFile sf; 770 771 /** 772 * Constructor that takes what it needs to split 773 * @param regionFs the file system 774 * @param family Family that contains the store file 775 * @param sf which file 776 */ 777 public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) { 778 this.regionFs = regionFs; 779 this.sf = sf; 780 this.family = family; 781 } 782 783 @Override 784 public Pair<Path,Path> call() throws IOException { 785 return splitStoreFile(regionFs, family, sf); 786 } 787 } 788 789 /** 790 * Post split region actions before the Point-of-No-Return step 791 * @param env MasterProcedureEnv 792 **/ 793 private void preSplitRegionBeforeMETA(final MasterProcedureEnv env) 794 throws IOException, InterruptedException { 795 final List<Mutation> metaEntries = new ArrayList<Mutation>(); 796 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 797 if (cpHost != null) { 798 cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser()); 799 try { 800 for (Mutation p : metaEntries) { 801 RegionInfo.parseRegionName(p.getRow()); 802 } 803 } catch (IOException e) { 804 LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as " 805 + "region name." 806 + "Mutations from coprocessor should only for hbase:meta table."); 807 throw e; 808 } 809 } 810 } 811 812 /** 813 * Add daughter regions to META 814 * @param env MasterProcedureEnv 815 */ 816 private void updateMeta(final MasterProcedureEnv env) throws IOException { 817 env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env), 818 daughter_1_RI, daughter_2_RI); 819 } 820 821 /** 822 * Pre split region actions after the Point-of-No-Return step 823 * @param env MasterProcedureEnv 824 **/ 825 private void preSplitRegionAfterMETA(final MasterProcedureEnv env) 826 throws IOException, InterruptedException { 827 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 828 if (cpHost != null) { 829 cpHost.preSplitAfterMETAAction(getUser()); 830 } 831 } 832 833 /** 834 * Post split region actions 835 * @param env MasterProcedureEnv 836 **/ 837 private void postSplitRegion(final MasterProcedureEnv env) throws IOException { 838 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 839 if (cpHost != null) { 840 cpHost.postCompletedSplitRegionAction(daughter_1_RI, daughter_2_RI, getUser()); 841 } 842 } 843 844 private ServerName getParentRegionServerName(final MasterProcedureEnv env) { 845 return env.getMasterServices().getAssignmentManager() 846 .getRegionStates().getRegionServerOfRegion(getParentRegion()); 847 } 848 849 private UnassignProcedure[] createUnassignProcedures(final MasterProcedureEnv env, 850 final int regionReplication) { 851 final UnassignProcedure[] procs = new UnassignProcedure[regionReplication]; 852 for (int i = 0; i < procs.length; ++i) { 853 final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i); 854 procs[i] = env.getAssignmentManager(). 855 createUnassignProcedure(hri, null, true, !RegionReplicaUtil.isDefaultReplica(hri)); 856 } 857 return procs; 858 } 859 860 private AssignProcedure[] createAssignProcedures(final MasterProcedureEnv env, 861 final int regionReplication) { 862 final ServerName targetServer = getParentRegionServerName(env); 863 List<RegionInfo> daughterRegions = new ArrayList<RegionInfo>(2); 864 daughterRegions.add(daughter_1_RI); 865 daughterRegions.add(daughter_2_RI); 866 return createAssignProcedures(regionReplication, env, daughterRegions, targetServer); 867 } 868 869 private AssignProcedure[] createAssignProcedures(final int regionReplication, 870 final MasterProcedureEnv env, final List<RegionInfo> hris, final ServerName serverName) { 871 final AssignProcedure[] procs = new AssignProcedure[hris.size() * regionReplication]; 872 int procsIdx = 0; 873 for (int i = 0; i < hris.size(); ++i) { 874 // create procs for the primary region with the target server. 875 final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), 0); 876 procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, serverName); 877 } 878 if (regionReplication > 1) { 879 List<RegionInfo> regionReplicas = 880 new ArrayList<RegionInfo>(hris.size() * (regionReplication - 1)); 881 for (int i = 0; i < hris.size(); ++i) { 882 // We don't include primary replica here 883 for (int j = 1; j < regionReplication; ++j) { 884 regionReplicas.add(RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j)); 885 } 886 } 887 // for the replica regions exclude the primary region's server and call LB's roundRobin 888 // assignment 889 AssignProcedure[] replicaAssignProcs = env.getAssignmentManager() 890 .createRoundRobinAssignProcedures(regionReplicas, Collections.singletonList(serverName)); 891 for (AssignProcedure proc : replicaAssignProcs) { 892 procs[procsIdx++] = proc; 893 } 894 } 895 return procs; 896 } 897 898 private int getRegionReplication(final MasterProcedureEnv env) throws IOException { 899 final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 900 return htd.getRegionReplication(); 901 } 902 903 private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException { 904 MasterFileSystem fs = env.getMasterFileSystem(); 905 long maxSequenceId = WALSplitter.getMaxRegionSequenceId(env.getMasterConfiguration(), 906 getParentRegion(), fs::getFileSystem, fs::getWALFileSystem); 907 if (maxSequenceId > 0) { 908 WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(), 909 getWALRegionDir(env, daughter_1_RI), maxSequenceId); 910 WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(), 911 getWALRegionDir(env, daughter_2_RI), maxSequenceId); 912 } 913 } 914 915 @Override 916 protected boolean abort(MasterProcedureEnv env) { 917 // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all 918 // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this 919 // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022. 920 return isRollbackSupported(getCurrentState())? super.abort(env): false; 921 } 922}