/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.assignment;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
import org.apache.hadoop.hbase.quotas.QuotaExceededException;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.RegionSplitRestriction;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.StoreUtils;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WALSplitUtil;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;

/**
 * The procedure to split a region in a table. Takes lock on the parent region. It holds the lock
 * for the life of the procedure.
 * <p>
 * Throws exception on construction if determines context hostile to split (cluster going down or
 * master is shutting down or table is disabled).
 * </p>
 */
@InterfaceAudience.Private
public class SplitTableRegionProcedure
  extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
  // The two daughter regions created by the split; daughterOneRI covers
  // [parent startKey, bestSplitRow), daughterTwoRI covers [bestSplitRow, parent endKey).
  private RegionInfo daughterOneRI;
  private RegionInfo daughterTwoRI;
  // The row the region is split at; may be adjusted by the table's RegionSplitRestriction
  // or computed by the hosting RegionServer when not explicitly specified.
  private byte[] bestSplitRow;
  private RegionSplitPolicy splitPolicy;
  // exposed for unit testing
  boolean checkTableModifyInProgress = true;

  public SplitTableRegionProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }

  /**
   * Construct a split procedure for {@code regionToSplit} at {@code splitRow}.
   * Fails fast (throws) if the cluster context is hostile to a split or the
   * region/split-row is not splittable.
   * @param env           MasterProcedureEnv
   * @param regionToSplit the parent region to split
   * @param splitRow      the requested split point; may be null/empty, in which case the
   *                      best split row is asked of the hosting RegionServer
   */
  public SplitTableRegionProcedure(final MasterProcedureEnv env, final RegionInfo regionToSplit,
    final byte[] splitRow) throws IOException {
    super(env, regionToSplit);
    preflightChecks(env, true);
    // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here
    // we fail-fast on construction. There it skips the split with just a warning.
    checkOnline(env, regionToSplit);
    this.bestSplitRow = splitRow;
    TableDescriptor tableDescriptor =
      env.getMasterServices().getTableDescriptors().get(getTableName());
    Configuration conf = env.getMasterConfiguration();
    if (hasBestSplitRow()) {
      // Apply the split restriction for the table to the user-specified split point
      RegionSplitRestriction splitRestriction =
        RegionSplitRestriction.create(tableDescriptor, conf);
      byte[] restrictedSplitRow = splitRestriction.getRestrictedSplitPoint(bestSplitRow);
      if (!Bytes.equals(bestSplitRow, restrictedSplitRow)) {
        LOG.warn(
          "The specified split point {} violates the split restriction of the table. "
            + "Using {} as a split point.",
          Bytes.toStringBinary(bestSplitRow), Bytes.toStringBinary(restrictedSplitRow));
        bestSplitRow = restrictedSplitRow;
      }
    }
    checkSplittable(env, regionToSplit);
    final TableName table = regionToSplit.getTable();
    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
    this.daughterOneRI =
      RegionInfoBuilder.newBuilder(table).setStartKey(regionToSplit.getStartKey())
        .setEndKey(bestSplitRow).setSplit(false).setRegionId(rid).build();
    this.daughterTwoRI = RegionInfoBuilder.newBuilder(table).setStartKey(bestSplitRow)
      .setEndKey(regionToSplit.getEndKey()).setSplit(false).setRegionId(rid).build();

    if (tableDescriptor.getRegionSplitPolicyClassName() != null) {
      // Since we don't have region reference here, creating the split policy instance without it.
      // This can be used to invoke methods which don't require Region reference. This instantiation
      // of a class on Master-side though it only makes sense on the RegionServer-side is
      // for Phoenix Local Indexing. Refer HBASE-12583 for more information.
      Class<? extends RegionSplitPolicy> clazz =
        RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf);
      this.splitPolicy = ReflectionUtils.newInstance(clazz, conf);
    }
  }

  /**
   * Takes the region locks on the parent and both daughter regions (plus the table read lock
   * via the scheduler) for the life of the procedure.
   */
  @Override
  protected LockState acquireLock(final MasterProcedureEnv env) {
    if (
      env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(),
        daughterOneRI, daughterTwoRI)
    ) {
      try {
        LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks());
      } catch (IOException e) {
        // Ignore, just for logging
      }
      return LockState.LOCK_EVENT_WAIT;
    }
    return LockState.LOCK_ACQUIRED;
  }

  /** Releases the locks taken in {@link #acquireLock(MasterProcedureEnv)}. */
  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI,
      daughterTwoRI);
  }

  public RegionInfo getDaughterOneRI() {
    return daughterOneRI;
  }

  public RegionInfo getDaughterTwoRI() {
    return daughterTwoRI;
  }

  /** Returns true if a non-empty split row has been set (user-supplied or RS-computed). */
  private boolean hasBestSplitRow() {
    return bestSplitRow != null && bestSplitRow.length > 0;
  }

  /**
   * Check whether the region is splittable
   * @param env           MasterProcedureEnv
   * @param regionToSplit parent Region to be split
   */
  private void checkSplittable(final MasterProcedureEnv env, final RegionInfo regionToSplit)
    throws IOException {
    // Ask the remote RS if this region is splittable.
    // If we get an IOE, report it along w/ the failure so can see why we are not splittable at
    // this time.
    if (regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
      throw new IllegalArgumentException("Can't invoke split on non-default regions directly");
    }
    RegionStateNode node =
      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
    IOException splittableCheckIOE = null;
    boolean splittable = false;
    if (node != null) {
      try {
        GetRegionInfoResponse response;
        if (!hasBestSplitRow()) {
          LOG.info(
            "{} splitKey isn't explicitly specified, will try to find a best split key from RS {}",
            node.getRegionInfo().getRegionNameAsString(), node.getRegionLocation());
          // Ask the RS to also compute the best split row for us.
          response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(),
            node.getRegionInfo(), true);
          bestSplitRow =
            response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
        } else {
          response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(),
            node.getRegionInfo(), false);
        }
        splittable = response.hasSplittable() && response.getSplittable();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
        }
      } catch (IOException e) {
        splittableCheckIOE = e;
      }
    }

    if (!splittable) {
      IOException e =
        new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
      if (splittableCheckIOE != null) {
        e.initCause(splittableCheckIOE);
      }
      throw e;
    }

    if (!hasBestSplitRow()) {
      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, "
        + "maybe table is too small for auto split. For force split, try specifying split row");
    }

    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
      throw new DoNotRetryIOException(
        "Split row is equal to startkey: " + Bytes.toStringBinary(bestSplitRow));
    }

    if (!regionToSplit.containsRow(bestSplitRow)) {
      throw new DoNotRetryIOException("Split row is not inside region key range splitKey:"
        + Bytes.toStringBinary(bestSplitRow) + " region: " + regionToSplit);
    }
  }

  /**
   * Calculate daughter regionid to use.
   * @param hri Parent {@link RegionInfo}
   * @return Daughter region id (timestamp) to use.
   */
  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
    long rid = EnvironmentEdgeManager.currentTime();
    // Regionid is timestamp. Can't be less than that of parent else will insert
    // at wrong location in hbase:meta (See HBASE-710).
    if (rid < hri.getRegionId()) {
      LOG.warn("Clock skew; parent regions id is " + hri.getRegionId()
        + " but current time here is " + rid);
      rid = hri.getRegionId() + 1;
    }
    return rid;
  }

  /** Unassigns any non-default replicas of the parent region before splitting. */
  private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException {
    AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()),
      getRegionReplication(env));
  }

  private void checkClosedRegions(MasterProcedureEnv env) throws IOException {
    // theoretically this should not happen any more after we use TRSP, but anyway let's add a check
    // here
    AssignmentManagerUtil.checkClosedRegion(env, getParentRegion());
  }

  @Override
  protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state)
    throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_PREPARE:
          if (prepareSplitRegion(env)) {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
            break;
          } else {
            return Flow.NO_MORE_STATE;
          }
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          preSplitRegion(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          addChildProcedure(createUnassignProcedures(env));
          // createUnassignProcedures() can throw out IOException. If this happens,
          // it won't reach state SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGION and no parent regions
          // is closed as all created UnassignProcedures are rolled back. If it rolls back with
          // state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call openParentRegion(),
          // otherwise, it will result in OpenRegionProcedure for an already open region.
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          checkClosedRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
          removeNonDefaultReplicas(env);
          createDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
          break;
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          writeMaxSequenceIdFile(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          preSplitRegionBeforeMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
          break;
        case SPLIT_TABLE_REGION_UPDATE_META:
          updateMeta(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
          preSplitRegionAfterMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
          break;
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
          addChildProcedure(createAssignProcedures(env));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
          break;
        case SPLIT_TABLE_REGION_POST_OPERATION:
          postSplitRegion(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
      if (!isRollbackSupported(state)) {
        // We reach a state that cannot be rolled back. We just need to keep retrying.
        LOG.warn(msg, e);
      } else {
        LOG.error(msg, e);
        setFailure("master-split-regions", e);
      }
    }
    // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split
    return Flow.HAS_MORE_STATE;
  }

  /**
   * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously submitted
   * for parent region to be split (rollback doesn't wait on the completion of the AssignProcedure)
   * . This can be improved by changing rollback() to support sub-procedures. See HBASE-19851 for
   * details.
   */
  @Override
  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
    throws IOException, InterruptedException {
    LOG.trace("{} rollback state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_POST_OPERATION:
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
        case SPLIT_TABLE_REGION_UPDATE_META:
          // PONR
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          deleteDaughterRegions(env);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          openParentRegion(env);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          // If it rolls back with state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call
          // openParentRegion(), otherwise, it will result in OpenRegionProcedure for an
          // already open region.
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          postRollBackSplitRegion(env);
          break;
        case SPLIT_TABLE_REGION_PREPARE:
          rollbackPrepareSplit(env);
          break;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      // This will be retried. Unless there is a bug in the code,
      // this should be just a "temporary error" (e.g. network down)
      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state
        + " for splitting the region " + getParentRegion().getEncodedName() + " in table "
        + getTableName(), e);
      throw e;
    }
  }

  /*
   * Check whether we are in a state that can be rolled back
   */
  @Override
  protected boolean isRollbackSupported(final SplitTableRegionState state) {
    switch (state) {
      case SPLIT_TABLE_REGION_POST_OPERATION:
      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
      case SPLIT_TABLE_REGION_UPDATE_META:
        // It is not safe to rollback if we reach to these states.
        return false;
      default:
        break;
    }
    return true;
  }

  @Override
  protected SplitTableRegionState getState(final int stateId) {
    return SplitTableRegionState.forNumber(stateId);
  }

  @Override
  protected int getStateId(final SplitTableRegionState state) {
    return state.getNumber();
  }

  @Override
  protected SplitTableRegionState getInitialState() {
    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
  }

  /** Persists user info, the parent region, and the two daughter regions. */
  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.serializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
      MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
        .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI));
    serializer.serialize(splitTableRegionMsg.build());
  }

  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.deserializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
      serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
    // serializeStateData always writes exactly two child regions.
    assert (splitTableRegionsMsg.getChildRegionInfoCount() == 2);
    daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
    daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getClass().getSimpleName());
    sb.append(" table=");
    sb.append(getTableName());
    sb.append(", parent=");
    sb.append(getParentRegion().getShortNameToLog());
    sb.append(", daughterA=");
    sb.append(daughterOneRI.getShortNameToLog());
    sb.append(", daughterB=");
    sb.append(daughterTwoRI.getShortNameToLog());
  }

  /** The parent region is the region this procedure was constructed with (see super class). */
  private RegionInfo getParentRegion() {
    return getRegion();
  }

  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SPLIT;
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
  }

  /** The split row is, by construction, the start key of the second daughter. */
  private byte[] getSplitRow() {
    return daughterTwoRI.getStartKey();
  }

  // States the parent region is expected to be in for the split to proceed.
  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };

  /**
   * Prepare to Split region.
   * @param env MasterProcedureEnv
   * @return true if the split can proceed; false to skip the split (failure may also be set on
   *         the procedure, depending on the reason).
   */
  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
    // Fail if we are taking snapshot for the given table
    if (
      env.getMasterServices().getSnapshotManager()
        .isTableTakingAnySnapshot(getParentRegion().getTable())
    ) {
      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog()
        + ", because we are taking snapshot for the table " + getParentRegion().getTable()));
      return false;
    }

    /*
     * Sometimes a ModifyTableProcedure has edited a table descriptor to change the number of region
     * replicas for a table, but it has not yet opened/closed the new replicas. The
     * ModifyTableProcedure assumes that nobody else will do the opening/closing of the new
     * replicas, but a concurrent SplitTableRegionProcedure would violate that assumption.
     */
    if (checkTableModifyInProgress && isTableModificationInProgress(env)) {
      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog()
        + ", because there is an active procedure that is modifying the table "
        + getParentRegion().getTable()));
      return false;
    }

    // Check whether the region is splittable
    RegionStateNode node =
      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());

    if (node == null) {
      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
    }

    RegionInfo parentHRI = node.getRegionInfo();
    if (parentHRI == null) {
      LOG.info("Unsplittable; parent region is null; node={}", node);
      return false;
    }
    // Lookup the parent HRI state from the AM, which has the latest updated info.
    // Protect against the case where concurrent SPLIT requests came in and succeeded
    // just before us.
    if (node.isInState(State.SPLIT)) {
      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
      return false;
    }
    if (parentHRI.isSplit() || parentHRI.isOffline()) {
      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
      return false;
    }

    // expected parent to be online or closed
    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
      // We may have SPLIT already?
      setFailure(
        new IOException("Split " + parentHRI.getRegionNameAsString() + " FAILED because state="
          + node.getState() + "; expected " + Arrays.toString(EXPECTED_SPLIT_STATES)));
      return false;
    }

    // Mostly the below two checks are not used because we already check the switches before
    // submitting the split procedure. Just for safety, we are checking the switch again here.
    // Also, in case the switch was set to false after submission, this procedure can be rolled
    // back, thanks to this double check!
    // case 1: check for cluster level switch
    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
      setFailure(new IOException(
        "Split region " + parentHRI.getRegionNameAsString() + " failed due to split switch off"));
      return false;
    }
    // case 2: check for table level switch
    if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) {
      LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(),
        parentHRI);
      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString()
        + " failed as region split is disabled for the table"));
      return false;
    }

    // set node state as SPLITTING
    node.setState(State.SPLITTING);

    // Since we have the lock and the master is coordinating the operation
    // we are always able to split the region
    return true;
  }

  /**
   * Rollback prepare split region: undo the SPLITTING state set in
   * {@link #prepareSplitRegion(MasterProcedureEnv)}.
   * @param env MasterProcedureEnv
   */
  private void rollbackPrepareSplit(final MasterProcedureEnv env) {
    RegionStateNode parentRegionStateNode =
      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
    if (parentRegionStateNode.getState() == State.SPLITTING) {
      parentRegionStateNode.setState(State.OPEN);
    }
  }

  /**
   * Action before splitting region in a table.
   * @param env MasterProcedureEnv
   */
  private void preSplitRegion(final MasterProcedureEnv env)
    throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
    }

    // TODO: Clean up split and merge. Currently all over the place.
    // Notify QuotaManager and RegionNormalizer
    try {
      MasterQuotaManager masterQuotaManager = env.getMasterServices().getMasterQuotaManager();
      if (masterQuotaManager != null) {
        masterQuotaManager.onRegionSplit(this.getParentRegion());
      }
    } catch (QuotaExceededException e) {
      // TODO: why is this here? split requests can be submitted by actors other than the normalizer
      env.getMasterServices().getRegionNormalizerManager()
        .planSkipped(NormalizationPlan.PlanType.SPLIT);
      throw e;
    }
  }

  /**
   * Action after rollback a split table region action.
   * @param env MasterProcedureEnv
   */
  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postRollBackSplitRegionAction(getUser());
    }
  }

  /**
   * Rollback close parent region
   */
  private void openParentRegion(MasterProcedureEnv env) throws IOException {
    AssignmentManagerUtil.reopenRegionsForRollback(env,
      Collections.singletonList((getParentRegion())), getRegionReplication(env),
      getParentRegionServerName(env));
  }

  /**
   * Create daughter regions: split the parent's store files into reference files and commit both
   * daughter region directories (committed concurrently on a two-thread pool).
   */
  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
    final FileSystem fs = mfs.getFileSystem();
    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
      env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
    regionFs.createSplitsDir(daughterOneRI, daughterTwoRI);
    Pair<List<StoreFileInfo>, List<StoreFileInfo>> expectedReferences =
      splitStoreFiles(env, regionFs);
    final ExecutorService threadPool = Executors.newFixedThreadPool(2,
      new ThreadFactoryBuilder().setNameFormat("RegionCommitter-pool-%d").setDaemon(true)
        .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
    Future<Path> futureOne = threadPool.submit(new Callable<Path>() {
      @Override
      public Path call() throws IOException {
        return regionFs.commitDaughterRegion(daughterOneRI, expectedReferences.getFirst(), env);
      }
    });
    Future<Path> futureTwo = threadPool.submit(new Callable<Path>() {
      @Override
      public Path call() throws IOException {
        return regionFs.commitDaughterRegion(daughterTwoRI, expectedReferences.getSecond(), env);
      }
    });
    handleThreadPoolShutdown(threadPool, env.getMasterConfiguration());

    try {
      futureOne.get();
      futureTwo.get();
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    } catch (ExecutionException e) {
      throw new IOException("Daughter region commit failed", e);
    }
  }

  /**
   * Shuts down the pool and waits (bounded by the fileSplitTimeout configuration) for its tasks
   * to finish; throws if the tasks did not complete in time.
   */
  private void handleThreadPoolShutdown(ExecutorService threadPool, Configuration conf)
    throws IOException {
    threadPool.shutdown();
    // Wait for all the tasks to finish.
    // When splits ran on the RegionServer, how-long-to-wait-configuration was named
    // fileSplitTimeout. If set, use its value.
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
      conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
    try {
      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
      if (stillRunning) {
        threadPool.shutdownNow();
        // wait for the thread to shutdown completely.
        while (!threadPool.isTerminated()) {
          Thread.sleep(50);
        }
        throw new IOException(
          "Took too long to split the files and create the references, aborting split");
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
  }

  /** Rollback helper: removes both daughter region directories from the filesystem. */
  private void deleteDaughterRegions(final MasterProcedureEnv env) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
    HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
      tabledir, daughterOneRI);
    HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
      tabledir, daughterTwoRI);
  }

  /**
   * Split each of the parent region's store files into reference files for the two daughters,
   * using a thread pool sized by the number of files (capped by configuration).
   * @param env MasterProcedureEnv
   * @return pair of (daughter A reference files, daughter B reference files)
   */
  private Pair<List<StoreFileInfo>, List<StoreFileInfo>> splitStoreFiles(
    final MasterProcedureEnv env, final HRegionFileSystem regionFs) throws IOException {
    final Configuration conf = env.getMasterConfiguration();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    // The following code sets up a thread pool executor with as many slots as
    // there's files to split. It then fires up everything, waits for
    // completion and finally checks for any exception
    //
    // Note: From HBASE-26187, splitStoreFiles now creates daughter region dirs straight under the
    // table dir. In case of failure, the proc would go through this again, already existing
    // region dirs and split files would just be ignored, new split files should get created.
    int nbFiles = 0;
    final Map<String, Collection<StoreFileInfo>> files =
      new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount());
    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
      String family = cfd.getNameAsString();
      StoreFileTracker tracker =
        StoreFileTrackerFactory.create(env.getMasterConfiguration(), htd, cfd, regionFs);
      Collection<StoreFileInfo> sfis = tracker.load();
      if (sfis == null) {
        continue;
      }
      Collection<StoreFileInfo> filteredSfis = null;
      for (StoreFileInfo sfi : sfis) {
        // Filter. There is a lag cleaning up compacted reference files. They get cleared
        // after a delay in case outstanding Scanners still have references. Because of this,
        // the listing of the Store content may have straggler reference files. Skip these.
        // It should be safe to skip references at this point because we checked above with
        // the region if it thinks it is splittable and if we are here, it thinks it is
        // splittable.
        if (sfi.isReference()) {
          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
          continue;
        }
        if (filteredSfis == null) {
          filteredSfis = new ArrayList<StoreFileInfo>(sfis.size());
          files.put(family, filteredSfis);
        }
        filteredSfis.add(sfi);
        nbFiles++;
      }
    }
    if (nbFiles == 0) {
      // no file needs to be split.
      return new Pair<>(Collections.emptyList(), Collections.emptyList());
    }
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(
      conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
        conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
      nbFiles);
    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region="
      + getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
      new ThreadFactoryBuilder().setNameFormat("StoreFileSplitter-pool-%d").setDaemon(true)
        .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
    final List<Future<Pair<StoreFileInfo, StoreFileInfo>>> futures =
      new ArrayList<Future<Pair<StoreFileInfo, StoreFileInfo>>>(nbFiles);

    // Split each store file.
    for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) {
      byte[] familyName = Bytes.toBytes(e.getKey());
      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
      Collection<StoreFileInfo> storeFileInfos = e.getValue();
      final Collection<StoreFileInfo> storeFiles = storeFileInfos;
      if (storeFiles != null && storeFiles.size() > 0) {
        final Configuration storeConfiguration =
          StoreUtils.createStoreConfiguration(env.getMasterConfiguration(), htd, hcd);
        for (StoreFileInfo storeFileInfo : storeFiles) {
          // As this procedure is running on master, use CacheConfig.DISABLED means
          // don't cache any block.
          // We also need to pass through a suitable CompoundConfiguration as if this
          // is running in a regionserver's Store context, or we might not be able
          // to read the hfiles.
          storeFileInfo.setConf(storeConfiguration);
          StoreFileSplitter sfs = new StoreFileSplitter(regionFs, htd, hcd,
            new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED));
          futures.add(threadPool.submit(sfs));
        }
      }
    }
    handleThreadPoolShutdown(threadPool, conf);
    List<StoreFileInfo> daughterA = new ArrayList<>();
    List<StoreFileInfo> daughterB = new ArrayList<>();
    // Look for any exception
    for (Future<Pair<StoreFileInfo, StoreFileInfo>> future : futures) {
      try {
        Pair<StoreFileInfo, StoreFileInfo> p = future.get();
        if (p.getFirst() != null) {
          daughterA.add(p.getFirst());
        }
        if (p.getSecond() != null) {
          daughterB.add(p.getSecond());
        }
      } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
      } catch (ExecutionException e) {
        throw new IOException(e);
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " split storefiles for region "
        + getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA
        + " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<>(daughterA, daughterB);
  }

  private Pair<StoreFileInfo, StoreFileInfo> splitStoreFile(HRegionFileSystem regionFs,
    TableDescriptor htd, ColumnFamilyDescriptor hcd, HStoreFile sf) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting started for store file: " + sf.getPath()
        + " for region: " + getParentRegion().getShortNameToLog());
    }

    final byte[] splitRow = getSplitRow();
    final String familyName = hcd.getNameAsString();
    StoreFileTracker daughterOneSft =
      StoreFileTrackerFactory.create(regionFs.getFileSystem().getConf(), htd, hcd,
        HRegionFileSystem.create(regionFs.getFileSystem().getConf(), regionFs.getFileSystem(),
          regionFs.getTableDir(), daughterOneRI));
    StoreFileTracker daughterTwoSft =
StoreFileTrackerFactory.create(regionFs.getFileSystem().getConf(), htd, hcd, 852 HRegionFileSystem.create(regionFs.getFileSystem().getConf(), regionFs.getFileSystem(), 853 regionFs.getTableDir(), daughterTwoRI)); 854 final StoreFileInfo sfiFirst = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, 855 splitRow, false, splitPolicy, daughterOneSft); 856 final StoreFileInfo sfiSecond = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, 857 splitRow, true, splitPolicy, daughterTwoSft); 858 if (LOG.isDebugEnabled()) { 859 LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + sf.getPath() 860 + " for region: " + getParentRegion().getShortNameToLog()); 861 } 862 return new Pair<StoreFileInfo, StoreFileInfo>(sfiFirst, sfiSecond); 863 } 864 865 /** 866 * Utility class used to do the file splitting / reference writing in parallel instead of 867 * sequentially. 868 */ 869 private class StoreFileSplitter implements Callable<Pair<StoreFileInfo, StoreFileInfo>> { 870 private final HRegionFileSystem regionFs; 871 private final ColumnFamilyDescriptor hcd; 872 private final HStoreFile sf; 873 private final TableDescriptor htd; 874 875 /** 876 * Constructor that takes what it needs to split 877 * @param regionFs the file system 878 * @param hcd Family that contains the store file 879 * @param sf which file 880 */ 881 public StoreFileSplitter(HRegionFileSystem regionFs, TableDescriptor htd, 882 ColumnFamilyDescriptor hcd, HStoreFile sf) { 883 this.regionFs = regionFs; 884 this.sf = sf; 885 this.hcd = hcd; 886 this.htd = htd; 887 } 888 889 @Override 890 public Pair<StoreFileInfo, StoreFileInfo> call() throws IOException { 891 return splitStoreFile(regionFs, htd, hcd, sf); 892 } 893 } 894 895 /** 896 * Post split region actions before the Point-of-No-Return step 897 * @param env MasterProcedureEnv 898 **/ 899 private void preSplitRegionBeforeMETA(final MasterProcedureEnv env) 900 throws IOException, InterruptedException { 901 final List<Mutation> 
metaEntries = new ArrayList<Mutation>(); 902 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 903 if (cpHost != null) { 904 cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser()); 905 try { 906 for (Mutation p : metaEntries) { 907 RegionInfo.parseRegionName(p.getRow()); 908 } 909 } catch (IOException e) { 910 LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as " 911 + "region name." + "Mutations from coprocessor should only for hbase:meta table."); 912 throw e; 913 } 914 } 915 } 916 917 /** 918 * Add daughter regions to META 919 * @param env MasterProcedureEnv 920 */ 921 private void updateMeta(final MasterProcedureEnv env) throws IOException { 922 env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env), 923 daughterOneRI, daughterTwoRI); 924 } 925 926 /** 927 * Pre split region actions after the Point-of-No-Return step 928 * @param env MasterProcedureEnv 929 **/ 930 private void preSplitRegionAfterMETA(final MasterProcedureEnv env) 931 throws IOException, InterruptedException { 932 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 933 if (cpHost != null) { 934 cpHost.preSplitAfterMETAAction(getUser()); 935 } 936 } 937 938 /** 939 * Post split region actions 940 * @param env MasterProcedureEnv 941 **/ 942 private void postSplitRegion(final MasterProcedureEnv env) throws IOException { 943 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 944 if (cpHost != null) { 945 cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser()); 946 } 947 } 948 949 private ServerName getParentRegionServerName(final MasterProcedureEnv env) { 950 return env.getMasterServices().getAssignmentManager().getRegionStates() 951 .getRegionServerOfRegion(getParentRegion()); 952 } 953 954 private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env) 955 throws IOException { 956 return 
AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env, 957 Stream.of(getParentRegion()), getRegionReplication(env)); 958 } 959 960 private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env) 961 throws IOException { 962 List<RegionInfo> hris = new ArrayList<RegionInfo>(2); 963 hris.add(daughterOneRI); 964 hris.add(daughterTwoRI); 965 return AssignmentManagerUtil.createAssignProceduresForSplitDaughters(env, hris, 966 getRegionReplication(env), getParentRegionServerName(env)); 967 } 968 969 private int getRegionReplication(final MasterProcedureEnv env) throws IOException { 970 final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 971 return htd.getRegionReplication(); 972 } 973 974 private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException { 975 MasterFileSystem fs = env.getMasterFileSystem(); 976 long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(), 977 getParentRegion(), fs::getFileSystem, fs::getWALFileSystem); 978 if (maxSequenceId > 0) { 979 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 980 getWALRegionDir(env, daughterOneRI), maxSequenceId); 981 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 982 getWALRegionDir(env, daughterTwoRI), maxSequenceId); 983 } 984 } 985 986 @Override 987 protected boolean abort(MasterProcedureEnv env) { 988 // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all 989 // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this 990 // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022. 991 return isRollbackSupported(getCurrentState()) ? super.abort(env) : false; 992 } 993}