001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import java.io.IOException; 021import java.io.InterruptedIOException; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.HashMap; 027import java.util.List; 028import java.util.Map; 029import java.util.concurrent.Callable; 030import java.util.concurrent.ExecutionException; 031import java.util.concurrent.ExecutorService; 032import java.util.concurrent.Executors; 033import java.util.concurrent.Future; 034import java.util.concurrent.TimeUnit; 035import java.util.stream.Stream; 036import org.apache.hadoop.conf.Configuration; 037import org.apache.hadoop.fs.FileSystem; 038import org.apache.hadoop.fs.Path; 039import org.apache.hadoop.hbase.DoNotRetryIOException; 040import org.apache.hadoop.hbase.HConstants; 041import org.apache.hadoop.hbase.ServerName; 042import org.apache.hadoop.hbase.TableName; 043import org.apache.hadoop.hbase.UnknownRegionException; 044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 045import org.apache.hadoop.hbase.client.MasterSwitchType; 046import org.apache.hadoop.hbase.client.Mutation; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import org.apache.hadoop.hbase.client.RegionInfoBuilder; 049import org.apache.hadoop.hbase.client.TableDescriptor; 050import org.apache.hadoop.hbase.io.hfile.CacheConfig; 051import org.apache.hadoop.hbase.master.MasterCoprocessorHost; 052import org.apache.hadoop.hbase.master.MasterFileSystem; 053import org.apache.hadoop.hbase.master.RegionState.State; 054import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan; 055import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; 056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 057import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; 058import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 059import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 060import org.apache.hadoop.hbase.quotas.MasterQuotaManager; 061import org.apache.hadoop.hbase.quotas.QuotaExceededException; 062import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 063import org.apache.hadoop.hbase.regionserver.HStore; 064import org.apache.hadoop.hbase.regionserver.HStoreFile; 065import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy; 066import org.apache.hadoop.hbase.regionserver.RegionSplitRestriction; 067import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 068import org.apache.hadoop.hbase.regionserver.StoreUtils; 069import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; 070import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; 071import org.apache.hadoop.hbase.util.Bytes; 072import org.apache.hadoop.hbase.util.CommonFSUtils; 073import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 074import org.apache.hadoop.hbase.util.FSUtils; 075import org.apache.hadoop.hbase.util.Pair; 076import org.apache.hadoop.hbase.util.Threads; 077import org.apache.hadoop.hbase.wal.WALSplitUtil; 078import org.apache.hadoop.util.ReflectionUtils; 079import org.apache.yetus.audience.InterfaceAudience; 080import org.slf4j.Logger; 081import org.slf4j.LoggerFactory; 082 083import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; 084 085import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 086import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; 087import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 088import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState; 089 090/** 091 * The procedure to split a region in a table. Takes lock on the parent region. It holds the lock 092 * for the life of the procedure. 093 * <p> 094 * Throws exception on construction if determines context hostile to spllt (cluster going down or 095 * master is shutting down or table is disabled). 096 * </p> 097 */ 098@InterfaceAudience.Private 099public class SplitTableRegionProcedure 100 extends AbstractStateMachineRegionProcedure<SplitTableRegionState> { 101 private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class); 102 private RegionInfo daughterOneRI; 103 private RegionInfo daughterTwoRI; 104 private byte[] bestSplitRow; 105 private RegionSplitPolicy splitPolicy; 106 // exposed for unit testing 107 boolean checkTableModifyInProgress = true; 108 109 public SplitTableRegionProcedure() { 110 // Required by the Procedure framework to create the procedure on replay 111 } 112 113 public SplitTableRegionProcedure(final MasterProcedureEnv env, final RegionInfo regionToSplit, 114 final byte[] splitRow) throws IOException { 115 super(env, regionToSplit); 116 preflightChecks(env, true); 117 // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here 118 // we fail-fast on construction. There it skips the split with just a warning. 119 checkOnline(env, regionToSplit); 120 this.bestSplitRow = splitRow; 121 TableDescriptor tableDescriptor = 122 env.getMasterServices().getTableDescriptors().get(getTableName()); 123 Configuration conf = env.getMasterConfiguration(); 124 if (hasBestSplitRow()) { 125 // Apply the split restriction for the table to the user-specified split point 126 RegionSplitRestriction splitRestriction = 127 RegionSplitRestriction.create(tableDescriptor, conf); 128 byte[] restrictedSplitRow = splitRestriction.getRestrictedSplitPoint(bestSplitRow); 129 if (!Bytes.equals(bestSplitRow, restrictedSplitRow)) { 130 LOG.warn( 131 "The specified split point {} violates the split restriction of the table. " 132 + "Using {} as a split point.", 133 Bytes.toStringBinary(bestSplitRow), Bytes.toStringBinary(restrictedSplitRow)); 134 bestSplitRow = restrictedSplitRow; 135 } 136 } 137 checkSplittable(env, regionToSplit); 138 final TableName table = regionToSplit.getTable(); 139 final long rid = getDaughterRegionIdTimestamp(regionToSplit); 140 this.daughterOneRI = 141 RegionInfoBuilder.newBuilder(table).setStartKey(regionToSplit.getStartKey()) 142 .setEndKey(bestSplitRow).setSplit(false).setRegionId(rid).build(); 143 this.daughterTwoRI = RegionInfoBuilder.newBuilder(table).setStartKey(bestSplitRow) 144 .setEndKey(regionToSplit.getEndKey()).setSplit(false).setRegionId(rid).build(); 145 146 if (tableDescriptor.getRegionSplitPolicyClassName() != null) { 147 // Since we don't have region reference here, creating the split policy instance without it. 148 // This can be used to invoke methods which don't require Region reference. This instantiation 149 // of a class on Master-side though it only makes sense on the RegionServer-side is 150 // for Phoenix Local Indexing. Refer HBASE-12583 for more information. 151 Class<? extends RegionSplitPolicy> clazz = 152 RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf); 153 this.splitPolicy = ReflectionUtils.newInstance(clazz, conf); 154 } 155 } 156 157 @Override 158 protected LockState acquireLock(final MasterProcedureEnv env) { 159 if ( 160 env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(), 161 daughterOneRI, daughterTwoRI) 162 ) { 163 try { 164 LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks()); 165 } catch (IOException e) { 166 // Ignore, just for logging 167 } 168 return LockState.LOCK_EVENT_WAIT; 169 } 170 return LockState.LOCK_ACQUIRED; 171 } 172 173 @Override 174 protected void releaseLock(final MasterProcedureEnv env) { 175 env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI, 176 daughterTwoRI); 177 } 178 179 public RegionInfo getDaughterOneRI() { 180 return daughterOneRI; 181 } 182 183 public RegionInfo getDaughterTwoRI() { 184 return daughterTwoRI; 185 } 186 187 private boolean hasBestSplitRow() { 188 return bestSplitRow != null && bestSplitRow.length > 0; 189 } 190 191 /** 192 * Check whether the region is splittable 193 * @param env MasterProcedureEnv 194 * @param regionToSplit parent Region to be split 195 */ 196 private void checkSplittable(final MasterProcedureEnv env, final RegionInfo regionToSplit) 197 throws IOException { 198 // Ask the remote RS if this region is splittable. 199 // If we get an IOE, report it along w/ the failure so can see why we are not splittable at 200 // this time. 201 if (regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 202 throw new IllegalArgumentException("Can't invoke split on non-default regions directly"); 203 } 204 RegionStateNode node = 205 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 206 IOException splittableCheckIOE = null; 207 boolean splittable = false; 208 if (node != null) { 209 try { 210 GetRegionInfoResponse response; 211 if (!hasBestSplitRow()) { 212 LOG.info( 213 "{} splitKey isn't explicitly specified, will try to find a best split key from RS {}", 214 node.getRegionInfo().getRegionNameAsString(), node.getRegionLocation()); 215 response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(), 216 node.getRegionInfo(), true); 217 bestSplitRow = 218 response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null; 219 } else { 220 response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(), 221 node.getRegionInfo(), false); 222 } 223 splittable = response.hasSplittable() && response.getSplittable(); 224 if (LOG.isDebugEnabled()) { 225 LOG.debug("Splittable=" + splittable + " " + node.toShortString()); 226 } 227 } catch (IOException e) { 228 splittableCheckIOE = e; 229 } 230 } 231 232 if (!splittable) { 233 IOException e = 234 new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable"); 235 if (splittableCheckIOE != null) { 236 e.initCause(splittableCheckIOE); 237 } 238 throw e; 239 } 240 241 if (!hasBestSplitRow()) { 242 throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " 243 + "maybe table is too small for auto split. For force split, try specifying split row"); 244 } 245 246 if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) { 247 throw new DoNotRetryIOException( 248 "Split row is equal to startkey: " + Bytes.toStringBinary(bestSplitRow)); 249 } 250 251 if (!regionToSplit.containsRow(bestSplitRow)) { 252 throw new DoNotRetryIOException("Split row is not inside region key range splitKey:" 253 + Bytes.toStringBinary(bestSplitRow) + " region: " + regionToSplit); 254 } 255 } 256 257 /** 258 * Calculate daughter regionid to use. 259 * @param hri Parent {@link RegionInfo} 260 * @return Daughter region id (timestamp) to use. 261 */ 262 private static long getDaughterRegionIdTimestamp(final RegionInfo hri) { 263 long rid = EnvironmentEdgeManager.currentTime(); 264 // Regionid is timestamp. Can't be less than that of parent else will insert 265 // at wrong location in hbase:meta (See HBASE-710). 266 if (rid < hri.getRegionId()) { 267 LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() 268 + " but current time here is " + rid); 269 rid = hri.getRegionId() + 1; 270 } 271 return rid; 272 } 273 274 private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException { 275 AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()), 276 getRegionReplication(env)); 277 } 278 279 private void checkClosedRegions(MasterProcedureEnv env) throws IOException { 280 // theoretically this should not happen any more after we use TRSP, but anyway let's add a check 281 // here 282 AssignmentManagerUtil.checkClosedRegion(env, getParentRegion()); 283 } 284 285 @Override 286 protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state) 287 throws InterruptedException { 288 LOG.trace("{} execute state={}", this, state); 289 290 try { 291 switch (state) { 292 case SPLIT_TABLE_REGION_PREPARE: 293 if (prepareSplitRegion(env)) { 294 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION); 295 break; 296 } else { 297 return Flow.NO_MORE_STATE; 298 } 299 case SPLIT_TABLE_REGION_PRE_OPERATION: 300 preSplitRegion(env); 301 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION); 302 break; 303 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 304 addChildProcedure(createUnassignProcedures(env)); 305 // createUnassignProcedures() can throw out IOException. If this happens, 306 // it wont reach state SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGION and no parent regions 307 // is closed as all created UnassignProcedures are rolled back. If it rolls back with 308 // state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call openParentRegion(), 309 // otherwise, it will result in OpenRegionProcedure for an already open region. 310 setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS); 311 break; 312 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 313 checkClosedRegions(env); 314 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS); 315 break; 316 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 317 removeNonDefaultReplicas(env); 318 createDaughterRegions(env); 319 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE); 320 break; 321 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 322 writeMaxSequenceIdFile(env); 323 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META); 324 break; 325 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 326 preSplitRegionBeforeMETA(env); 327 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META); 328 break; 329 case SPLIT_TABLE_REGION_UPDATE_META: 330 updateMeta(env); 331 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META); 332 break; 333 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 334 preSplitRegionAfterMETA(env); 335 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS); 336 break; 337 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 338 addChildProcedure(createAssignProcedures(env)); 339 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION); 340 break; 341 case SPLIT_TABLE_REGION_POST_OPERATION: 342 postSplitRegion(env); 343 return Flow.NO_MORE_STATE; 344 default: 345 throw new UnsupportedOperationException(this + " unhandled state=" + state); 346 } 347 } catch (IOException e) { 348 String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this; 349 if (!isRollbackSupported(state)) { 350 // We reach a state that cannot be rolled back. We just need to keep retrying. 351 LOG.warn(msg, e); 352 } else { 353 LOG.error(msg, e); 354 setFailure("master-split-regions", e); 355 } 356 } 357 // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split 358 return Flow.HAS_MORE_STATE; 359 } 360 361 /** 362 * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously submitted 363 * for parent region to be split (rollback doesn't wait on the completion of the AssignProcedure) 364 * . This can be improved by changing rollback() to support sub-procedures. See HBASE-19851 for 365 * details. 366 */ 367 @Override 368 protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state) 369 throws IOException, InterruptedException { 370 LOG.trace("{} rollback state={}", this, state); 371 372 try { 373 switch (state) { 374 case SPLIT_TABLE_REGION_POST_OPERATION: 375 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 376 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 377 case SPLIT_TABLE_REGION_UPDATE_META: 378 // PONR 379 throw new UnsupportedOperationException(this + " unhandled state=" + state); 380 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 381 break; 382 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 383 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 384 deleteDaughterRegions(env); 385 break; 386 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 387 openParentRegion(env); 388 break; 389 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 390 // If it rolls back with state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call 391 // openParentRegion(), otherwise, it will result in OpenRegionProcedure for an 392 // already open region. 393 break; 394 case SPLIT_TABLE_REGION_PRE_OPERATION: 395 postRollBackSplitRegion(env); 396 break; 397 case SPLIT_TABLE_REGION_PREPARE: 398 rollbackPrepareSplit(env); 399 break; 400 default: 401 throw new UnsupportedOperationException(this + " unhandled state=" + state); 402 } 403 } catch (IOException e) { 404 // This will be retried. Unless there is a bug in the code, 405 // this should be just a "temporary error" (e.g. network down) 406 LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state 407 + " for splitting the region " + getParentRegion().getEncodedName() + " in table " 408 + getTableName(), e); 409 throw e; 410 } 411 } 412 413 /* 414 * Check whether we are in the state that can be rollback 415 */ 416 @Override 417 protected boolean isRollbackSupported(final SplitTableRegionState state) { 418 switch (state) { 419 case SPLIT_TABLE_REGION_POST_OPERATION: 420 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 421 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 422 case SPLIT_TABLE_REGION_UPDATE_META: 423 // It is not safe to rollback if we reach to these states. 424 return false; 425 default: 426 break; 427 } 428 return true; 429 } 430 431 @Override 432 protected SplitTableRegionState getState(final int stateId) { 433 return SplitTableRegionState.forNumber(stateId); 434 } 435 436 @Override 437 protected int getStateId(final SplitTableRegionState state) { 438 return state.getNumber(); 439 } 440 441 @Override 442 protected SplitTableRegionState getInitialState() { 443 return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE; 444 } 445 446 @Override 447 protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { 448 super.serializeStateData(serializer); 449 450 final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg = 451 MasterProcedureProtos.SplitTableRegionStateData.newBuilder() 452 .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser())) 453 .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion())) 454 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI)) 455 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI)); 456 serializer.serialize(splitTableRegionMsg.build()); 457 } 458 459 @Override 460 protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { 461 super.deserializeStateData(serializer); 462 463 final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg = 464 serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class); 465 setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo())); 466 setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo())); 467 assert (splitTableRegionsMsg.getChildRegionInfoCount() == 2); 468 daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0)); 469 daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1)); 470 } 471 472 @Override 473 public void toStringClassDetails(StringBuilder sb) { 474 sb.append(getClass().getSimpleName()); 475 sb.append(" table="); 476 sb.append(getTableName()); 477 sb.append(", parent="); 478 sb.append(getParentRegion().getShortNameToLog()); 479 sb.append(", daughterA="); 480 sb.append(daughterOneRI.getShortNameToLog()); 481 sb.append(", daughterB="); 482 sb.append(daughterTwoRI.getShortNameToLog()); 483 } 484 485 private RegionInfo getParentRegion() { 486 return getRegion(); 487 } 488 489 @Override 490 public TableOperationType getTableOperationType() { 491 return TableOperationType.REGION_SPLIT; 492 } 493 494 @Override 495 protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { 496 return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics(); 497 } 498 499 private byte[] getSplitRow() { 500 return daughterTwoRI.getStartKey(); 501 } 502 503 private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED }; 504 505 /** 506 * Prepare to Split region. 507 * @param env MasterProcedureEnv 508 */ 509 public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { 510 // Fail if we are taking snapshot for the given table 511 if ( 512 env.getMasterServices().getSnapshotManager() 513 .isTableTakingAnySnapshot(getParentRegion().getTable()) 514 ) { 515 setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() 516 + ", because we are taking snapshot for the table " + getParentRegion().getTable())); 517 return false; 518 } 519 520 /* 521 * Sometimes a ModifyTableProcedure has edited a table descriptor to change the number of region 522 * replicas for a table, but it has not yet opened/closed the new replicas. The 523 * ModifyTableProcedure assumes that nobody else will do the opening/closing of the new 524 * replicas, but a concurrent SplitTableRegionProcedure would violate that assumption. 525 */ 526 if (checkTableModifyInProgress && isTableModificationInProgress(env)) { 527 setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() 528 + ", because there is an active procedure that is modifying the table " 529 + getParentRegion().getTable())); 530 return false; 531 } 532 533 // Check whether the region is splittable 534 RegionStateNode node = 535 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 536 537 if (node == null) { 538 throw new UnknownRegionException(getParentRegion().getRegionNameAsString()); 539 } 540 541 RegionInfo parentHRI = node.getRegionInfo(); 542 if (parentHRI == null) { 543 LOG.info("Unsplittable; parent region is null; node={}", node); 544 return false; 545 } 546 // Lookup the parent HRI state from the AM, which has the latest updated info. 547 // Protect against the case where concurrent SPLIT requests came in and succeeded 548 // just before us. 549 if (node.isInState(State.SPLIT)) { 550 LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT"); 551 return false; 552 } 553 if (parentHRI.isSplit() || parentHRI.isOffline()) { 554 LOG.info("Split of " + parentHRI + " skipped because offline/split."); 555 return false; 556 } 557 558 // expected parent to be online or closed 559 if (!node.isInState(EXPECTED_SPLIT_STATES)) { 560 // We may have SPLIT already? 561 setFailure( 562 new IOException("Split " + parentHRI.getRegionNameAsString() + " FAILED because state=" 563 + node.getState() + "; expected " + Arrays.toString(EXPECTED_SPLIT_STATES))); 564 return false; 565 } 566 567 // Mostly the below two checks are not used because we already check the switches before 568 // submitting the split procedure. Just for safety, we are checking the switch again here. 569 // Also, in case the switch was set to false after submission, this procedure can be rollbacked, 570 // thanks to this double check! 571 // case 1: check for cluster level switch 572 if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { 573 LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); 574 setFailure(new IOException( 575 "Split region " + parentHRI.getRegionNameAsString() + " failed due to split switch off")); 576 return false; 577 } 578 // case 2: check for table level switch 579 if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) { 580 LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(), 581 parentHRI); 582 setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() 583 + " failed as region split is disabled for the table")); 584 return false; 585 } 586 587 // set node state as SPLITTING 588 node.setState(State.SPLITTING); 589 590 // Since we have the lock and the master is coordinating the operation 591 // we are always able to split the region 592 return true; 593 } 594 595 /** 596 * Rollback prepare split region 597 * @param env MasterProcedureEnv 598 */ 599 private void rollbackPrepareSplit(final MasterProcedureEnv env) { 600 RegionStateNode parentRegionStateNode = 601 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 602 if (parentRegionStateNode.getState() == State.SPLITTING) { 603 parentRegionStateNode.setState(State.OPEN); 604 } 605 } 606 607 /** 608 * Action before splitting region in a table. 609 * @param env MasterProcedureEnv 610 */ 611 private void preSplitRegion(final MasterProcedureEnv env) 612 throws IOException, InterruptedException { 613 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 614 if (cpHost != null) { 615 cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser()); 616 } 617 618 // TODO: Clean up split and merge. Currently all over the place. 619 // Notify QuotaManager and RegionNormalizer 620 try { 621 MasterQuotaManager masterQuotaManager = env.getMasterServices().getMasterQuotaManager(); 622 if (masterQuotaManager != null) { 623 masterQuotaManager.onRegionSplit(this.getParentRegion()); 624 } 625 } catch (QuotaExceededException e) { 626 // TODO: why is this here? split requests can be submitted by actors other than the normalizer 627 env.getMasterServices().getRegionNormalizerManager() 628 .planSkipped(NormalizationPlan.PlanType.SPLIT); 629 throw e; 630 } 631 } 632 633 /** 634 * Action after rollback a split table region action. 635 * @param env MasterProcedureEnv 636 */ 637 private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException { 638 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 639 if (cpHost != null) { 640 cpHost.postRollBackSplitRegionAction(getUser()); 641 } 642 } 643 644 /** 645 * Rollback close parent region 646 */ 647 private void openParentRegion(MasterProcedureEnv env) throws IOException { 648 AssignmentManagerUtil.reopenRegionsForRollback(env, 649 Collections.singletonList((getParentRegion())), getRegionReplication(env), 650 getParentRegionServerName(env)); 651 } 652 653 /** 654 * Create daughter regions 655 */ 656 public void createDaughterRegions(final MasterProcedureEnv env) throws IOException { 657 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 658 final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName()); 659 final FileSystem fs = mfs.getFileSystem(); 660 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( 661 env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false); 662 regionFs.createSplitsDir(daughterOneRI, daughterTwoRI); 663 664 Pair<List<Path>, List<Path>> expectedReferences = splitStoreFiles(env, regionFs); 665 666 assertSplitResultFilesCount(fs, expectedReferences.getFirst().size(), 667 regionFs.getSplitsDir(daughterOneRI)); 668 regionFs.commitDaughterRegion(daughterOneRI, expectedReferences.getFirst(), env); 669 assertSplitResultFilesCount(fs, expectedReferences.getFirst().size(), 670 new Path(tabledir, daughterOneRI.getEncodedName())); 671 672 assertSplitResultFilesCount(fs, expectedReferences.getSecond().size(), 673 regionFs.getSplitsDir(daughterTwoRI)); 674 regionFs.commitDaughterRegion(daughterTwoRI, expectedReferences.getSecond(), env); 675 assertSplitResultFilesCount(fs, expectedReferences.getSecond().size(), 676 new Path(tabledir, daughterTwoRI.getEncodedName())); 677 } 678 679 private void deleteDaughterRegions(final MasterProcedureEnv env) throws IOException { 680 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 681 final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName()); 682 HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(), 683 tabledir, daughterOneRI); 684 HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(), 685 tabledir, daughterTwoRI); 686 } 687 688 /** 689 * Create Split directory 690 * @param env MasterProcedureEnv 691 */ 692 private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv env, 693 final HRegionFileSystem regionFs) throws IOException { 694 final Configuration conf = env.getMasterConfiguration(); 695 TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 696 // The following code sets up a thread pool executor with as many slots as 697 // there's files to split. It then fires up everything, waits for 698 // completion and finally checks for any exception 699 // 700 // Note: From HBASE-26187, splitStoreFiles now creates daughter region dirs straight under the 701 // table dir. In case of failure, the proc would go through this again, already existing 702 // region dirs and split files would just be ignored, new split files should get created. 703 int nbFiles = 0; 704 final Map<String, Collection<StoreFileInfo>> files = 705 new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount()); 706 for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) { 707 String family = cfd.getNameAsString(); 708 StoreFileTracker tracker = 709 StoreFileTrackerFactory.create(env.getMasterConfiguration(), htd, cfd, regionFs); 710 Collection<StoreFileInfo> sfis = tracker.load(); 711 if (sfis == null) { 712 continue; 713 } 714 Collection<StoreFileInfo> filteredSfis = null; 715 for (StoreFileInfo sfi : sfis) { 716 // Filter. There is a lag cleaning up compacted reference files. They get cleared 717 // after a delay in case outstanding Scanners still have references. Because of this, 718 // the listing of the Store content may have straggler reference files. Skip these. 719 // It should be safe to skip references at this point because we checked above with 720 // the region if it thinks it is splittable and if we are here, it thinks it is 721 // splitable. 722 if (sfi.isReference()) { 723 LOG.info("Skipping split of " + sfi + "; presuming ready for archiving."); 724 continue; 725 } 726 if (filteredSfis == null) { 727 filteredSfis = new ArrayList<StoreFileInfo>(sfis.size()); 728 files.put(family, filteredSfis); 729 } 730 filteredSfis.add(sfi); 731 nbFiles++; 732 } 733 } 734 if (nbFiles == 0) { 735 // no file needs to be splitted. 736 return new Pair<>(Collections.emptyList(), Collections.emptyList()); 737 } 738 // Max #threads is the smaller of the number of storefiles or the default max determined above. 739 int maxThreads = Math.min( 740 conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, 741 conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)), 742 nbFiles); 743 LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" 744 + getParentRegion().getShortNameToLog() + ", threads=" + maxThreads); 745 final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads, 746 new ThreadFactoryBuilder().setNameFormat("StoreFileSplitter-pool-%d").setDaemon(true) 747 .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build()); 748 final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles); 749 750 // Split each store file. 751 for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) { 752 byte[] familyName = Bytes.toBytes(e.getKey()); 753 final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName); 754 Collection<StoreFileInfo> storeFileInfos = e.getValue(); 755 final Collection<StoreFileInfo> storeFiles = storeFileInfos; 756 if (storeFiles != null && storeFiles.size() > 0) { 757 final Configuration storeConfiguration = 758 StoreUtils.createStoreConfiguration(env.getMasterConfiguration(), htd, hcd); 759 for (StoreFileInfo storeFileInfo : storeFiles) { 760 // As this procedure is running on master, use CacheConfig.DISABLED means 761 // don't cache any block. 762 // We also need to pass through a suitable CompoundConfiguration as if this 763 // is running in a regionserver's Store context, or we might not be able 764 // to read the hfiles. 765 storeFileInfo.setConf(storeConfiguration); 766 StoreFileSplitter sfs = new StoreFileSplitter(regionFs, htd, hcd, 767 new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED)); 768 futures.add(threadPool.submit(sfs)); 769 } 770 } 771 } 772 // Shutdown the pool 773 threadPool.shutdown(); 774 775 // Wait for all the tasks to finish. 776 // When splits ran on the RegionServer, how-long-to-wait-configuration was named 777 // hbase.regionserver.fileSplitTimeout. If set, use its value. 778 long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 779 conf.getLong("hbase.regionserver.fileSplitTimeout", 600000)); 780 try { 781 boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS); 782 if (stillRunning) { 783 threadPool.shutdownNow(); 784 // wait for the thread to shutdown completely. 785 while (!threadPool.isTerminated()) { 786 Thread.sleep(50); 787 } 788 throw new IOException( 789 "Took too long to split the" + " files and create the references, aborting split"); 790 } 791 } catch (InterruptedException e) { 792 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 793 } 794 795 List<Path> daughterA = new ArrayList<>(); 796 List<Path> daughterB = new ArrayList<>(); 797 // Look for any exception 798 for (Future<Pair<Path, Path>> future : futures) { 799 try { 800 Pair<Path, Path> p = future.get(); 801 if (p.getFirst() != null) { 802 daughterA.add(p.getFirst()); 803 } 804 if (p.getSecond() != null) { 805 daughterB.add(p.getSecond()); 806 } 807 } catch (InterruptedException e) { 808 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 809 } catch (ExecutionException e) { 810 throw new IOException(e); 811 } 812 } 813 814 if (LOG.isDebugEnabled()) { 815 LOG.debug("pid=" + getProcId() + " split storefiles for region " 816 + getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA 817 + " storefiles, Daughter B: " + daughterB + " storefiles."); 818 } 819 return new Pair<>(daughterA, daughterB); 820 } 821 822 private void assertSplitResultFilesCount(final FileSystem fs, 823 final int expectedSplitResultFileCount, Path dir) throws IOException { 824 if (expectedSplitResultFileCount != 0) { 825 int resultFileCount = FSUtils.getRegionReferenceAndLinkFileCount(fs, dir); 826 if (expectedSplitResultFileCount != resultFileCount) { 827 throw new IOException("Failing split. Didn't have expected reference and HFileLink files" 828 + ", expected=" + expectedSplitResultFileCount + ", actual=" + resultFileCount); 829 } 830 } 831 } 832 833 private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, TableDescriptor htd, 834 ColumnFamilyDescriptor hcd, HStoreFile sf) throws IOException { 835 if (LOG.isDebugEnabled()) { 836 LOG.debug("pid=" + getProcId() + " splitting started for store file: " + sf.getPath() 837 + " for region: " + getParentRegion().getShortNameToLog()); 838 } 839 840 final byte[] splitRow = getSplitRow(); 841 final String familyName = hcd.getNameAsString(); 842 StoreFileTracker daughterOneSft = 843 StoreFileTrackerFactory.create(regionFs.getFileSystem().getConf(), htd, hcd, 844 HRegionFileSystem.create(regionFs.getFileSystem().getConf(), regionFs.getFileSystem(), 845 regionFs.getTableDir(), daughterOneRI)); 846 StoreFileTracker daughterTwoSft = 847 StoreFileTrackerFactory.create(regionFs.getFileSystem().getConf(), htd, hcd, 848 HRegionFileSystem.create(regionFs.getFileSystem().getConf(), regionFs.getFileSystem(), 849 regionFs.getTableDir(), daughterTwoRI)); 850 final Path path_first = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow, 851 false, splitPolicy, daughterOneSft); 852 final Path path_second = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow, 853 true, splitPolicy, daughterTwoSft); 854 if (LOG.isDebugEnabled()) { 855 LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + sf.getPath() 856 + " for region: " + getParentRegion().getShortNameToLog()); 857 } 858 return new Pair<Path, Path>(path_first, path_second); 859 } 860 861 /** 862 * Utility class used to do the file splitting / reference writing in parallel instead of 863 * sequentially. 864 */ 865 private class StoreFileSplitter implements Callable<Pair<Path, Path>> { 866 private final HRegionFileSystem regionFs; 867 private final ColumnFamilyDescriptor hcd; 868 private final HStoreFile sf; 869 private final TableDescriptor htd; 870 871 /** 872 * Constructor that takes what it needs to split 873 * @param regionFs the file system 874 * @param hcd Family that contains the store file 875 * @param sf which file 876 */ 877 public StoreFileSplitter(HRegionFileSystem regionFs, TableDescriptor htd, 878 ColumnFamilyDescriptor hcd, HStoreFile sf) { 879 this.regionFs = regionFs; 880 this.sf = sf; 881 this.hcd = hcd; 882 this.htd = htd; 883 } 884 885 @Override 886 public Pair<Path, Path> call() throws IOException { 887 return splitStoreFile(regionFs, htd, hcd, sf); 888 } 889 } 890 891 /** 892 * Post split region actions before the Point-of-No-Return step 893 * @param env MasterProcedureEnv 894 **/ 895 private void preSplitRegionBeforeMETA(final MasterProcedureEnv env) 896 throws IOException, InterruptedException { 897 final List<Mutation> metaEntries = new ArrayList<Mutation>(); 898 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 899 if (cpHost != null) { 900 cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser()); 901 try { 902 for (Mutation p : metaEntries) { 903 RegionInfo.parseRegionName(p.getRow()); 904 } 905 } catch (IOException e) { 906 LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as " 907 + "region name." + "Mutations from coprocessor should only for hbase:meta table."); 908 throw e; 909 } 910 } 911 } 912 913 /** 914 * Add daughter regions to META 915 * @param env MasterProcedureEnv 916 */ 917 private void updateMeta(final MasterProcedureEnv env) throws IOException { 918 env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env), 919 daughterOneRI, daughterTwoRI); 920 } 921 922 /** 923 * Pre split region actions after the Point-of-No-Return step 924 * @param env MasterProcedureEnv 925 **/ 926 private void preSplitRegionAfterMETA(final MasterProcedureEnv env) 927 throws IOException, InterruptedException { 928 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 929 if (cpHost != null) { 930 cpHost.preSplitAfterMETAAction(getUser()); 931 } 932 } 933 934 /** 935 * Post split region actions 936 * @param env MasterProcedureEnv 937 **/ 938 private void postSplitRegion(final MasterProcedureEnv env) throws IOException { 939 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 940 if (cpHost != null) { 941 cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser()); 942 } 943 } 944 945 private ServerName getParentRegionServerName(final MasterProcedureEnv env) { 946 return env.getMasterServices().getAssignmentManager().getRegionStates() 947 .getRegionServerOfRegion(getParentRegion()); 948 } 949 950 private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env) 951 throws IOException { 952 return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env, 953 Stream.of(getParentRegion()), getRegionReplication(env)); 954 } 955 956 private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env) 957 throws IOException { 958 List<RegionInfo> hris = new ArrayList<RegionInfo>(2); 959 hris.add(daughterOneRI); 960 hris.add(daughterTwoRI); 961 return AssignmentManagerUtil.createAssignProceduresForSplitDaughters(env, hris, 962 getRegionReplication(env), getParentRegionServerName(env)); 963 } 964 965 private int getRegionReplication(final MasterProcedureEnv env) throws IOException { 966 final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 967 return htd.getRegionReplication(); 968 } 969 970 private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException { 971 MasterFileSystem fs = env.getMasterFileSystem(); 972 long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(), 973 getParentRegion(), fs::getFileSystem, fs::getWALFileSystem); 974 if (maxSequenceId > 0) { 975 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 976 getWALRegionDir(env, daughterOneRI), maxSequenceId); 977 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 978 getWALRegionDir(env, daughterTwoRI), maxSequenceId); 979 } 980 } 981 982 @Override 983 protected boolean abort(MasterProcedureEnv env) { 984 // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all 985 // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this 986 // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022. 987 return isRollbackSupported(getCurrentState()) ? super.abort(env) : false; 988 } 989}