001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import java.io.IOException; 021import java.io.InterruptedIOException; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.HashMap; 027import java.util.List; 028import java.util.Map; 029import java.util.concurrent.Callable; 030import java.util.concurrent.ExecutionException; 031import java.util.concurrent.ExecutorService; 032import java.util.concurrent.Executors; 033import java.util.concurrent.Future; 034import java.util.concurrent.TimeUnit; 035import java.util.stream.Stream; 036import org.apache.hadoop.conf.Configuration; 037import org.apache.hadoop.fs.FileSystem; 038import org.apache.hadoop.fs.Path; 039import org.apache.hadoop.hbase.DoNotRetryIOException; 040import org.apache.hadoop.hbase.HConstants; 041import org.apache.hadoop.hbase.ServerName; 042import org.apache.hadoop.hbase.TableName; 043import org.apache.hadoop.hbase.UnknownRegionException; 044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 045import org.apache.hadoop.hbase.client.MasterSwitchType; 046import org.apache.hadoop.hbase.client.Mutation; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import org.apache.hadoop.hbase.client.RegionInfoBuilder; 049import org.apache.hadoop.hbase.client.TableDescriptor; 050import org.apache.hadoop.hbase.io.hfile.CacheConfig; 051import org.apache.hadoop.hbase.master.MasterCoprocessorHost; 052import org.apache.hadoop.hbase.master.MasterFileSystem; 053import org.apache.hadoop.hbase.master.RegionState.State; 054import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan; 055import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; 056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 057import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; 058import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 059import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 060import org.apache.hadoop.hbase.quotas.MasterQuotaManager; 061import org.apache.hadoop.hbase.quotas.QuotaExceededException; 062import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 063import org.apache.hadoop.hbase.regionserver.HStore; 064import org.apache.hadoop.hbase.regionserver.HStoreFile; 065import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy; 066import org.apache.hadoop.hbase.regionserver.RegionSplitRestriction; 067import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 068import org.apache.hadoop.hbase.regionserver.StoreUtils; 069import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; 070import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; 071import org.apache.hadoop.hbase.util.Bytes; 072import org.apache.hadoop.hbase.util.CommonFSUtils; 073import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 074import org.apache.hadoop.hbase.util.FSUtils; 075import org.apache.hadoop.hbase.util.Pair; 076import org.apache.hadoop.hbase.util.Threads; 077import org.apache.hadoop.hbase.wal.WALSplitUtil; 078import org.apache.hadoop.util.ReflectionUtils; 079import org.apache.yetus.audience.InterfaceAudience; 080import org.slf4j.Logger; 081import org.slf4j.LoggerFactory; 082 083import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; 084 085import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 086import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; 087import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 088import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState; 089 090/** 091 * The procedure to split a region in a table. Takes lock on the parent region. It holds the lock 092 * for the life of the procedure. 093 * <p> 094 * Throws exception on construction if determines context hostile to spllt (cluster going down or 095 * master is shutting down or table is disabled). 096 * </p> 097 */ 098@InterfaceAudience.Private 099public class SplitTableRegionProcedure 100 extends AbstractStateMachineRegionProcedure<SplitTableRegionState> { 101 private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class); 102 private RegionInfo daughterOneRI; 103 private RegionInfo daughterTwoRI; 104 private byte[] bestSplitRow; 105 private RegionSplitPolicy splitPolicy; 106 107 public SplitTableRegionProcedure() { 108 // Required by the Procedure framework to create the procedure on replay 109 } 110 111 public SplitTableRegionProcedure(final MasterProcedureEnv env, final RegionInfo regionToSplit, 112 final byte[] splitRow) throws IOException { 113 super(env, regionToSplit); 114 preflightChecks(env, true); 115 // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here 116 // we fail-fast on construction. There it skips the split with just a warning. 117 checkOnline(env, regionToSplit); 118 this.bestSplitRow = splitRow; 119 TableDescriptor tableDescriptor = 120 env.getMasterServices().getTableDescriptors().get(getTableName()); 121 Configuration conf = env.getMasterConfiguration(); 122 if (hasBestSplitRow()) { 123 // Apply the split restriction for the table to the user-specified split point 124 RegionSplitRestriction splitRestriction = 125 RegionSplitRestriction.create(tableDescriptor, conf); 126 byte[] restrictedSplitRow = splitRestriction.getRestrictedSplitPoint(bestSplitRow); 127 if (!Bytes.equals(bestSplitRow, restrictedSplitRow)) { 128 LOG.warn( 129 "The specified split point {} violates the split restriction of the table. " 130 + "Using {} as a split point.", 131 Bytes.toStringBinary(bestSplitRow), Bytes.toStringBinary(restrictedSplitRow)); 132 bestSplitRow = restrictedSplitRow; 133 } 134 } 135 checkSplittable(env, regionToSplit); 136 final TableName table = regionToSplit.getTable(); 137 final long rid = getDaughterRegionIdTimestamp(regionToSplit); 138 this.daughterOneRI = 139 RegionInfoBuilder.newBuilder(table).setStartKey(regionToSplit.getStartKey()) 140 .setEndKey(bestSplitRow).setSplit(false).setRegionId(rid).build(); 141 this.daughterTwoRI = RegionInfoBuilder.newBuilder(table).setStartKey(bestSplitRow) 142 .setEndKey(regionToSplit.getEndKey()).setSplit(false).setRegionId(rid).build(); 143 144 if (tableDescriptor.getRegionSplitPolicyClassName() != null) { 145 // Since we don't have region reference here, creating the split policy instance without it. 146 // This can be used to invoke methods which don't require Region reference. This instantiation 147 // of a class on Master-side though it only makes sense on the RegionServer-side is 148 // for Phoenix Local Indexing. Refer HBASE-12583 for more information. 149 Class<? extends RegionSplitPolicy> clazz = 150 RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf); 151 this.splitPolicy = ReflectionUtils.newInstance(clazz, conf); 152 } 153 } 154 155 @Override 156 protected LockState acquireLock(final MasterProcedureEnv env) { 157 if ( 158 env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(), 159 daughterOneRI, daughterTwoRI) 160 ) { 161 try { 162 LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks()); 163 } catch (IOException e) { 164 // Ignore, just for logging 165 } 166 return LockState.LOCK_EVENT_WAIT; 167 } 168 return LockState.LOCK_ACQUIRED; 169 } 170 171 @Override 172 protected void releaseLock(final MasterProcedureEnv env) { 173 env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI, 174 daughterTwoRI); 175 } 176 177 public RegionInfo getDaughterOneRI() { 178 return daughterOneRI; 179 } 180 181 public RegionInfo getDaughterTwoRI() { 182 return daughterTwoRI; 183 } 184 185 private boolean hasBestSplitRow() { 186 return bestSplitRow != null && bestSplitRow.length > 0; 187 } 188 189 /** 190 * Check whether the region is splittable 191 * @param env MasterProcedureEnv 192 * @param regionToSplit parent Region to be split 193 */ 194 private void checkSplittable(final MasterProcedureEnv env, final RegionInfo regionToSplit) 195 throws IOException { 196 // Ask the remote RS if this region is splittable. 197 // If we get an IOE, report it along w/ the failure so can see why we are not splittable at 198 // this time. 199 if (regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 200 throw new IllegalArgumentException("Can't invoke split on non-default regions directly"); 201 } 202 RegionStateNode node = 203 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 204 IOException splittableCheckIOE = null; 205 boolean splittable = false; 206 if (node != null) { 207 try { 208 GetRegionInfoResponse response; 209 if (!hasBestSplitRow()) { 210 LOG.info( 211 "{} splitKey isn't explicitly specified, will try to find a best split key from RS {}", 212 node.getRegionInfo().getRegionNameAsString(), node.getRegionLocation()); 213 response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(), 214 node.getRegionInfo(), true); 215 bestSplitRow = 216 response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null; 217 } else { 218 response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(), 219 node.getRegionInfo(), false); 220 } 221 splittable = response.hasSplittable() && response.getSplittable(); 222 if (LOG.isDebugEnabled()) { 223 LOG.debug("Splittable=" + splittable + " " + node.toShortString()); 224 } 225 } catch (IOException e) { 226 splittableCheckIOE = e; 227 } 228 } 229 230 if (!splittable) { 231 IOException e = 232 new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable"); 233 if (splittableCheckIOE != null) { 234 e.initCause(splittableCheckIOE); 235 } 236 throw e; 237 } 238 239 if (!hasBestSplitRow()) { 240 throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " 241 + "maybe table is too small for auto split. For force split, try specifying split row"); 242 } 243 244 if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) { 245 throw new DoNotRetryIOException( 246 "Split row is equal to startkey: " + Bytes.toStringBinary(bestSplitRow)); 247 } 248 249 if (!regionToSplit.containsRow(bestSplitRow)) { 250 throw new DoNotRetryIOException("Split row is not inside region key range splitKey:" 251 + Bytes.toStringBinary(bestSplitRow) + " region: " + regionToSplit); 252 } 253 } 254 255 /** 256 * Calculate daughter regionid to use. 257 * @param hri Parent {@link RegionInfo} 258 * @return Daughter region id (timestamp) to use. 259 */ 260 private static long getDaughterRegionIdTimestamp(final RegionInfo hri) { 261 long rid = EnvironmentEdgeManager.currentTime(); 262 // Regionid is timestamp. Can't be less than that of parent else will insert 263 // at wrong location in hbase:meta (See HBASE-710). 264 if (rid < hri.getRegionId()) { 265 LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() 266 + " but current time here is " + rid); 267 rid = hri.getRegionId() + 1; 268 } 269 return rid; 270 } 271 272 private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException { 273 AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()), 274 getRegionReplication(env)); 275 } 276 277 private void checkClosedRegions(MasterProcedureEnv env) throws IOException { 278 // theoretically this should not happen any more after we use TRSP, but anyway let's add a check 279 // here 280 AssignmentManagerUtil.checkClosedRegion(env, getParentRegion()); 281 } 282 283 @Override 284 protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state) 285 throws InterruptedException { 286 LOG.trace("{} execute state={}", this, state); 287 288 try { 289 switch (state) { 290 case SPLIT_TABLE_REGION_PREPARE: 291 if (prepareSplitRegion(env)) { 292 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION); 293 break; 294 } else { 295 return Flow.NO_MORE_STATE; 296 } 297 case SPLIT_TABLE_REGION_PRE_OPERATION: 298 preSplitRegion(env); 299 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION); 300 break; 301 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 302 addChildProcedure(createUnassignProcedures(env)); 303 // createUnassignProcedures() can throw out IOException. If this happens, 304 // it wont reach state SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGION and no parent regions 305 // is closed as all created UnassignProcedures are rolled back. If it rolls back with 306 // state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call openParentRegion(), 307 // otherwise, it will result in OpenRegionProcedure for an already open region. 308 setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS); 309 break; 310 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 311 checkClosedRegions(env); 312 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS); 313 break; 314 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 315 removeNonDefaultReplicas(env); 316 createDaughterRegions(env); 317 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE); 318 break; 319 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 320 writeMaxSequenceIdFile(env); 321 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META); 322 break; 323 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 324 preSplitRegionBeforeMETA(env); 325 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META); 326 break; 327 case SPLIT_TABLE_REGION_UPDATE_META: 328 updateMeta(env); 329 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META); 330 break; 331 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 332 preSplitRegionAfterMETA(env); 333 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS); 334 break; 335 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 336 addChildProcedure(createAssignProcedures(env)); 337 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION); 338 break; 339 case SPLIT_TABLE_REGION_POST_OPERATION: 340 postSplitRegion(env); 341 return Flow.NO_MORE_STATE; 342 default: 343 throw new UnsupportedOperationException(this + " unhandled state=" + state); 344 } 345 } catch (IOException e) { 346 String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this; 347 if (!isRollbackSupported(state)) { 348 // We reach a state that cannot be rolled back. We just need to keep retrying. 349 LOG.warn(msg, e); 350 } else { 351 LOG.error(msg, e); 352 setFailure("master-split-regions", e); 353 } 354 } 355 // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split 356 return Flow.HAS_MORE_STATE; 357 } 358 359 /** 360 * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously submitted 361 * for parent region to be split (rollback doesn't wait on the completion of the AssignProcedure) 362 * . This can be improved by changing rollback() to support sub-procedures. See HBASE-19851 for 363 * details. 364 */ 365 @Override 366 protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state) 367 throws IOException, InterruptedException { 368 LOG.trace("{} rollback state={}", this, state); 369 370 try { 371 switch (state) { 372 case SPLIT_TABLE_REGION_POST_OPERATION: 373 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 374 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 375 case SPLIT_TABLE_REGION_UPDATE_META: 376 // PONR 377 throw new UnsupportedOperationException(this + " unhandled state=" + state); 378 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 379 break; 380 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 381 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 382 deleteDaughterRegions(env); 383 break; 384 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 385 openParentRegion(env); 386 break; 387 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 388 // If it rolls back with state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call 389 // openParentRegion(), otherwise, it will result in OpenRegionProcedure for an 390 // already open region. 391 break; 392 case SPLIT_TABLE_REGION_PRE_OPERATION: 393 postRollBackSplitRegion(env); 394 break; 395 case SPLIT_TABLE_REGION_PREPARE: 396 rollbackPrepareSplit(env); 397 break; 398 default: 399 throw new UnsupportedOperationException(this + " unhandled state=" + state); 400 } 401 } catch (IOException e) { 402 // This will be retried. Unless there is a bug in the code, 403 // this should be just a "temporary error" (e.g. network down) 404 LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state 405 + " for splitting the region " + getParentRegion().getEncodedName() + " in table " 406 + getTableName(), e); 407 throw e; 408 } 409 } 410 411 /* 412 * Check whether we are in the state that can be rollback 413 */ 414 @Override 415 protected boolean isRollbackSupported(final SplitTableRegionState state) { 416 switch (state) { 417 case SPLIT_TABLE_REGION_POST_OPERATION: 418 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 419 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 420 case SPLIT_TABLE_REGION_UPDATE_META: 421 // It is not safe to rollback if we reach to these states. 422 return false; 423 default: 424 break; 425 } 426 return true; 427 } 428 429 @Override 430 protected SplitTableRegionState getState(final int stateId) { 431 return SplitTableRegionState.forNumber(stateId); 432 } 433 434 @Override 435 protected int getStateId(final SplitTableRegionState state) { 436 return state.getNumber(); 437 } 438 439 @Override 440 protected SplitTableRegionState getInitialState() { 441 return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE; 442 } 443 444 @Override 445 protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { 446 super.serializeStateData(serializer); 447 448 final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg = 449 MasterProcedureProtos.SplitTableRegionStateData.newBuilder() 450 .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser())) 451 .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion())) 452 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI)) 453 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI)); 454 serializer.serialize(splitTableRegionMsg.build()); 455 } 456 457 @Override 458 protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { 459 super.deserializeStateData(serializer); 460 461 final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg = 462 serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class); 463 setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo())); 464 setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo())); 465 assert (splitTableRegionsMsg.getChildRegionInfoCount() == 2); 466 daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0)); 467 daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1)); 468 } 469 470 @Override 471 public void toStringClassDetails(StringBuilder sb) { 472 sb.append(getClass().getSimpleName()); 473 sb.append(" table="); 474 sb.append(getTableName()); 475 sb.append(", parent="); 476 sb.append(getParentRegion().getShortNameToLog()); 477 sb.append(", daughterA="); 478 sb.append(daughterOneRI.getShortNameToLog()); 479 sb.append(", daughterB="); 480 sb.append(daughterTwoRI.getShortNameToLog()); 481 } 482 483 private RegionInfo getParentRegion() { 484 return getRegion(); 485 } 486 487 @Override 488 public TableOperationType getTableOperationType() { 489 return TableOperationType.REGION_SPLIT; 490 } 491 492 @Override 493 protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { 494 return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics(); 495 } 496 497 private byte[] getSplitRow() { 498 return daughterTwoRI.getStartKey(); 499 } 500 501 private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED }; 502 503 /** 504 * Prepare to Split region. 505 * @param env MasterProcedureEnv 506 */ 507 public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { 508 // Fail if we are taking snapshot for the given table 509 if ( 510 env.getMasterServices().getSnapshotManager() 511 .isTableTakingAnySnapshot(getParentRegion().getTable()) 512 ) { 513 setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() 514 + ", because we are taking snapshot for the table " + getParentRegion().getTable())); 515 return false; 516 } 517 // Check whether the region is splittable 518 RegionStateNode node = 519 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 520 521 if (node == null) { 522 throw new UnknownRegionException(getParentRegion().getRegionNameAsString()); 523 } 524 525 RegionInfo parentHRI = node.getRegionInfo(); 526 if (parentHRI == null) { 527 LOG.info("Unsplittable; parent region is null; node={}", node); 528 return false; 529 } 530 // Lookup the parent HRI state from the AM, which has the latest updated info. 531 // Protect against the case where concurrent SPLIT requests came in and succeeded 532 // just before us. 533 if (node.isInState(State.SPLIT)) { 534 LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT"); 535 return false; 536 } 537 if (parentHRI.isSplit() || parentHRI.isOffline()) { 538 LOG.info("Split of " + parentHRI + " skipped because offline/split."); 539 return false; 540 } 541 542 // expected parent to be online or closed 543 if (!node.isInState(EXPECTED_SPLIT_STATES)) { 544 // We may have SPLIT already? 545 setFailure( 546 new IOException("Split " + parentHRI.getRegionNameAsString() + " FAILED because state=" 547 + node.getState() + "; expected " + Arrays.toString(EXPECTED_SPLIT_STATES))); 548 return false; 549 } 550 551 // Mostly the below two checks are not used because we already check the switches before 552 // submitting the split procedure. Just for safety, we are checking the switch again here. 553 // Also, in case the switch was set to false after submission, this procedure can be rollbacked, 554 // thanks to this double check! 555 // case 1: check for cluster level switch 556 if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { 557 LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); 558 setFailure(new IOException( 559 "Split region " + parentHRI.getRegionNameAsString() + " failed due to split switch off")); 560 return false; 561 } 562 // case 2: check for table level switch 563 if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) { 564 LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(), 565 parentHRI); 566 setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() 567 + " failed as region split is disabled for the table")); 568 return false; 569 } 570 571 // set node state as SPLITTING 572 node.setState(State.SPLITTING); 573 574 // Since we have the lock and the master is coordinating the operation 575 // we are always able to split the region 576 return true; 577 } 578 579 /** 580 * Rollback prepare split region 581 * @param env MasterProcedureEnv 582 */ 583 private void rollbackPrepareSplit(final MasterProcedureEnv env) { 584 RegionStateNode parentRegionStateNode = 585 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 586 if (parentRegionStateNode.getState() == State.SPLITTING) { 587 parentRegionStateNode.setState(State.OPEN); 588 } 589 } 590 591 /** 592 * Action before splitting region in a table. 593 * @param env MasterProcedureEnv 594 */ 595 private void preSplitRegion(final MasterProcedureEnv env) 596 throws IOException, InterruptedException { 597 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 598 if (cpHost != null) { 599 cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser()); 600 } 601 602 // TODO: Clean up split and merge. Currently all over the place. 603 // Notify QuotaManager and RegionNormalizer 604 try { 605 MasterQuotaManager masterQuotaManager = env.getMasterServices().getMasterQuotaManager(); 606 if (masterQuotaManager != null) { 607 masterQuotaManager.onRegionSplit(this.getParentRegion()); 608 } 609 } catch (QuotaExceededException e) { 610 // TODO: why is this here? split requests can be submitted by actors other than the normalizer 611 env.getMasterServices().getRegionNormalizerManager() 612 .planSkipped(NormalizationPlan.PlanType.SPLIT); 613 throw e; 614 } 615 } 616 617 /** 618 * Action after rollback a split table region action. 619 * @param env MasterProcedureEnv 620 */ 621 private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException { 622 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 623 if (cpHost != null) { 624 cpHost.postRollBackSplitRegionAction(getUser()); 625 } 626 } 627 628 /** 629 * Rollback close parent region 630 */ 631 private void openParentRegion(MasterProcedureEnv env) throws IOException { 632 AssignmentManagerUtil.reopenRegionsForRollback(env, 633 Collections.singletonList((getParentRegion())), getRegionReplication(env), 634 getParentRegionServerName(env)); 635 } 636 637 /** 638 * Create daughter regions 639 */ 640 public void createDaughterRegions(final MasterProcedureEnv env) throws IOException { 641 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 642 final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName()); 643 final FileSystem fs = mfs.getFileSystem(); 644 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( 645 env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false); 646 regionFs.createSplitsDir(daughterOneRI, daughterTwoRI); 647 648 Pair<List<Path>, List<Path>> expectedReferences = splitStoreFiles(env, regionFs); 649 650 assertSplitResultFilesCount(fs, expectedReferences.getFirst().size(), 651 regionFs.getSplitsDir(daughterOneRI)); 652 regionFs.commitDaughterRegion(daughterOneRI, expectedReferences.getFirst(), env); 653 assertSplitResultFilesCount(fs, expectedReferences.getFirst().size(), 654 new Path(tabledir, daughterOneRI.getEncodedName())); 655 656 assertSplitResultFilesCount(fs, expectedReferences.getSecond().size(), 657 regionFs.getSplitsDir(daughterTwoRI)); 658 regionFs.commitDaughterRegion(daughterTwoRI, expectedReferences.getSecond(), env); 659 assertSplitResultFilesCount(fs, expectedReferences.getSecond().size(), 660 new Path(tabledir, daughterTwoRI.getEncodedName())); 661 } 662 663 private void deleteDaughterRegions(final MasterProcedureEnv env) throws IOException { 664 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 665 final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName()); 666 HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(), 667 tabledir, daughterOneRI); 668 HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(), 669 tabledir, daughterTwoRI); 670 } 671 672 /** 673 * Create Split directory 674 * @param env MasterProcedureEnv 675 */ 676 private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv env, 677 final HRegionFileSystem regionFs) throws IOException { 678 final Configuration conf = env.getMasterConfiguration(); 679 TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 680 // The following code sets up a thread pool executor with as many slots as 681 // there's files to split. It then fires up everything, waits for 682 // completion and finally checks for any exception 683 // 684 // Note: From HBASE-26187, splitStoreFiles now creates daughter region dirs straight under the 685 // table dir. In case of failure, the proc would go through this again, already existing 686 // region dirs and split files would just be ignored, new split files should get created. 687 int nbFiles = 0; 688 final Map<String, Pair<Collection<StoreFileInfo>, StoreFileTracker>> files = 689 new HashMap<String, Pair<Collection<StoreFileInfo>, StoreFileTracker>>( 690 htd.getColumnFamilyCount()); 691 for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) { 692 String family = cfd.getNameAsString(); 693 StoreFileTracker tracker = 694 StoreFileTrackerFactory.create(env.getMasterConfiguration(), htd, cfd, regionFs); 695 Collection<StoreFileInfo> sfis = tracker.load(); 696 if (sfis == null) { 697 continue; 698 } 699 Collection<StoreFileInfo> filteredSfis = null; 700 for (StoreFileInfo sfi : sfis) { 701 // Filter. There is a lag cleaning up compacted reference files. They get cleared 702 // after a delay in case outstanding Scanners still have references. Because of this, 703 // the listing of the Store content may have straggler reference files. Skip these. 704 // It should be safe to skip references at this point because we checked above with 705 // the region if it thinks it is splittable and if we are here, it thinks it is 706 // splitable. 707 if (sfi.isReference()) { 708 LOG.info("Skipping split of " + sfi + "; presuming ready for archiving."); 709 continue; 710 } 711 if (filteredSfis == null) { 712 filteredSfis = new ArrayList<StoreFileInfo>(sfis.size()); 713 files.put(family, new Pair(filteredSfis, tracker)); 714 } 715 filteredSfis.add(sfi); 716 nbFiles++; 717 } 718 } 719 if (nbFiles == 0) { 720 // no file needs to be splitted. 721 return new Pair<>(Collections.emptyList(), Collections.emptyList()); 722 } 723 // Max #threads is the smaller of the number of storefiles or the default max determined above. 724 int maxThreads = Math.min( 725 conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, 726 conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)), 727 nbFiles); 728 LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" 729 + getParentRegion().getShortNameToLog() + ", threads=" + maxThreads); 730 final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads, 731 new ThreadFactoryBuilder().setNameFormat("StoreFileSplitter-pool-%d").setDaemon(true) 732 .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build()); 733 final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles); 734 735 // Split each store file. 736 for (Map.Entry<String, Pair<Collection<StoreFileInfo>, StoreFileTracker>> e : files 737 .entrySet()) { 738 byte[] familyName = Bytes.toBytes(e.getKey()); 739 final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName); 740 Pair<Collection<StoreFileInfo>, StoreFileTracker> storeFilesAndTracker = e.getValue(); 741 final Collection<StoreFileInfo> storeFiles = storeFilesAndTracker.getFirst(); 742 if (storeFiles != null && storeFiles.size() > 0) { 743 final Configuration storeConfiguration = 744 StoreUtils.createStoreConfiguration(env.getMasterConfiguration(), htd, hcd); 745 for (StoreFileInfo storeFileInfo : storeFiles) { 746 // As this procedure is running on master, use CacheConfig.DISABLED means 747 // don't cache any block. 748 // We also need to pass through a suitable CompoundConfiguration as if this 749 // is running in a regionserver's Store context, or we might not be able 750 // to read the hfiles. 751 storeFileInfo.setConf(storeConfiguration); 752 StoreFileSplitter sfs = 753 new StoreFileSplitter(regionFs, storeFilesAndTracker.getSecond(), familyName, 754 new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED)); 755 futures.add(threadPool.submit(sfs)); 756 } 757 } 758 } 759 // Shutdown the pool 760 threadPool.shutdown(); 761 762 // Wait for all the tasks to finish. 763 // When splits ran on the RegionServer, how-long-to-wait-configuration was named 764 // hbase.regionserver.fileSplitTimeout. If set, use its value. 765 long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 766 conf.getLong("hbase.regionserver.fileSplitTimeout", 600000)); 767 try { 768 boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS); 769 if (stillRunning) { 770 threadPool.shutdownNow(); 771 // wait for the thread to shutdown completely. 772 while (!threadPool.isTerminated()) { 773 Thread.sleep(50); 774 } 775 throw new IOException( 776 "Took too long to split the" + " files and create the references, aborting split"); 777 } 778 } catch (InterruptedException e) { 779 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 780 } 781 782 List<Path> daughterA = new ArrayList<>(); 783 List<Path> daughterB = new ArrayList<>(); 784 // Look for any exception 785 for (Future<Pair<Path, Path>> future : futures) { 786 try { 787 Pair<Path, Path> p = future.get(); 788 if (p.getFirst() != null) { 789 daughterA.add(p.getFirst()); 790 } 791 if (p.getSecond() != null) { 792 daughterB.add(p.getSecond()); 793 } 794 } catch (InterruptedException e) { 795 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 796 } catch (ExecutionException e) { 797 throw new IOException(e); 798 } 799 } 800 801 if (LOG.isDebugEnabled()) { 802 LOG.debug("pid=" + getProcId() + " split storefiles for region " 803 + getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA 804 + " storefiles, Daughter B: " + daughterB + " storefiles."); 805 } 806 return new Pair<>(daughterA, daughterB); 807 } 808 809 private void assertSplitResultFilesCount(final FileSystem fs, 810 final int expectedSplitResultFileCount, Path dir) throws IOException { 811 if (expectedSplitResultFileCount != 0) { 812 int resultFileCount = FSUtils.getRegionReferenceAndLinkFileCount(fs, dir); 813 if (expectedSplitResultFileCount != resultFileCount) { 814 throw new IOException("Failing split. Didn't have expected reference and HFileLink files" 815 + ", expected=" + expectedSplitResultFileCount + ", actual=" + resultFileCount); 816 } 817 } 818 } 819 820 private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, StoreFileTracker tracker, 821 byte[] family, HStoreFile sf) throws IOException { 822 if (LOG.isDebugEnabled()) { 823 LOG.debug("pid=" + getProcId() + " splitting started for store file: " + sf.getPath() 824 + " for region: " + getParentRegion().getShortNameToLog()); 825 } 826 827 final byte[] splitRow = getSplitRow(); 828 final String familyName = Bytes.toString(family); 829 final Path path_first = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow, 830 false, splitPolicy, tracker); 831 final Path path_second = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow, 832 true, splitPolicy, tracker); 833 if (LOG.isDebugEnabled()) { 834 LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + sf.getPath() 835 + " for region: " + getParentRegion().getShortNameToLog()); 836 } 837 return new Pair<Path, Path>(path_first, path_second); 838 } 839 840 /** 841 * Utility class used to do the file splitting / reference writing in parallel instead of 842 * sequentially. 843 */ 844 private class StoreFileSplitter implements Callable<Pair<Path, Path>> { 845 private final HRegionFileSystem regionFs; 846 private final byte[] family; 847 private final HStoreFile sf; 848 private final StoreFileTracker tracker; 849 850 /** 851 * Constructor that takes what it needs to split 852 * @param regionFs the file system 853 * @param family Family that contains the store file 854 * @param sf which file 855 */ 856 public StoreFileSplitter(HRegionFileSystem regionFs, StoreFileTracker tracker, byte[] family, 857 HStoreFile sf) { 858 this.regionFs = regionFs; 859 this.sf = sf; 860 this.family = family; 861 this.tracker = tracker; 862 } 863 864 @Override 865 public Pair<Path, Path> call() throws IOException { 866 return splitStoreFile(regionFs, tracker, family, sf); 867 } 868 } 869 870 /** 871 * Post split region actions before the Point-of-No-Return step 872 * @param env MasterProcedureEnv 873 **/ 874 private void preSplitRegionBeforeMETA(final MasterProcedureEnv env) 875 throws IOException, InterruptedException { 876 final List<Mutation> metaEntries = new ArrayList<Mutation>(); 877 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 878 if (cpHost != null) { 879 cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser()); 880 try { 881 for (Mutation p : metaEntries) { 882 RegionInfo.parseRegionName(p.getRow()); 883 } 884 } catch (IOException e) { 885 LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as " 886 + "region name." + "Mutations from coprocessor should only for hbase:meta table."); 887 throw e; 888 } 889 } 890 } 891 892 /** 893 * Add daughter regions to META 894 * @param env MasterProcedureEnv 895 */ 896 private void updateMeta(final MasterProcedureEnv env) throws IOException { 897 env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env), 898 daughterOneRI, daughterTwoRI); 899 } 900 901 /** 902 * Pre split region actions after the Point-of-No-Return step 903 * @param env MasterProcedureEnv 904 **/ 905 private void preSplitRegionAfterMETA(final MasterProcedureEnv env) 906 throws IOException, InterruptedException { 907 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 908 if (cpHost != null) { 909 cpHost.preSplitAfterMETAAction(getUser()); 910 } 911 } 912 913 /** 914 * Post split region actions 915 * @param env MasterProcedureEnv 916 **/ 917 private void postSplitRegion(final MasterProcedureEnv env) throws IOException { 918 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 919 if (cpHost != null) { 920 cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser()); 921 } 922 } 923 924 private ServerName getParentRegionServerName(final MasterProcedureEnv env) { 925 return env.getMasterServices().getAssignmentManager().getRegionStates() 926 .getRegionServerOfRegion(getParentRegion()); 927 } 928 929 private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env) 930 throws IOException { 931 return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env, 932 Stream.of(getParentRegion()), getRegionReplication(env)); 933 } 934 935 private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env) 936 throws IOException { 937 List<RegionInfo> hris = new ArrayList<RegionInfo>(2); 938 hris.add(daughterOneRI); 939 hris.add(daughterTwoRI); 940 return AssignmentManagerUtil.createAssignProceduresForSplitDaughters(env, hris, 941 getRegionReplication(env), getParentRegionServerName(env)); 942 } 943 944 private int getRegionReplication(final MasterProcedureEnv env) throws IOException { 945 final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 946 return htd.getRegionReplication(); 947 } 948 949 private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException { 950 MasterFileSystem fs = env.getMasterFileSystem(); 951 long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(), 952 getParentRegion(), fs::getFileSystem, fs::getWALFileSystem); 953 if (maxSequenceId > 0) { 954 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 955 getWALRegionDir(env, daughterOneRI), maxSequenceId); 956 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 957 getWALRegionDir(env, daughterTwoRI), maxSequenceId); 958 } 959 } 960 961 @Override 962 protected boolean abort(MasterProcedureEnv env) { 963 // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all 964 // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this 965 // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022. 966 return isRollbackSupported(getCurrentState()) ? super.abort(env) : false; 967 } 968}