001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import java.io.IOException; 021import java.io.InterruptedIOException; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.HashMap; 027import java.util.List; 028import java.util.Map; 029import java.util.concurrent.Callable; 030import java.util.concurrent.ExecutionException; 031import java.util.concurrent.ExecutorService; 032import java.util.concurrent.Executors; 033import java.util.concurrent.Future; 034import java.util.concurrent.TimeUnit; 035import java.util.stream.Stream; 036import org.apache.hadoop.conf.Configuration; 037import org.apache.hadoop.fs.FileSystem; 038import org.apache.hadoop.fs.Path; 039import org.apache.hadoop.hbase.DoNotRetryIOException; 040import org.apache.hadoop.hbase.HConstants; 041import org.apache.hadoop.hbase.ServerName; 042import org.apache.hadoop.hbase.TableName; 043import org.apache.hadoop.hbase.UnknownRegionException; 044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 045import org.apache.hadoop.hbase.client.MasterSwitchType; 046import org.apache.hadoop.hbase.client.Mutation; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import org.apache.hadoop.hbase.client.RegionInfoBuilder; 049import org.apache.hadoop.hbase.client.TableDescriptor; 050import org.apache.hadoop.hbase.io.hfile.CacheConfig; 051import org.apache.hadoop.hbase.master.MasterCoprocessorHost; 052import org.apache.hadoop.hbase.master.MasterFileSystem; 053import org.apache.hadoop.hbase.master.RegionState.State; 054import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan; 055import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; 056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 057import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; 058import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 059import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 060import org.apache.hadoop.hbase.quotas.QuotaExceededException; 061import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 062import org.apache.hadoop.hbase.regionserver.HStore; 063import org.apache.hadoop.hbase.regionserver.HStoreFile; 064import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy; 065import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 066import org.apache.hadoop.hbase.util.Bytes; 067import org.apache.hadoop.hbase.util.CommonFSUtils; 068import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 069import org.apache.hadoop.hbase.util.FSUtils; 070import org.apache.hadoop.hbase.util.Pair; 071import org.apache.hadoop.hbase.util.Threads; 072import org.apache.hadoop.hbase.wal.WALSplitUtil; 073import org.apache.hadoop.util.ReflectionUtils; 074import org.apache.yetus.audience.InterfaceAudience; 075import org.slf4j.Logger; 076import org.slf4j.LoggerFactory; 077 078import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; 079 080import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 081import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; 082import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 083import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState; 084 085/** 086 * The procedure to split a region in a table. 087 * Takes lock on the parent region. 088 * It holds the lock for the life of the procedure. 089 * <p>Throws exception on construction if determines context hostile to spllt (cluster going 090 * down or master is shutting down or table is disabled).</p> 091 */ 092@InterfaceAudience.Private 093public class SplitTableRegionProcedure 094 extends AbstractStateMachineRegionProcedure<SplitTableRegionState> { 095 private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class); 096 private RegionInfo daughterOneRI; 097 private RegionInfo daughterTwoRI; 098 private byte[] bestSplitRow; 099 private RegionSplitPolicy splitPolicy; 100 101 public SplitTableRegionProcedure() { 102 // Required by the Procedure framework to create the procedure on replay 103 } 104 105 public SplitTableRegionProcedure(final MasterProcedureEnv env, 106 final RegionInfo regionToSplit, final byte[] splitRow) throws IOException { 107 super(env, regionToSplit); 108 preflightChecks(env, true); 109 // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here 110 // we fail-fast on construction. There it skips the split with just a warning. 111 checkOnline(env, regionToSplit); 112 this.bestSplitRow = splitRow; 113 checkSplittable(env, regionToSplit); 114 final TableName table = regionToSplit.getTable(); 115 final long rid = getDaughterRegionIdTimestamp(regionToSplit); 116 this.daughterOneRI = RegionInfoBuilder.newBuilder(table) 117 .setStartKey(regionToSplit.getStartKey()) 118 .setEndKey(bestSplitRow) 119 .setSplit(false) 120 .setRegionId(rid) 121 .build(); 122 this.daughterTwoRI = RegionInfoBuilder.newBuilder(table) 123 .setStartKey(bestSplitRow) 124 .setEndKey(regionToSplit.getEndKey()) 125 .setSplit(false) 126 .setRegionId(rid) 127 .build(); 128 TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 129 if(htd.getRegionSplitPolicyClassName() != null) { 130 // Since we don't have region reference here, creating the split policy instance without it. 131 // This can be used to invoke methods which don't require Region reference. This instantiation 132 // of a class on Master-side though it only makes sense on the RegionServer-side is 133 // for Phoenix Local Indexing. Refer HBASE-12583 for more information. 134 Class<? extends RegionSplitPolicy> clazz = 135 RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration()); 136 this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration()); 137 } 138 } 139 140 @Override 141 protected LockState acquireLock(final MasterProcedureEnv env) { 142 if (env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(), 143 daughterOneRI, daughterTwoRI)) { 144 try { 145 LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks()); 146 } catch (IOException e) { 147 // Ignore, just for logging 148 } 149 return LockState.LOCK_EVENT_WAIT; 150 } 151 return LockState.LOCK_ACQUIRED; 152 } 153 154 @Override 155 protected void releaseLock(final MasterProcedureEnv env) { 156 env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI, 157 daughterTwoRI); 158 } 159 160 public RegionInfo getDaughterOneRI() { 161 return daughterOneRI; 162 } 163 164 public RegionInfo getDaughterTwoRI() { 165 return daughterTwoRI; 166 } 167 168 private boolean hasBestSplitRow() { 169 return bestSplitRow != null && bestSplitRow.length > 0; 170 } 171 172 /** 173 * Check whether the region is splittable 174 * @param env MasterProcedureEnv 175 * @param regionToSplit parent Region to be split 176 */ 177 private void checkSplittable(final MasterProcedureEnv env, 178 final RegionInfo regionToSplit) throws IOException { 179 // Ask the remote RS if this region is splittable. 180 // If we get an IOE, report it along w/ the failure so can see why we are not splittable at 181 // this time. 182 if(regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 183 throw new IllegalArgumentException("Can't invoke split on non-default regions directly"); 184 } 185 RegionStateNode node = 186 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 187 IOException splittableCheckIOE = null; 188 boolean splittable = false; 189 if (node != null) { 190 try { 191 GetRegionInfoResponse response; 192 if (!hasBestSplitRow()) { 193 LOG.info( 194 "{} splitKey isn't explicitly specified, will try to find a best split key from RS {}", 195 node.getRegionInfo().getRegionNameAsString(), node.getRegionLocation()); 196 response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(), 197 node.getRegionInfo(), true); 198 bestSplitRow = 199 response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null; 200 } else { 201 response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(), 202 node.getRegionInfo(), false); 203 } 204 splittable = response.hasSplittable() && response.getSplittable(); 205 if (LOG.isDebugEnabled()) { 206 LOG.debug("Splittable=" + splittable + " " + node.toShortString()); 207 } 208 } catch (IOException e) { 209 splittableCheckIOE = e; 210 } 211 } 212 213 if (!splittable) { 214 IOException e = 215 new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable"); 216 if (splittableCheckIOE != null) { 217 e.initCause(splittableCheckIOE); 218 } 219 throw e; 220 } 221 222 if (bestSplitRow == null || bestSplitRow.length == 0) { 223 throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " + 224 "maybe table is too small for auto split. For force split, try specifying split row"); 225 } 226 227 if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) { 228 throw new DoNotRetryIOException( 229 "Split row is equal to startkey: " + Bytes.toStringBinary(bestSplitRow)); 230 } 231 232 if (!regionToSplit.containsRow(bestSplitRow)) { 233 throw new DoNotRetryIOException("Split row is not inside region key range splitKey:" + 234 Bytes.toStringBinary(bestSplitRow) + " region: " + regionToSplit); 235 } 236 } 237 238 /** 239 * Calculate daughter regionid to use. 240 * @param hri Parent {@link RegionInfo} 241 * @return Daughter region id (timestamp) to use. 242 */ 243 private static long getDaughterRegionIdTimestamp(final RegionInfo hri) { 244 long rid = EnvironmentEdgeManager.currentTime(); 245 // Regionid is timestamp. Can't be less than that of parent else will insert 246 // at wrong location in hbase:meta (See HBASE-710). 247 if (rid < hri.getRegionId()) { 248 LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() + 249 " but current time here is " + rid); 250 rid = hri.getRegionId() + 1; 251 } 252 return rid; 253 } 254 255 private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException { 256 AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()), 257 getRegionReplication(env)); 258 } 259 260 private void checkClosedRegions(MasterProcedureEnv env) throws IOException { 261 // theoretically this should not happen any more after we use TRSP, but anyway let's add a check 262 // here 263 AssignmentManagerUtil.checkClosedRegion(env, getParentRegion()); 264 } 265 266 @Override 267 protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state) 268 throws InterruptedException { 269 LOG.trace("{} execute state={}", this, state); 270 271 try { 272 switch (state) { 273 case SPLIT_TABLE_REGION_PREPARE: 274 if (prepareSplitRegion(env)) { 275 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION); 276 break; 277 } else { 278 return Flow.NO_MORE_STATE; 279 } 280 case SPLIT_TABLE_REGION_PRE_OPERATION: 281 preSplitRegion(env); 282 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION); 283 break; 284 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 285 addChildProcedure(createUnassignProcedures(env)); 286 setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS); 287 break; 288 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 289 checkClosedRegions(env); 290 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS); 291 break; 292 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 293 removeNonDefaultReplicas(env); 294 createDaughterRegions(env); 295 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE); 296 break; 297 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 298 writeMaxSequenceIdFile(env); 299 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META); 300 break; 301 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 302 preSplitRegionBeforeMETA(env); 303 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META); 304 break; 305 case SPLIT_TABLE_REGION_UPDATE_META: 306 updateMeta(env); 307 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META); 308 break; 309 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 310 preSplitRegionAfterMETA(env); 311 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS); 312 break; 313 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 314 addChildProcedure(createAssignProcedures(env)); 315 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION); 316 break; 317 case SPLIT_TABLE_REGION_POST_OPERATION: 318 postSplitRegion(env); 319 return Flow.NO_MORE_STATE; 320 default: 321 throw new UnsupportedOperationException(this + " unhandled state=" + state); 322 } 323 } catch (IOException e) { 324 String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this; 325 if (!isRollbackSupported(state)) { 326 // We reach a state that cannot be rolled back. We just need to keep retrying. 327 LOG.warn(msg, e); 328 } else { 329 LOG.error(msg, e); 330 setFailure("master-split-regions", e); 331 } 332 } 333 // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split 334 return Flow.HAS_MORE_STATE; 335 } 336 337 /** 338 * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously 339 * submitted for parent region to be split (rollback doesn't wait on the completion of the 340 * AssignProcedure) . This can be improved by changing rollback() to support sub-procedures. 341 * See HBASE-19851 for details. 342 */ 343 @Override 344 protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state) 345 throws IOException, InterruptedException { 346 LOG.trace("{} rollback state={}", this, state); 347 348 try { 349 switch (state) { 350 case SPLIT_TABLE_REGION_POST_OPERATION: 351 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 352 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 353 case SPLIT_TABLE_REGION_UPDATE_META: 354 // PONR 355 throw new UnsupportedOperationException(this + " unhandled state=" + state); 356 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 357 break; 358 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 359 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 360 // Doing nothing, as re-open parent region would clean up daughter region directories. 361 break; 362 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 363 // Doing nothing, in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, 364 // we will bring parent region online 365 break; 366 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 367 openParentRegion(env); 368 break; 369 case SPLIT_TABLE_REGION_PRE_OPERATION: 370 postRollBackSplitRegion(env); 371 break; 372 case SPLIT_TABLE_REGION_PREPARE: 373 break; // nothing to do 374 default: 375 throw new UnsupportedOperationException(this + " unhandled state=" + state); 376 } 377 } catch (IOException e) { 378 // This will be retried. Unless there is a bug in the code, 379 // this should be just a "temporary error" (e.g. network down) 380 LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state + 381 " for splitting the region " 382 + getParentRegion().getEncodedName() + " in table " + getTableName(), e); 383 throw e; 384 } 385 } 386 387 /* 388 * Check whether we are in the state that can be rollback 389 */ 390 @Override 391 protected boolean isRollbackSupported(final SplitTableRegionState state) { 392 switch (state) { 393 case SPLIT_TABLE_REGION_POST_OPERATION: 394 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 395 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 396 case SPLIT_TABLE_REGION_UPDATE_META: 397 // It is not safe to rollback if we reach to these states. 398 return false; 399 default: 400 break; 401 } 402 return true; 403 } 404 405 @Override 406 protected SplitTableRegionState getState(final int stateId) { 407 return SplitTableRegionState.forNumber(stateId); 408 } 409 410 @Override 411 protected int getStateId(final SplitTableRegionState state) { 412 return state.getNumber(); 413 } 414 415 @Override 416 protected SplitTableRegionState getInitialState() { 417 return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE; 418 } 419 420 @Override 421 protected void serializeStateData(ProcedureStateSerializer serializer) 422 throws IOException { 423 super.serializeStateData(serializer); 424 425 final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg = 426 MasterProcedureProtos.SplitTableRegionStateData.newBuilder() 427 .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser())) 428 .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion())) 429 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI)) 430 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI)); 431 serializer.serialize(splitTableRegionMsg.build()); 432 } 433 434 @Override 435 protected void deserializeStateData(ProcedureStateSerializer serializer) 436 throws IOException { 437 super.deserializeStateData(serializer); 438 439 final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg = 440 serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class); 441 setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo())); 442 setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo())); 443 assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2); 444 daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0)); 445 daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1)); 446 } 447 448 @Override 449 public void toStringClassDetails(StringBuilder sb) { 450 sb.append(getClass().getSimpleName()); 451 sb.append(" table="); 452 sb.append(getTableName()); 453 sb.append(", parent="); 454 sb.append(getParentRegion().getShortNameToLog()); 455 sb.append(", daughterA="); 456 sb.append(daughterOneRI.getShortNameToLog()); 457 sb.append(", daughterB="); 458 sb.append(daughterTwoRI.getShortNameToLog()); 459 } 460 461 private RegionInfo getParentRegion() { 462 return getRegion(); 463 } 464 465 @Override 466 public TableOperationType getTableOperationType() { 467 return TableOperationType.REGION_SPLIT; 468 } 469 470 @Override 471 protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { 472 return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics(); 473 } 474 475 private byte[] getSplitRow() { 476 return daughterTwoRI.getStartKey(); 477 } 478 479 private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED }; 480 481 /** 482 * Prepare to Split region. 483 * @param env MasterProcedureEnv 484 */ 485 public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { 486 // Fail if we are taking snapshot for the given table 487 if (env.getMasterServices().getSnapshotManager() 488 .isTakingSnapshot(getParentRegion().getTable())) { 489 setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() + 490 ", because we are taking snapshot for the table " + getParentRegion().getTable())); 491 return false; 492 } 493 // Check whether the region is splittable 494 RegionStateNode node = 495 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 496 497 if (node == null) { 498 throw new UnknownRegionException(getParentRegion().getRegionNameAsString()); 499 } 500 501 RegionInfo parentHRI = node.getRegionInfo(); 502 if (parentHRI == null) { 503 LOG.info("Unsplittable; parent region is null; node={}", node); 504 return false; 505 } 506 // Lookup the parent HRI state from the AM, which has the latest updated info. 507 // Protect against the case where concurrent SPLIT requests came in and succeeded 508 // just before us. 509 if (node.isInState(State.SPLIT)) { 510 LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT"); 511 return false; 512 } 513 if (parentHRI.isSplit() || parentHRI.isOffline()) { 514 LOG.info("Split of " + parentHRI + " skipped because offline/split."); 515 return false; 516 } 517 518 // expected parent to be online or closed 519 if (!node.isInState(EXPECTED_SPLIT_STATES)) { 520 // We may have SPLIT already? 521 setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() + 522 " FAILED because state=" + node.getState() + "; expected " + 523 Arrays.toString(EXPECTED_SPLIT_STATES))); 524 return false; 525 } 526 527 // Mostly this check is not used because we already check the switch before submit a split 528 // procedure. Just for safe, check the switch again. This procedure can be rollbacked if 529 // the switch was set to false after submit. 530 if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { 531 LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); 532 setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() + 533 " failed due to split switch off")); 534 return false; 535 } 536 537 if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) { 538 LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(), 539 parentHRI); 540 setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() 541 + " failed as region split is disabled for the table")); 542 return false; 543 } 544 545 // set node state as SPLITTING 546 node.setState(State.SPLITTING); 547 548 // Since we have the lock and the master is coordinating the operation 549 // we are always able to split the region 550 return true; 551 } 552 553 /** 554 * Action before splitting region in a table. 555 * @param env MasterProcedureEnv 556 */ 557 private void preSplitRegion(final MasterProcedureEnv env) 558 throws IOException, InterruptedException { 559 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 560 if (cpHost != null) { 561 cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser()); 562 } 563 564 // TODO: Clean up split and merge. Currently all over the place. 565 // Notify QuotaManager and RegionNormalizer 566 try { 567 env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion()); 568 } catch (QuotaExceededException e) { 569 // TODO: why is this here? split requests can be submitted by actors other than the normalizer 570 env.getMasterServices() 571 .getRegionNormalizerManager() 572 .planSkipped(NormalizationPlan.PlanType.SPLIT); 573 throw e; 574 } 575 } 576 577 /** 578 * Action after rollback a split table region action. 579 * @param env MasterProcedureEnv 580 */ 581 private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException { 582 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 583 if (cpHost != null) { 584 cpHost.postRollBackSplitRegionAction(getUser()); 585 } 586 } 587 588 /** 589 * Rollback close parent region 590 */ 591 private void openParentRegion(MasterProcedureEnv env) throws IOException { 592 AssignmentManagerUtil.reopenRegionsForRollback(env, 593 Collections.singletonList((getParentRegion())), getRegionReplication(env), 594 getParentRegionServerName(env)); 595 } 596 597 /** 598 * Create daughter regions 599 */ 600 public void createDaughterRegions(final MasterProcedureEnv env) throws IOException { 601 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 602 final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName()); 603 final FileSystem fs = mfs.getFileSystem(); 604 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( 605 env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false); 606 regionFs.createSplitsDir(daughterOneRI, daughterTwoRI); 607 608 Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs); 609 610 assertReferenceFileCount(fs, expectedReferences.getFirst(), 611 regionFs.getSplitsDir(daughterOneRI)); 612 //Move the files from the temporary .splits to the final /table/region directory 613 regionFs.commitDaughterRegion(daughterOneRI); 614 assertReferenceFileCount(fs, expectedReferences.getFirst(), 615 new Path(tabledir, daughterOneRI.getEncodedName())); 616 617 assertReferenceFileCount(fs, expectedReferences.getSecond(), 618 regionFs.getSplitsDir(daughterTwoRI)); 619 regionFs.commitDaughterRegion(daughterTwoRI); 620 assertReferenceFileCount(fs, expectedReferences.getSecond(), 621 new Path(tabledir, daughterTwoRI.getEncodedName())); 622 } 623 624 /** 625 * Create Split directory 626 * @param env MasterProcedureEnv 627 */ 628 private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, 629 final HRegionFileSystem regionFs) throws IOException { 630 final Configuration conf = env.getMasterConfiguration(); 631 TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 632 // The following code sets up a thread pool executor with as many slots as 633 // there's files to split. It then fires up everything, waits for 634 // completion and finally checks for any exception 635 // 636 // Note: splitStoreFiles creates daughter region dirs under the parent splits dir 637 // Nothing to unroll here if failure -- re-run createSplitsDir will 638 // clean this up. 639 int nbFiles = 0; 640 final Map<String, Collection<StoreFileInfo>> files = 641 new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount()); 642 for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) { 643 String family = cfd.getNameAsString(); 644 Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family); 645 if (sfis == null) { 646 continue; 647 } 648 Collection<StoreFileInfo> filteredSfis = null; 649 for (StoreFileInfo sfi : sfis) { 650 // Filter. There is a lag cleaning up compacted reference files. They get cleared 651 // after a delay in case outstanding Scanners still have references. Because of this, 652 // the listing of the Store content may have straggler reference files. Skip these. 653 // It should be safe to skip references at this point because we checked above with 654 // the region if it thinks it is splittable and if we are here, it thinks it is 655 // splitable. 656 if (sfi.isReference()) { 657 LOG.info("Skipping split of " + sfi + "; presuming ready for archiving."); 658 continue; 659 } 660 if (filteredSfis == null) { 661 filteredSfis = new ArrayList<StoreFileInfo>(sfis.size()); 662 files.put(family, filteredSfis); 663 } 664 filteredSfis.add(sfi); 665 nbFiles++; 666 } 667 } 668 if (nbFiles == 0) { 669 // no file needs to be splitted. 670 return new Pair<Integer, Integer>(0, 0); 671 } 672 // Max #threads is the smaller of the number of storefiles or the default max determined above. 673 int maxThreads = Math.min( 674 conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, 675 conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)), 676 nbFiles); 677 LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" + 678 getParentRegion().getShortNameToLog() + ", threads=" + maxThreads); 679 final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads, 680 new ThreadFactoryBuilder().setNameFormat("StoreFileSplitter-pool-%d").setDaemon(true) 681 .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build()); 682 final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles); 683 684 // Split each store file. 685 for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) { 686 byte[] familyName = Bytes.toBytes(e.getKey()); 687 final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName); 688 final Collection<StoreFileInfo> storeFiles = e.getValue(); 689 if (storeFiles != null && storeFiles.size() > 0) { 690 for (StoreFileInfo storeFileInfo : storeFiles) { 691 // As this procedure is running on master, use CacheConfig.DISABLED means 692 // don't cache any block. 693 StoreFileSplitter sfs = 694 new StoreFileSplitter(regionFs, familyName, new HStoreFile( 695 storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED)); 696 futures.add(threadPool.submit(sfs)); 697 } 698 } 699 } 700 // Shutdown the pool 701 threadPool.shutdown(); 702 703 // Wait for all the tasks to finish. 704 // When splits ran on the RegionServer, how-long-to-wait-configuration was named 705 // hbase.regionserver.fileSplitTimeout. If set, use its value. 706 long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 707 conf.getLong("hbase.regionserver.fileSplitTimeout", 600000)); 708 try { 709 boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS); 710 if (stillRunning) { 711 threadPool.shutdownNow(); 712 // wait for the thread to shutdown completely. 713 while (!threadPool.isTerminated()) { 714 Thread.sleep(50); 715 } 716 throw new IOException( 717 "Took too long to split the" + " files and create the references, aborting split"); 718 } 719 } catch (InterruptedException e) { 720 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 721 } 722 723 int daughterA = 0; 724 int daughterB = 0; 725 // Look for any exception 726 for (Future<Pair<Path, Path>> future : futures) { 727 try { 728 Pair<Path, Path> p = future.get(); 729 daughterA += p.getFirst() != null ? 1 : 0; 730 daughterB += p.getSecond() != null ? 1 : 0; 731 } catch (InterruptedException e) { 732 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 733 } catch (ExecutionException e) { 734 throw new IOException(e); 735 } 736 } 737 738 if (LOG.isDebugEnabled()) { 739 LOG.debug("pid=" + getProcId() + " split storefiles for region " + 740 getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA + 741 " storefiles, Daughter B: " + daughterB + " storefiles."); 742 } 743 return new Pair<Integer, Integer>(daughterA, daughterB); 744 } 745 746 private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount, 747 final Path dir) throws IOException { 748 if (expectedReferenceFileCount != 0 && 749 expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) { 750 throw new IOException("Failing split. Expected reference file count isn't equal."); 751 } 752 } 753 754 private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) 755 throws IOException { 756 if (LOG.isDebugEnabled()) { 757 LOG.debug("pid=" + getProcId() + " splitting started for store file: " + 758 sf.getPath() + " for region: " + getParentRegion().getShortNameToLog()); 759 } 760 761 final byte[] splitRow = getSplitRow(); 762 final String familyName = Bytes.toString(family); 763 final Path path_first = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow, 764 false, splitPolicy); 765 final Path path_second = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow, 766 true, splitPolicy); 767 if (LOG.isDebugEnabled()) { 768 LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + 769 sf.getPath() + " for region: " + getParentRegion().getShortNameToLog()); 770 } 771 return new Pair<Path,Path>(path_first, path_second); 772 } 773 774 /** 775 * Utility class used to do the file splitting / reference writing 776 * in parallel instead of sequentially. 777 */ 778 private class StoreFileSplitter implements Callable<Pair<Path,Path>> { 779 private final HRegionFileSystem regionFs; 780 private final byte[] family; 781 private final HStoreFile sf; 782 783 /** 784 * Constructor that takes what it needs to split 785 * @param regionFs the file system 786 * @param family Family that contains the store file 787 * @param sf which file 788 */ 789 public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) { 790 this.regionFs = regionFs; 791 this.sf = sf; 792 this.family = family; 793 } 794 795 @Override 796 public Pair<Path,Path> call() throws IOException { 797 return splitStoreFile(regionFs, family, sf); 798 } 799 } 800 801 /** 802 * Post split region actions before the Point-of-No-Return step 803 * @param env MasterProcedureEnv 804 **/ 805 private void preSplitRegionBeforeMETA(final MasterProcedureEnv env) 806 throws IOException, InterruptedException { 807 final List<Mutation> metaEntries = new ArrayList<Mutation>(); 808 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 809 if (cpHost != null) { 810 cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser()); 811 try { 812 for (Mutation p : metaEntries) { 813 RegionInfo.parseRegionName(p.getRow()); 814 } 815 } catch (IOException e) { 816 LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as " 817 + "region name." 818 + "Mutations from coprocessor should only for hbase:meta table."); 819 throw e; 820 } 821 } 822 } 823 824 /** 825 * Add daughter regions to META 826 * @param env MasterProcedureEnv 827 */ 828 private void updateMeta(final MasterProcedureEnv env) throws IOException { 829 env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env), 830 daughterOneRI, daughterTwoRI); 831 } 832 833 /** 834 * Pre split region actions after the Point-of-No-Return step 835 * @param env MasterProcedureEnv 836 **/ 837 private void preSplitRegionAfterMETA(final MasterProcedureEnv env) 838 throws IOException, InterruptedException { 839 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 840 if (cpHost != null) { 841 cpHost.preSplitAfterMETAAction(getUser()); 842 } 843 } 844 845 /** 846 * Post split region actions 847 * @param env MasterProcedureEnv 848 **/ 849 private void postSplitRegion(final MasterProcedureEnv env) throws IOException { 850 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 851 if (cpHost != null) { 852 cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser()); 853 } 854 } 855 856 private ServerName getParentRegionServerName(final MasterProcedureEnv env) { 857 return env.getMasterServices().getAssignmentManager().getRegionStates() 858 .getRegionServerOfRegion(getParentRegion()); 859 } 860 861 private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env) 862 throws IOException { 863 return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env, 864 Stream.of(getParentRegion()), getRegionReplication(env)); 865 } 866 867 private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env) 868 throws IOException { 869 List<RegionInfo> hris = new ArrayList<RegionInfo>(2); 870 hris.add(daughterOneRI); 871 hris.add(daughterTwoRI); 872 return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env, hris, 873 getRegionReplication(env), getParentRegionServerName(env)); 874 } 875 876 private int getRegionReplication(final MasterProcedureEnv env) throws IOException { 877 final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 878 return htd.getRegionReplication(); 879 } 880 881 private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException { 882 MasterFileSystem fs = env.getMasterFileSystem(); 883 long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(), 884 getParentRegion(), fs::getFileSystem, fs::getWALFileSystem); 885 if (maxSequenceId > 0) { 886 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 887 getWALRegionDir(env, daughterOneRI), maxSequenceId); 888 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 889 getWALRegionDir(env, daughterTwoRI), maxSequenceId); 890 } 891 } 892 893 @Override 894 protected boolean abort(MasterProcedureEnv env) { 895 // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all 896 // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this 897 // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022. 898 return isRollbackSupported(getCurrentState())? super.abort(env): false; 899 } 900}