001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import java.io.IOException; 021import java.io.InterruptedIOException; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.HashMap; 027import java.util.List; 028import java.util.Map; 029import java.util.concurrent.Callable; 030import java.util.concurrent.ExecutionException; 031import java.util.concurrent.ExecutorService; 032import java.util.concurrent.Executors; 033import java.util.concurrent.Future; 034import java.util.concurrent.TimeUnit; 035import java.util.stream.Stream; 036import org.apache.hadoop.conf.Configuration; 037import org.apache.hadoop.fs.FileSystem; 038import org.apache.hadoop.fs.Path; 039import org.apache.hadoop.hbase.DoNotRetryIOException; 040import org.apache.hadoop.hbase.HConstants; 041import org.apache.hadoop.hbase.ServerName; 042import org.apache.hadoop.hbase.TableName; 043import org.apache.hadoop.hbase.UnknownRegionException; 044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 045import org.apache.hadoop.hbase.client.MasterSwitchType; 046import 
org.apache.hadoop.hbase.client.Mutation; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import org.apache.hadoop.hbase.client.RegionInfoBuilder; 049import org.apache.hadoop.hbase.client.TableDescriptor; 050import org.apache.hadoop.hbase.io.hfile.CacheConfig; 051import org.apache.hadoop.hbase.master.MasterCoprocessorHost; 052import org.apache.hadoop.hbase.master.MasterFileSystem; 053import org.apache.hadoop.hbase.master.RegionState.State; 054import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan; 055import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; 056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 057import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; 058import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 059import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 060import org.apache.hadoop.hbase.quotas.QuotaExceededException; 061import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 062import org.apache.hadoop.hbase.regionserver.HStore; 063import org.apache.hadoop.hbase.regionserver.HStoreFile; 064import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy; 065import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 066import org.apache.hadoop.hbase.util.Bytes; 067import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 068import org.apache.hadoop.hbase.util.FSUtils; 069import org.apache.hadoop.hbase.util.Pair; 070import org.apache.hadoop.hbase.util.Threads; 071import org.apache.hadoop.hbase.wal.WALSplitUtil; 072import org.apache.hadoop.util.ReflectionUtils; 073import org.apache.yetus.audience.InterfaceAudience; 074import org.slf4j.Logger; 075import org.slf4j.LoggerFactory; 076 077import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 078 079import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 080import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; 081import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 082import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState; 083 084/** 085 * The procedure to split a region in a table. 086 * Takes lock on the parent region. 087 * It holds the lock for the life of the procedure. 088 * <p>Throws exception on construction if determines context hostile to spllt (cluster going 089 * down or master is shutting down or table is disabled).</p> 090 */ 091@InterfaceAudience.Private 092public class SplitTableRegionProcedure 093 extends AbstractStateMachineRegionProcedure<SplitTableRegionState> { 094 private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class); 095 private RegionInfo daughter_1_RI; 096 private RegionInfo daughter_2_RI; 097 private byte[] bestSplitRow; 098 private RegionSplitPolicy splitPolicy; 099 100 public SplitTableRegionProcedure() { 101 // Required by the Procedure framework to create the procedure on replay 102 } 103 104 public SplitTableRegionProcedure(final MasterProcedureEnv env, 105 final RegionInfo regionToSplit, final byte[] splitRow) throws IOException { 106 super(env, regionToSplit); 107 preflightChecks(env, true); 108 // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here 109 // we fail-fast on construction. There it skips the split with just a warning. 
110 checkOnline(env, regionToSplit); 111 this.bestSplitRow = splitRow; 112 checkSplittable(env, regionToSplit, bestSplitRow); 113 final TableName table = regionToSplit.getTable(); 114 final long rid = getDaughterRegionIdTimestamp(regionToSplit); 115 this.daughter_1_RI = RegionInfoBuilder.newBuilder(table) 116 .setStartKey(regionToSplit.getStartKey()) 117 .setEndKey(bestSplitRow) 118 .setSplit(false) 119 .setRegionId(rid) 120 .build(); 121 this.daughter_2_RI = RegionInfoBuilder.newBuilder(table) 122 .setStartKey(bestSplitRow) 123 .setEndKey(regionToSplit.getEndKey()) 124 .setSplit(false) 125 .setRegionId(rid) 126 .build(); 127 TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 128 if(htd.getRegionSplitPolicyClassName() != null) { 129 // Since we don't have region reference here, creating the split policy instance without it. 130 // This can be used to invoke methods which don't require Region reference. This instantiation 131 // of a class on Master-side though it only makes sense on the RegionServer-side is 132 // for Phoenix Local Indexing. Refer HBASE-12583 for more information. 133 Class<? 
extends RegionSplitPolicy> clazz = 134 RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration()); 135 this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration()); 136 } 137 } 138 139 @Override 140 protected LockState acquireLock(final MasterProcedureEnv env) { 141 if (env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(), 142 daughter_1_RI, daughter_2_RI)) { 143 try { 144 LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks()); 145 } catch (IOException e) { 146 // Ignore, just for logging 147 } 148 return LockState.LOCK_EVENT_WAIT; 149 } 150 return LockState.LOCK_ACQUIRED; 151 } 152 153 @Override 154 protected void releaseLock(final MasterProcedureEnv env) { 155 env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughter_1_RI, 156 daughter_2_RI); 157 } 158 159 /** 160 * Check whether the region is splittable 161 * @param env MasterProcedureEnv 162 * @param regionToSplit parent Region to be split 163 * @param splitRow if splitRow is not specified, will first try to get bestSplitRow from RS 164 * @throws IOException 165 */ 166 private void checkSplittable(final MasterProcedureEnv env, 167 final RegionInfo regionToSplit, final byte[] splitRow) throws IOException { 168 // Ask the remote RS if this region is splittable. 169 // If we get an IOE, report it along w/ the failure so can see why we are not splittable at this time. 
170 if(regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 171 throw new IllegalArgumentException ("Can't invoke split on non-default regions directly"); 172 } 173 RegionStateNode node = 174 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 175 IOException splittableCheckIOE = null; 176 boolean splittable = false; 177 if (node != null) { 178 try { 179 if (bestSplitRow == null || bestSplitRow.length == 0) { 180 LOG 181 .info("splitKey isn't explicitly specified, will try to find a best split key from RS"); 182 } 183 // Always set bestSplitRow request as true here, 184 // need to call Region#checkSplit to check it splittable or not 185 GetRegionInfoResponse response = AssignmentManagerUtil.getRegionInfoResponse(env, 186 node.getRegionLocation(), node.getRegionInfo(), true); 187 if(bestSplitRow == null || bestSplitRow.length == 0) { 188 bestSplitRow = response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null; 189 } 190 splittable = response.hasSplittable() && response.getSplittable(); 191 192 if (LOG.isDebugEnabled()) { 193 LOG.debug("Splittable=" + splittable + " " + node.toShortString()); 194 } 195 } catch (IOException e) { 196 splittableCheckIOE = e; 197 } 198 } 199 200 if (!splittable) { 201 IOException e = 202 new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable"); 203 if (splittableCheckIOE != null) { 204 e.initCause(splittableCheckIOE); 205 } 206 throw e; 207 } 208 209 if (bestSplitRow == null || bestSplitRow.length == 0) { 210 throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " + 211 "maybe table is too small for auto split. 
For force split, try specifying split row"); 212 } 213 214 if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) { 215 throw new DoNotRetryIOException( 216 "Split row is equal to startkey: " + Bytes.toStringBinary(splitRow)); 217 } 218 219 if (!regionToSplit.containsRow(bestSplitRow)) { 220 throw new DoNotRetryIOException("Split row is not inside region key range splitKey:" + 221 Bytes.toStringBinary(splitRow) + " region: " + regionToSplit); 222 } 223 } 224 225 /** 226 * Calculate daughter regionid to use. 227 * @param hri Parent {@link RegionInfo} 228 * @return Daughter region id (timestamp) to use. 229 */ 230 private static long getDaughterRegionIdTimestamp(final RegionInfo hri) { 231 long rid = EnvironmentEdgeManager.currentTime(); 232 // Regionid is timestamp. Can't be less than that of parent else will insert 233 // at wrong location in hbase:meta (See HBASE-710). 234 if (rid < hri.getRegionId()) { 235 LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() + 236 " but current time here is " + rid); 237 rid = hri.getRegionId() + 1; 238 } 239 return rid; 240 } 241 242 private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException { 243 AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()), 244 getRegionReplication(env)); 245 } 246 247 private void checkClosedRegions(MasterProcedureEnv env) throws IOException { 248 // theoretically this should not happen any more after we use TRSP, but anyway let's add a check 249 // here 250 AssignmentManagerUtil.checkClosedRegion(env, getParentRegion()); 251 } 252 253 @Override 254 protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state) 255 throws InterruptedException { 256 LOG.trace("{} execute state={}", this, state); 257 258 try { 259 switch (state) { 260 case SPLIT_TABLE_REGION_PREPARE: 261 if (prepareSplitRegion(env)) { 262 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION); 263 break; 264 } else { 265 return 
Flow.NO_MORE_STATE; 266 } 267 case SPLIT_TABLE_REGION_PRE_OPERATION: 268 preSplitRegion(env); 269 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION); 270 break; 271 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 272 addChildProcedure(createUnassignProcedures(env)); 273 setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS); 274 break; 275 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 276 checkClosedRegions(env); 277 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS); 278 break; 279 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 280 removeNonDefaultReplicas(env); 281 createDaughterRegions(env); 282 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE); 283 break; 284 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 285 writeMaxSequenceIdFile(env); 286 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META); 287 break; 288 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 289 preSplitRegionBeforeMETA(env); 290 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META); 291 break; 292 case SPLIT_TABLE_REGION_UPDATE_META: 293 updateMeta(env); 294 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META); 295 break; 296 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 297 preSplitRegionAfterMETA(env); 298 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS); 299 break; 300 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 301 addChildProcedure(createAssignProcedures(env)); 302 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION); 303 break; 304 case SPLIT_TABLE_REGION_POST_OPERATION: 305 postSplitRegion(env); 306 return Flow.NO_MORE_STATE; 307 default: 308 throw new UnsupportedOperationException(this + " unhandled state=" + state); 309 } 310 } catch (IOException e) { 311 String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this; 312 if 
(!isRollbackSupported(state)) { 313 // We reach a state that cannot be rolled back. We just need to keep retrying. 314 LOG.warn(msg, e); 315 } else { 316 LOG.error(msg, e); 317 setFailure("master-split-regions", e); 318 } 319 } 320 // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split 321 return Flow.HAS_MORE_STATE; 322 } 323 324 /** 325 * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously 326 * submitted for parent region to be split (rollback doesn't wait on the completion of the 327 * AssignProcedure) . This can be improved by changing rollback() to support sub-procedures. 328 * See HBASE-19851 for details. 329 */ 330 @Override 331 protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state) 332 throws IOException, InterruptedException { 333 LOG.trace("{} rollback state={}", this, state); 334 335 try { 336 switch (state) { 337 case SPLIT_TABLE_REGION_POST_OPERATION: 338 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 339 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 340 case SPLIT_TABLE_REGION_UPDATE_META: 341 // PONR 342 throw new UnsupportedOperationException(this + " unhandled state=" + state); 343 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 344 break; 345 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 346 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 347 // Doing nothing, as re-open parent region would clean up daughter region directories. 
348 break; 349 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 350 // Doing nothing, in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, 351 // we will bring parent region online 352 break; 353 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 354 openParentRegion(env); 355 break; 356 case SPLIT_TABLE_REGION_PRE_OPERATION: 357 postRollBackSplitRegion(env); 358 break; 359 case SPLIT_TABLE_REGION_PREPARE: 360 break; // nothing to do 361 default: 362 throw new UnsupportedOperationException(this + " unhandled state=" + state); 363 } 364 } catch (IOException e) { 365 // This will be retried. Unless there is a bug in the code, 366 // this should be just a "temporary error" (e.g. network down) 367 LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state + 368 " for splitting the region " 369 + getParentRegion().getEncodedName() + " in table " + getTableName(), e); 370 throw e; 371 } 372 } 373 374 /* 375 * Check whether we are in the state that can be rollback 376 */ 377 @Override 378 protected boolean isRollbackSupported(final SplitTableRegionState state) { 379 switch (state) { 380 case SPLIT_TABLE_REGION_POST_OPERATION: 381 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 382 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 383 case SPLIT_TABLE_REGION_UPDATE_META: 384 // It is not safe to rollback if we reach to these states. 
385 return false; 386 default: 387 break; 388 } 389 return true; 390 } 391 392 @Override 393 protected SplitTableRegionState getState(final int stateId) { 394 return SplitTableRegionState.forNumber(stateId); 395 } 396 397 @Override 398 protected int getStateId(final SplitTableRegionState state) { 399 return state.getNumber(); 400 } 401 402 @Override 403 protected SplitTableRegionState getInitialState() { 404 return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE; 405 } 406 407 @Override 408 protected void serializeStateData(ProcedureStateSerializer serializer) 409 throws IOException { 410 super.serializeStateData(serializer); 411 412 final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg = 413 MasterProcedureProtos.SplitTableRegionStateData.newBuilder() 414 .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser())) 415 .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion())) 416 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_1_RI)) 417 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_2_RI)); 418 serializer.serialize(splitTableRegionMsg.build()); 419 } 420 421 @Override 422 protected void deserializeStateData(ProcedureStateSerializer serializer) 423 throws IOException { 424 super.deserializeStateData(serializer); 425 426 final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg = 427 serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class); 428 setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo())); 429 setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo())); 430 assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2); 431 daughter_1_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0)); 432 daughter_2_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1)); 433 } 434 435 @Override 436 public void toStringClassDetails(StringBuilder sb) { 437 sb.append(getClass().getSimpleName()); 438 
sb.append(" table="); 439 sb.append(getTableName()); 440 sb.append(", parent="); 441 sb.append(getParentRegion().getShortNameToLog()); 442 sb.append(", daughterA="); 443 sb.append(daughter_1_RI.getShortNameToLog()); 444 sb.append(", daughterB="); 445 sb.append(daughter_2_RI.getShortNameToLog()); 446 } 447 448 private RegionInfo getParentRegion() { 449 return getRegion(); 450 } 451 452 @Override 453 public TableOperationType getTableOperationType() { 454 return TableOperationType.REGION_SPLIT; 455 } 456 457 @Override 458 protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { 459 return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics(); 460 } 461 462 private byte[] getSplitRow() { 463 return daughter_2_RI.getStartKey(); 464 } 465 466 private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED }; 467 468 /** 469 * Prepare to Split region. 470 * @param env MasterProcedureEnv 471 */ 472 @VisibleForTesting 473 public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { 474 // Fail if we are taking snapshot for the given table 475 if (env.getMasterServices().getSnapshotManager() 476 .isTakingSnapshot(getParentRegion().getTable())) { 477 setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() + 478 ", because we are taking snapshot for the table " + getParentRegion().getTable())); 479 return false; 480 } 481 // Check whether the region is splittable 482 RegionStateNode node = 483 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 484 485 if (node == null) { 486 throw new UnknownRegionException(getParentRegion().getRegionNameAsString()); 487 } 488 489 RegionInfo parentHRI = node.getRegionInfo(); 490 if (parentHRI == null) { 491 LOG.info("Unsplittable; parent region is null; node={}", node); 492 return false; 493 } 494 // Lookup the parent HRI state from the AM, which has the latest updated info. 
495 // Protect against the case where concurrent SPLIT requests came in and succeeded 496 // just before us. 497 if (node.isInState(State.SPLIT)) { 498 LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT"); 499 return false; 500 } 501 if (parentHRI.isSplit() || parentHRI.isOffline()) { 502 LOG.info("Split of " + parentHRI + " skipped because offline/split."); 503 return false; 504 } 505 506 // expected parent to be online or closed 507 if (!node.isInState(EXPECTED_SPLIT_STATES)) { 508 // We may have SPLIT already? 509 setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() + 510 " FAILED because state=" + node.getState() + "; expected " + 511 Arrays.toString(EXPECTED_SPLIT_STATES))); 512 return false; 513 } 514 515 // Since we have the lock and the master is coordinating the operation 516 // we are always able to split the region 517 if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { 518 LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); 519 setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() + 520 " failed due to split switch off")); 521 return false; 522 } 523 524 if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) { 525 LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(), 526 parentHRI); 527 setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() 528 + " failed as region split is disabled for the table")); 529 return false; 530 } 531 532 // set node state as SPLITTING 533 node.setState(State.SPLITTING); 534 535 return true; 536 } 537 538 /** 539 * Action before splitting region in a table. 
540 * @param env MasterProcedureEnv 541 */ 542 private void preSplitRegion(final MasterProcedureEnv env) 543 throws IOException, InterruptedException { 544 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 545 if (cpHost != null) { 546 cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser()); 547 } 548 549 // TODO: Clean up split and merge. Currently all over the place. 550 // Notify QuotaManager and RegionNormalizer 551 try { 552 env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion()); 553 } catch (QuotaExceededException e) { 554 env.getMasterServices().getRegionNormalizer().planSkipped(this.getParentRegion(), 555 NormalizationPlan.PlanType.SPLIT); 556 throw e; 557 } 558 } 559 560 /** 561 * Action after rollback a split table region action. 562 * @param env MasterProcedureEnv 563 */ 564 private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException { 565 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 566 if (cpHost != null) { 567 cpHost.postRollBackSplitRegionAction(getUser()); 568 } 569 } 570 571 /** 572 * Rollback close parent region 573 */ 574 private void openParentRegion(MasterProcedureEnv env) throws IOException { 575 AssignmentManagerUtil.reopenRegionsForRollback(env, 576 Collections.singletonList((getParentRegion())), getRegionReplication(env), 577 getParentRegionServerName(env)); 578 } 579 580 /** 581 * Create daughter regions 582 */ 583 @VisibleForTesting 584 public void createDaughterRegions(final MasterProcedureEnv env) throws IOException { 585 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 586 final Path tabledir = FSUtils.getTableDir(mfs.getRootDir(), getTableName()); 587 final FileSystem fs = mfs.getFileSystem(); 588 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( 589 env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false); 590 regionFs.createSplitsDir(daughter_1_RI, 
daughter_2_RI); 591 592 Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs); 593 594 assertReferenceFileCount(fs, expectedReferences.getFirst(), 595 regionFs.getSplitsDir(daughter_1_RI)); 596 //Move the files from the temporary .splits to the final /table/region directory 597 regionFs.commitDaughterRegion(daughter_1_RI); 598 assertReferenceFileCount(fs, expectedReferences.getFirst(), 599 new Path(tabledir, daughter_1_RI.getEncodedName())); 600 601 assertReferenceFileCount(fs, expectedReferences.getSecond(), 602 regionFs.getSplitsDir(daughter_2_RI)); 603 regionFs.commitDaughterRegion(daughter_2_RI); 604 assertReferenceFileCount(fs, expectedReferences.getSecond(), 605 new Path(tabledir, daughter_2_RI.getEncodedName())); 606 } 607 608 /** 609 * Create Split directory 610 * @param env MasterProcedureEnv 611 */ 612 private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, 613 final HRegionFileSystem regionFs) throws IOException { 614 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 615 final Configuration conf = env.getMasterConfiguration(); 616 TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 617 // The following code sets up a thread pool executor with as many slots as 618 // there's files to split. It then fires up everything, waits for 619 // completion and finally checks for any exception 620 // 621 // Note: splitStoreFiles creates daughter region dirs under the parent splits dir 622 // Nothing to unroll here if failure -- re-run createSplitsDir will 623 // clean this up. 
624 int nbFiles = 0; 625 final Map<String, Collection<StoreFileInfo>> files = 626 new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount()); 627 for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) { 628 String family = cfd.getNameAsString(); 629 Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family); 630 if (sfis == null) { 631 continue; 632 } 633 Collection<StoreFileInfo> filteredSfis = null; 634 for (StoreFileInfo sfi : sfis) { 635 // Filter. There is a lag cleaning up compacted reference files. They get cleared 636 // after a delay in case outstanding Scanners still have references. Because of this, 637 // the listing of the Store content may have straggler reference files. Skip these. 638 // It should be safe to skip references at this point because we checked above with 639 // the region if it thinks it is splittable and if we are here, it thinks it is 640 // splitable. 641 if (sfi.isReference()) { 642 LOG.info("Skipping split of " + sfi + "; presuming ready for archiving."); 643 continue; 644 } 645 if (filteredSfis == null) { 646 filteredSfis = new ArrayList<StoreFileInfo>(sfis.size()); 647 files.put(family, filteredSfis); 648 } 649 filteredSfis.add(sfi); 650 nbFiles++; 651 } 652 } 653 if (nbFiles == 0) { 654 // no file needs to be splitted. 655 return new Pair<Integer, Integer>(0, 0); 656 } 657 // Max #threads is the smaller of the number of storefiles or the default max determined above. 
658 int maxThreads = Math.min( 659 conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, 660 conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)), 661 nbFiles); 662 LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" + 663 getParentRegion().getShortNameToLog() + ", threads=" + maxThreads); 664 final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads, 665 Threads.newDaemonThreadFactory("StoreFileSplitter-%1$d")); 666 final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles); 667 668 // Split each store file. 669 for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) { 670 byte[] familyName = Bytes.toBytes(e.getKey()); 671 final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName); 672 final Collection<StoreFileInfo> storeFiles = e.getValue(); 673 if (storeFiles != null && storeFiles.size() > 0) { 674 for (StoreFileInfo storeFileInfo : storeFiles) { 675 // As this procedure is running on master, use CacheConfig.DISABLED means 676 // don't cache any block. 677 StoreFileSplitter sfs = 678 new StoreFileSplitter(regionFs, familyName, new HStoreFile(mfs.getFileSystem(), 679 storeFileInfo, conf, CacheConfig.DISABLED, hcd.getBloomFilterType(), true)); 680 futures.add(threadPool.submit(sfs)); 681 } 682 } 683 } 684 // Shutdown the pool 685 threadPool.shutdown(); 686 687 // Wait for all the tasks to finish. 688 // When splits ran on the RegionServer, how-long-to-wait-configuration was named 689 // hbase.regionserver.fileSplitTimeout. If set, use its value. 690 long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 691 conf.getLong("hbase.regionserver.fileSplitTimeout", 600000)); 692 try { 693 boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS); 694 if (stillRunning) { 695 threadPool.shutdownNow(); 696 // wait for the thread to shutdown completely. 
697 while (!threadPool.isTerminated()) { 698 Thread.sleep(50); 699 } 700 throw new IOException( 701 "Took too long to split the" + " files and create the references, aborting split"); 702 } 703 } catch (InterruptedException e) { 704 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 705 } 706 707 int daughterA = 0; 708 int daughterB = 0; 709 // Look for any exception 710 for (Future<Pair<Path, Path>> future : futures) { 711 try { 712 Pair<Path, Path> p = future.get(); 713 daughterA += p.getFirst() != null ? 1 : 0; 714 daughterB += p.getSecond() != null ? 1 : 0; 715 } catch (InterruptedException e) { 716 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 717 } catch (ExecutionException e) { 718 throw new IOException(e); 719 } 720 } 721 722 if (LOG.isDebugEnabled()) { 723 LOG.debug("pid=" + getProcId() + " split storefiles for region " + 724 getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA + 725 " storefiles, Daughter B: " + daughterB + " storefiles."); 726 } 727 return new Pair<Integer, Integer>(daughterA, daughterB); 728 } 729 730 private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount, 731 final Path dir) throws IOException { 732 if (expectedReferenceFileCount != 0 && 733 expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) { 734 throw new IOException("Failing split. 
Expected reference file count isn't equal."); 735 } 736 } 737 738 private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) 739 throws IOException { 740 if (LOG.isDebugEnabled()) { 741 LOG.debug("pid=" + getProcId() + " splitting started for store file: " + 742 sf.getPath() + " for region: " + getParentRegion().getShortNameToLog()); 743 } 744 745 final byte[] splitRow = getSplitRow(); 746 final String familyName = Bytes.toString(family); 747 final Path path_first = regionFs.splitStoreFile(this.daughter_1_RI, familyName, sf, splitRow, 748 false, splitPolicy); 749 final Path path_second = regionFs.splitStoreFile(this.daughter_2_RI, familyName, sf, splitRow, 750 true, splitPolicy); 751 if (LOG.isDebugEnabled()) { 752 LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + 753 sf.getPath() + " for region: " + getParentRegion().getShortNameToLog()); 754 } 755 return new Pair<Path,Path>(path_first, path_second); 756 } 757 758 /** 759 * Utility class used to do the file splitting / reference writing 760 * in parallel instead of sequentially. 
761 */ 762 private class StoreFileSplitter implements Callable<Pair<Path,Path>> { 763 private final HRegionFileSystem regionFs; 764 private final byte[] family; 765 private final HStoreFile sf; 766 767 /** 768 * Constructor that takes what it needs to split 769 * @param regionFs the file system 770 * @param family Family that contains the store file 771 * @param sf which file 772 */ 773 public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) { 774 this.regionFs = regionFs; 775 this.sf = sf; 776 this.family = family; 777 } 778 779 @Override 780 public Pair<Path,Path> call() throws IOException { 781 return splitStoreFile(regionFs, family, sf); 782 } 783 } 784 785 /** 786 * Post split region actions before the Point-of-No-Return step 787 * @param env MasterProcedureEnv 788 **/ 789 private void preSplitRegionBeforeMETA(final MasterProcedureEnv env) 790 throws IOException, InterruptedException { 791 final List<Mutation> metaEntries = new ArrayList<Mutation>(); 792 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 793 if (cpHost != null) { 794 cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser()); 795 try { 796 for (Mutation p : metaEntries) { 797 RegionInfo.parseRegionName(p.getRow()); 798 } 799 } catch (IOException e) { 800 LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as " 801 + "region name." 
802 + "Mutations from coprocessor should only for hbase:meta table."); 803 throw e; 804 } 805 } 806 } 807 808 /** 809 * Add daughter regions to META 810 * @param env MasterProcedureEnv 811 */ 812 private void updateMeta(final MasterProcedureEnv env) throws IOException { 813 env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env), 814 daughter_1_RI, daughter_2_RI); 815 } 816 817 /** 818 * Pre split region actions after the Point-of-No-Return step 819 * @param env MasterProcedureEnv 820 **/ 821 private void preSplitRegionAfterMETA(final MasterProcedureEnv env) 822 throws IOException, InterruptedException { 823 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 824 if (cpHost != null) { 825 cpHost.preSplitAfterMETAAction(getUser()); 826 } 827 } 828 829 /** 830 * Post split region actions 831 * @param env MasterProcedureEnv 832 **/ 833 private void postSplitRegion(final MasterProcedureEnv env) throws IOException { 834 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 835 if (cpHost != null) { 836 cpHost.postCompletedSplitRegionAction(daughter_1_RI, daughter_2_RI, getUser()); 837 } 838 } 839 840 private ServerName getParentRegionServerName(final MasterProcedureEnv env) { 841 return env.getMasterServices().getAssignmentManager().getRegionStates() 842 .getRegionServerOfRegion(getParentRegion()); 843 } 844 845 private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env) 846 throws IOException { 847 return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env, 848 Stream.of(getParentRegion()), getRegionReplication(env)); 849 } 850 851 private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env) 852 throws IOException { 853 List<RegionInfo> hris = new ArrayList<RegionInfo>(2); 854 hris.add(daughter_1_RI); 855 hris.add(daughter_2_RI); 856 return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env, hris, 857 
getRegionReplication(env), getParentRegionServerName(env));
  }

  /**
   * Reads the region replication count from the descriptor of this procedure's table.
   * @param env MasterProcedureEnv
   * @return the value of {@code getRegionReplication()} on the table descriptor
   * @throws IOException if the table descriptor cannot be fetched
   */
  private int getRegionReplication(final MasterProcedureEnv env) throws IOException {
    return env.getMasterServices().getTableDescriptors().get(getTableName())
        .getRegionReplication();
  }

  /**
   * Looks up the parent region's max sequence id and, when it is positive, writes it as a
   * sequence id file into the WAL region directory of each daughter.
   * @param env MasterProcedureEnv
   * @throws IOException if the sequence id cannot be read or a file cannot be written
   */
  private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
    final MasterFileSystem mfs = env.getMasterFileSystem();
    final long parentMaxSeqId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(),
      getParentRegion(), mfs::getFileSystem, mfs::getWALFileSystem);
    if (parentMaxSeqId <= 0) {
      // Nothing to carry over to the daughters.
      return;
    }
    for (RegionInfo daughter : Arrays.asList(daughter_1_RI, daughter_2_RI)) {
      WALSplitUtil.writeRegionSequenceIdFile(mfs.getWALFileSystem(),
        getWALRegionDir(env, daughter), parentMaxSeqId);
    }
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // Abort means rollback, and not every step of this procedure can be rolled back.
    // HBASE-18018 added abort to all Procedures, but this one has a PONR and cannot be aborted
    // once it enters that range of steps. What to do when an operator wants to cancel it there
    // is an open question; see HBASE-20022.
    if (!isRollbackSupported(getCurrentState())) {
      return false;
    }
    return super.abort(env);
  }
}