/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.assignment;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.quotas.QuotaExceededException;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WALSplitUtil;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;

/**
 * The procedure to split a region in a table.
 * Takes lock on the parent region.
 * It holds the lock for the life of the procedure.
 * <p>Throws exception on construction if determines context hostile to split (cluster going
 * down or master is shutting down or table is disabled).</p>
 */
@InterfaceAudience.Private
public class SplitTableRegionProcedure
    extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
  /** Daughter covering [parent start key, split row). */
  private RegionInfo daughterOneRI;
  /** Daughter covering [split row, parent end key). */
  private RegionInfo daughterTwoRI;
  /** Split row supplied by the caller or, when absent, the one reported by the RegionServer. */
  private byte[] bestSplitRow;
  /**
   * Split policy instantiated in the constructor only. It is not written by
   * {@link #serializeStateData(ProcedureStateSerializer)}, so it is null on a procedure that was
   * created through the no-arg constructor and restored from serialized state.
   */
  private RegionSplitPolicy splitPolicy;

  public SplitTableRegionProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }

  /**
   * Creates a split procedure for {@code regionToSplit}, validating up front that the region is
   * online and splittable and computing both daughter {@link RegionInfo}s.
   * @param env MasterProcedureEnv
   * @param regionToSplit parent region to be split
   * @param splitRow explicit split row; when null or empty the best split row is requested from
   *          the RegionServer hosting the region
   * @throws IOException if the preflight/online checks fail or the region is not splittable
   */
  public SplitTableRegionProcedure(final MasterProcedureEnv env,
      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
    super(env, regionToSplit);
    preflightChecks(env, true);
    // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here
    // we fail-fast on construction. There it skips the split with just a warning.
    checkOnline(env, regionToSplit);
    this.bestSplitRow = splitRow;
    // May replace bestSplitRow with the row reported by the RegionServer.
    checkSplittable(env, regionToSplit, bestSplitRow);
    final TableName table = regionToSplit.getTable();
    // Both daughters share the same region id (timestamp).
    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
    this.daughterOneRI = RegionInfoBuilder.newBuilder(table)
        .setStartKey(regionToSplit.getStartKey())
        .setEndKey(bestSplitRow)
        .setSplit(false)
        .setRegionId(rid)
        .build();
    this.daughterTwoRI = RegionInfoBuilder.newBuilder(table)
        .setStartKey(bestSplitRow)
        .setEndKey(regionToSplit.getEndKey())
        .setSplit(false)
        .setRegionId(rid)
        .build();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    if(htd.getRegionSplitPolicyClassName() != null) {
      // Since we don't have region reference here, creating the split policy instance without it.
      // This can be used to invoke methods which don't require Region reference. This instantiation
      // of a class on Master-side though it only makes sense on the RegionServer-side is
      // for Phoenix Local Indexing. Refer HBASE-12583 for more information.
      Class<? extends RegionSplitPolicy> clazz =
          RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration());
      this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration());
    }
  }

  /**
   * Asks the procedure scheduler for the parent and both daughter regions.
   * @return {@code LOCK_EVENT_WAIT} when {@code waitRegions} reports that this procedure has to
   *         wait, {@code LOCK_ACQUIRED} otherwise
   */
  @Override
  protected LockState acquireLock(final MasterProcedureEnv env) {
    if (env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(),
        daughterOneRI, daughterTwoRI)) {
      try {
        LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks());
      } catch (IOException e) {
        // Ignore, just for logging
      }
      return LockState.LOCK_EVENT_WAIT;
    }
    return LockState.LOCK_ACQUIRED;
  }

  /**
   * Wakes the procedure scheduler for the same three regions passed to
   * {@link #acquireLock(MasterProcedureEnv)}.
   */
  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI,
      daughterTwoRI);
  }

  @VisibleForTesting
  public RegionInfo getDaughterOneRI() {
    return daughterOneRI;
  }

  @VisibleForTesting
  public RegionInfo getDaughterTwoRI() {
    return daughterTwoRI;
  }

  /**
   * Check whether the region is splittable. As a side effect, when no split row was supplied,
   * {@link #bestSplitRow} is set to the row reported by the RegionServer.
   * @param env MasterProcedureEnv
   * @param regionToSplit parent Region to be split
   * @param splitRow if splitRow is not specified, will first try to get bestSplitRow from RS
   * @throws IOException a {@link DoNotRetryIOException} if the region is not splittable, no split
   *           row could be determined, or the split row is the start key / outside the region
   * @throws IllegalArgumentException if {@code regionToSplit} is not the default replica
   */
  private void checkSplittable(final MasterProcedureEnv env,
      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
    // Ask the remote RS if this region is splittable.
    // If we get an IOE, report it along w/ the failure so can see why we are not splittable at
    // this time.
    if(regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
      throw new IllegalArgumentException ("Can't invoke split on non-default regions directly");
    }
    RegionStateNode node =
        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
    IOException splittableCheckIOE = null;
    boolean splittable = false;
    if (node != null) {
      try {
        if (bestSplitRow == null || bestSplitRow.length == 0) {
          LOG
            .info("splitKey isn't explicitly specified, will try to find a best split key from RS");
        }
        // Always set bestSplitRow request as true here,
        // need to call Region#checkSplit to check it splittable or not
        GetRegionInfoResponse response = AssignmentManagerUtil.getRegionInfoResponse(env,
          node.getRegionLocation(), node.getRegionInfo(), true);
        if(bestSplitRow == null || bestSplitRow.length == 0) {
          bestSplitRow = response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
        }
        splittable = response.hasSplittable() && response.getSplittable();

        if (LOG.isDebugEnabled()) {
          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
        }
      } catch (IOException e) {
        // Remembered so it can be attached as the cause of the "NOT splittable" failure below.
        splittableCheckIOE = e;
      }
    }

    if (!splittable) {
      IOException e =
          new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
      if (splittableCheckIOE != null) {
        e.initCause(splittableCheckIOE);
      }
      throw e;
    }

    if (bestSplitRow == null || bestSplitRow.length == 0) {
      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " +
        "maybe table is too small for auto split. For force split, try specifying split row");
    }

    // Report bestSplitRow (the row actually validated) rather than the splitRow argument, which
    // is null/empty whenever the row came from the RegionServer.
    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
      throw new DoNotRetryIOException(
        "Split row is equal to startkey: " + Bytes.toStringBinary(bestSplitRow));
    }

    if (!regionToSplit.containsRow(bestSplitRow)) {
      throw new DoNotRetryIOException("Split row is not inside region key range splitKey:" +
        Bytes.toStringBinary(bestSplitRow) + " region: " + regionToSplit);
    }
  }

  /**
   * Calculate daughter regionid to use.
   * @param hri Parent {@link RegionInfo}
   * @return Daughter region id (timestamp) to use.
   */
  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
    long rid = EnvironmentEdgeManager.currentTime();
    // Regionid is timestamp. Can't be less than that of parent else will insert
    // at wrong location in hbase:meta (See HBASE-710).
    if (rid < hri.getRegionId()) {
      LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
        " but current time here is " + rid);
      rid = hri.getRegionId() + 1;
    }
    return rid;
  }

  /** Delegates to {@link AssignmentManagerUtil} to remove the parent's non-default replicas. */
  private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException {
    AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()),
      getRegionReplication(env));
  }

  private void checkClosedRegions(MasterProcedureEnv env) throws IOException {
    // theoretically this should not happen any more after we use TRSP, but anyway let's add a check
    // here
    AssignmentManagerUtil.checkClosedRegion(env, getParentRegion());
  }

  /**
   * Runs the action for {@code state} and selects the next state. An {@link IOException} fails
   * the procedure when the state supports rollback; otherwise it is only logged and the same
   * state is executed again.
   */
  @Override
  protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state)
      throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_PREPARE:
          if (prepareSplitRegion(env)) {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
            break;
          } else {
            return Flow.NO_MORE_STATE;
          }
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          preSplitRegion(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          addChildProcedure(createUnassignProcedures(env));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          checkClosedRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
          removeNonDefaultReplicas(env);
          createDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
          break;
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          writeMaxSequenceIdFile(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          preSplitRegionBeforeMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
          break;
        case SPLIT_TABLE_REGION_UPDATE_META:
          updateMeta(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
          preSplitRegionAfterMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
          break;
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
          addChildProcedure(createAssignProcedures(env));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
          break;
        case SPLIT_TABLE_REGION_POST_OPERATION:
          postSplitRegion(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
      if (!isRollbackSupported(state)) {
        // We reach a state that cannot be rolled back. We just need to keep retrying.
        LOG.warn(msg, e);
      } else {
        LOG.error(msg, e);
        setFailure("master-split-regions", e);
      }
    }
    // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split
    return Flow.HAS_MORE_STATE;
  }

  /**
   * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously
   * submitted for parent region to be split (rollback doesn't wait on the completion of the
   * AssignProcedure) . This can be improved by changing rollback() to support sub-procedures.
   * See HBASE-19851 for details.
   */
  @Override
  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
      throws IOException, InterruptedException {
    LOG.trace("{} rollback state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_POST_OPERATION:
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
        case SPLIT_TABLE_REGION_UPDATE_META:
          // PONR
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          // Doing nothing, as re-open parent region would clean up daughter region directories.
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          // Doing nothing, in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION,
          // we will bring parent region online
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          openParentRegion(env);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          postRollBackSplitRegion(env);
          break;
        case SPLIT_TABLE_REGION_PREPARE:
          break; // nothing to do
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      // This will be retried. Unless there is a bug in the code,
      // this should be just a "temporary error" (e.g. network down)
      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state +
          " for splitting the region "
        + getParentRegion().getEncodedName() + " in table " + getTableName(), e);
      throw e;
    }
  }

  /*
   * Check whether we are in the state that can be rollback
   */
  @Override
  protected boolean isRollbackSupported(final SplitTableRegionState state) {
    switch (state) {
      case SPLIT_TABLE_REGION_POST_OPERATION:
      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
      case SPLIT_TABLE_REGION_UPDATE_META:
        // It is not safe to rollback if we reach to these states.
        return false;
      default:
        break;
    }
    return true;
  }

  @Override
  protected SplitTableRegionState getState(final int stateId) {
    return SplitTableRegionState.forNumber(stateId);
  }

  @Override
  protected int getStateId(final SplitTableRegionState state) {
    return state.getNumber();
  }

  @Override
  protected SplitTableRegionState getInitialState() {
    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
  }

  /**
   * Serializes the user, the parent region and the two daughter regions (daughter one first).
   * {@code bestSplitRow} and {@code splitPolicy} are not written.
   */
  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.serializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
        MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
        .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI));
    serializer.serialize(splitTableRegionMsg.build());
  }

  /**
   * Restores the user, the parent region and both daughter regions, in the order written by
   * {@link #serializeStateData(ProcedureStateSerializer)}.
   */
  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.deserializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
        serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
    assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2);
    daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
    daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getClass().getSimpleName());
    sb.append(" table=");
    sb.append(getTableName());
    sb.append(", parent=");
    sb.append(getParentRegion().getShortNameToLog());
    sb.append(", daughterA=");
    sb.append(daughterOneRI.getShortNameToLog());
    sb.append(", daughterB=");
    sb.append(daughterTwoRI.getShortNameToLog());
  }

  /** @return the region being split (the region this procedure was created for) */
  private RegionInfo getParentRegion() {
    return getRegion();
  }

  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SPLIT;
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
  }

  /**
   * @return the split row, read back from the second daughter's start key so that it is also
   *         available when only the daughters were restored from serialized state
   */
  private byte[] getSplitRow() {
    return daughterTwoRI.getStartKey();
  }

  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };

  /**
   * Prepare to Split region.
   * @param env MasterProcedureEnv
   * @return true if the parent was moved to {@code SPLITTING} and the split may proceed; false if
   *         the split is skipped (in several cases after calling {@code setFailure})
   * @throws IOException an {@link UnknownRegionException} if the parent has no region state node
   */
  @VisibleForTesting
  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
    // Fail if we are taking snapshot for the given table
    if (env.getMasterServices().getSnapshotManager()
      .isTakingSnapshot(getParentRegion().getTable())) {
      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() +
        ", because we are taking snapshot for the table " + getParentRegion().getTable()));
      return false;
    }
    // Check whether the region is splittable
    RegionStateNode node =
        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());

    if (node == null) {
      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
    }

    RegionInfo parentHRI = node.getRegionInfo();
    if (parentHRI == null) {
      LOG.info("Unsplittable; parent region is null; node={}", node);
      return false;
    }
    // Lookup the parent HRI state from the AM, which has the latest updated info.
    // Protect against the case where concurrent SPLIT requests came in and succeeded
    // just before us.
    if (node.isInState(State.SPLIT)) {
      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
      return false;
    }
    if (parentHRI.isSplit() || parentHRI.isOffline()) {
      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
      return false;
    }

    // expected parent to be online or closed
    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
      // We may have SPLIT already?
      setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() +
          " FAILED because state=" + node.getState() + "; expected " +
          Arrays.toString(EXPECTED_SPLIT_STATES)));
      return false;
    }

    // Mostly this check is not used because we already check the switch before submit a split
    // procedure. Just for safe, check the switch again. This procedure can be rollbacked if
    // the switch was set to false after submit.
    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() +
          " failed due to split switch off"));
      return false;
    }

    if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) {
      LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(),
        parentHRI);
      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString()
          + " failed as region split is disabled for the table"));
      return false;
    }

    // set node state as SPLITTING
    node.setState(State.SPLITTING);

    // Since we have the lock and the master is coordinating the operation
    // we are always able to split the region
    return true;
  }

  /**
   * Action before splitting region in a table.
   * @param env MasterProcedureEnv
   */
  private void preSplitRegion(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
    }

    // TODO: Clean up split and merge. Currently all over the place.
    // Notify QuotaManager and RegionNormalizer
    try {
      env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion());
    } catch (QuotaExceededException e) {
      env.getMasterServices().getRegionNormalizer().planSkipped(this.getParentRegion(),
          NormalizationPlan.PlanType.SPLIT);
      throw e;
    }
  }

  /**
   * Action after rollback a split table region action.
   * @param env MasterProcedureEnv
   */
  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postRollBackSplitRegionAction(getUser());
    }
  }

  /**
   * Rollback close parent region
   */
  private void openParentRegion(MasterProcedureEnv env) throws IOException {
    AssignmentManagerUtil.reopenRegionsForRollback(env,
      Collections.singletonList((getParentRegion())), getRegionReplication(env),
      getParentRegionServerName(env));
  }

  /**
   * Create daughter regions: splits the parent's store files into the splits dir, then commits
   * each daughter, checking the reference file count before and after each commit.
   */
  @VisibleForTesting
  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
    final FileSystem fs = mfs.getFileSystem();
    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
      env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
    regionFs.createSplitsDir(daughterOneRI, daughterTwoRI);

    Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);

    assertReferenceFileCount(fs, expectedReferences.getFirst(),
      regionFs.getSplitsDir(daughterOneRI));
    //Move the files from the temporary .splits to the final /table/region directory
    regionFs.commitDaughterRegion(daughterOneRI);
    assertReferenceFileCount(fs, expectedReferences.getFirst(),
      new Path(tabledir, daughterOneRI.getEncodedName()));

    assertReferenceFileCount(fs, expectedReferences.getSecond(),
      regionFs.getSplitsDir(daughterTwoRI));
    regionFs.commitDaughterRegion(daughterTwoRI);
    assertReferenceFileCount(fs, expectedReferences.getSecond(),
      new Path(tabledir, daughterTwoRI.getEncodedName()));
  }

  /**
   * Splits every non-reference store file of the parent region in parallel, writing the results
   * for both daughters through {@code regionFs}.
   * @param env MasterProcedureEnv
   * @param regionFs file system view of the parent region
   * @return number of files produced for daughter one (first) and daughter two (second)
   * @throws IOException if a split task fails, the wait is interrupted, or the tasks do not
   *           finish within the configured file split timeout
   */
  private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
      final HRegionFileSystem regionFs) throws IOException {
    final Configuration conf = env.getMasterConfiguration();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    // The following code sets up a thread pool executor with as many slots as
    // there's files to split. It then fires up everything, waits for
    // completion and finally checks for any exception
    //
    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir
    // Nothing to unroll here if failure -- re-run createSplitsDir will
    // clean this up.
    int nbFiles = 0;
    final Map<String, Collection<StoreFileInfo>> files =
        new HashMap<>(htd.getColumnFamilyCount());
    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
      String family = cfd.getNameAsString();
      Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family);
      if (sfis == null) {
        continue;
      }
      Collection<StoreFileInfo> filteredSfis = null;
      for (StoreFileInfo sfi : sfis) {
        // Filter. There is a lag cleaning up compacted reference files. They get cleared
        // after a delay in case outstanding Scanners still have references. Because of this,
        // the listing of the Store content may have straggler reference files. Skip these.
        // It should be safe to skip references at this point because we checked above with
        // the region if it thinks it is splittable and if we are here, it thinks it is
        // splitable.
        if (sfi.isReference()) {
          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
          continue;
        }
        if (filteredSfis == null) {
          filteredSfis = new ArrayList<>(sfis.size());
          files.put(family, filteredSfis);
        }
        filteredSfis.add(sfi);
        nbFiles++;
      }
    }
    if (nbFiles == 0) {
      // no file needs to be splitted.
      return new Pair<>(0, 0);
    }
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(
      conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
        conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
      nbFiles);
    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" +
        getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
      Threads.newDaemonThreadFactory("StoreFileSplitter-%1$d"));
    final List<Future<Pair<Path, Path>>> futures = new ArrayList<>(nbFiles);

    try {
      // Split each store file.
      for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) {
        byte[] familyName = Bytes.toBytes(e.getKey());
        final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
        final Collection<StoreFileInfo> storeFiles = e.getValue();
        if (storeFiles != null && !storeFiles.isEmpty()) {
          for (StoreFileInfo storeFileInfo : storeFiles) {
            // As this procedure is running on master, use CacheConfig.DISABLED means
            // don't cache any block.
            StoreFileSplitter sfs =
                new StoreFileSplitter(regionFs, familyName, new HStoreFile(
                    storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED));
            futures.add(threadPool.submit(sfs));
          }
        }
      }
    } finally {
      // Shutdown the pool. Done in a finally so the workers are released even if building or
      // submitting a task fails part-way through.
      threadPool.shutdown();
    }

    // Wait for all the tasks to finish.
    // When splits ran on the RegionServer, how-long-to-wait-configuration was named
    // hbase.regionserver.fileSplitTimeout. If set, use its value.
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
      conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
    try {
      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
      if (stillRunning) {
        threadPool.shutdownNow();
        // wait for the thread to shutdown completely.
        while (!threadPool.isTerminated()) {
          Thread.sleep(50);
        }
        throw new IOException(
            "Took too long to split the" + " files and create the references, aborting split");
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    int daughterA = 0;
    int daughterB = 0;
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
      try {
        Pair<Path, Path> p = future.get();
        // A null path means no file was written for that daughter from this store file.
        daughterA += p.getFirst() != null ? 1 : 0;
        daughterB += p.getSecond() != null ? 1 : 0;
      } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
      } catch (ExecutionException e) {
        throw new IOException(e);
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " split storefiles for region " +
          getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA +
          " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<>(daughterA, daughterB);
  }

  /**
   * Fails the split when a non-zero expected count differs from the reference file count found
   * under {@code dir}. An expected count of zero skips the check.
   */
  private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount,
      final Path dir) throws IOException {
    if (expectedReferenceFileCount != 0 &&
        expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) {
      throw new IOException("Failing split. Expected reference file count isn't equal.");
    }
  }

  /**
   * Splits one store file at the split row for both daughters.
   * @return the path written for daughter one (first) and daughter two (second), exactly as
   *         returned by {@code HRegionFileSystem#splitStoreFile}
   */
  private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf)
      throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting started for store file: " +
          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
    }

    final byte[] splitRow = getSplitRow();
    final String familyName = Bytes.toString(family);
    final Path path_first = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow,
        false, splitPolicy);
    final Path path_second = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow,
       true, splitPolicy);
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting complete for store file: " +
          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
    }
    return new Pair<Path,Path>(path_first, path_second);
  }

  /**
   * Utility class used to do the file splitting / reference writing
   * in parallel instead of sequentially.
   */
  private class StoreFileSplitter implements Callable<Pair<Path,Path>> {
    private final HRegionFileSystem regionFs;
    private final byte[] family;
    private final HStoreFile sf;

    /**
     * Constructor that takes what it needs to split
     * @param regionFs the file system
     * @param family Family that contains the store file
     * @param sf which file
     */
    public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) {
      this.regionFs = regionFs;
      this.sf = sf;
      this.family = family;
    }

    @Override
    public Pair<Path,Path> call() throws IOException {
      return splitStoreFile(regionFs, family, sf);
    }
  }

  /**
   * Post split region actions before the Point-of-No-Return step
   * @param env MasterProcedureEnv
   **/
  private void preSplitRegionBeforeMETA(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final List<Mutation> metaEntries = new ArrayList<>();
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser());
      try {
        // Only validates that each row key parses as a region name; the parsed value is unused.
        for (Mutation p : metaEntries) {
          RegionInfo.parseRegionName(p.getRow());
        }
      } catch (IOException e) {
        LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as "
            + "region name."
            + "Mutations from coprocessor should only for hbase:meta table.");
        throw e;
      }
    }
  }

  /**
   * Add daughter regions to META
   * @param env MasterProcedureEnv
   */
  private void updateMeta(final MasterProcedureEnv env) throws IOException {
    env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
      daughterOneRI, daughterTwoRI);
  }

  /**
   * Pre split region actions after the Point-of-No-Return step
   * @param env MasterProcedureEnv
   **/
  private void preSplitRegionAfterMETA(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitAfterMETAAction(getUser());
    }
  }

  /**
   * Post split region actions
   * @param env MasterProcedureEnv
   **/
  private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser());
    }
  }

  /** @return the server the region states currently record for the parent region */
  private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
    return env.getMasterServices().getAssignmentManager().getRegionStates()
      .getRegionServerOfRegion(getParentRegion());
  }

  private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env)
      throws IOException {
    return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env,
      Stream.of(getParentRegion()), getRegionReplication(env));
  }

  private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env)
      throws IOException {
    List<RegionInfo> hris = new ArrayList<RegionInfo>(2);
    hris.add(daughterOneRI);
    hris.add(daughterTwoRI);
    return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env, hris,
getRegionReplication(env), getParentRegionServerName(env)); 871 } 872 873 private int getRegionReplication(final MasterProcedureEnv env) throws IOException { 874 final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 875 return htd.getRegionReplication(); 876 } 877 878 private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException { 879 MasterFileSystem fs = env.getMasterFileSystem(); 880 long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(), 881 getParentRegion(), fs::getFileSystem, fs::getWALFileSystem); 882 if (maxSequenceId > 0) { 883 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 884 getWALRegionDir(env, daughterOneRI), maxSequenceId); 885 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 886 getWALRegionDir(env, daughterTwoRI), maxSequenceId); 887 } 888 } 889 890 @Override 891 protected boolean abort(MasterProcedureEnv env) { 892 // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all 893 // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this 894 // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022. 895 return isRollbackSupported(getCurrentState())? super.abort(env): false; 896 } 897}