/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.assignment;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.master.procedure.TableQueue;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.quotas.QuotaExceededException;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;

/**
 * The procedure to split a region in a table.
 * Takes a lock on the parent region and holds it for the life of the procedure.
 * <p>Throws an exception on construction if it determines the context is hostile to a split
 * (cluster going down, master shutting down, or table disabled).</p>
 */
@InterfaceAudience.Private
public class SplitTableRegionProcedure
    extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
  private Boolean traceEnabled = null;
  private RegionInfo daughter_1_RI;
  private RegionInfo daughter_2_RI;
  private byte[] bestSplitRow;
  private RegionSplitPolicy splitPolicy;

  public SplitTableRegionProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }
  public SplitTableRegionProcedure(final MasterProcedureEnv env,
      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
    super(env, regionToSplit);
    preflightChecks(env, true);
    // When the procedure goes to run in its prepare step, it also does these checkOnline checks.
    // Here we fail-fast on construction; there it skips the split with just a warning.
    checkOnline(env, regionToSplit);
    this.bestSplitRow = splitRow;
    checkSplittable(env, regionToSplit, bestSplitRow);
    final TableName table = regionToSplit.getTable();
    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
    this.daughter_1_RI = RegionInfoBuilder.newBuilder(table)
        .setStartKey(regionToSplit.getStartKey())
        .setEndKey(bestSplitRow)
        .setSplit(false)
        .setRegionId(rid)
        .build();
    this.daughter_2_RI = RegionInfoBuilder.newBuilder(table)
        .setStartKey(bestSplitRow)
        .setEndKey(regionToSplit.getEndKey())
        .setSplit(false)
        .setRegionId(rid)
        .build();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    if (htd.getRegionSplitPolicyClassName() != null) {
      // Since we don't have a region reference here, create the split policy instance without it.
      // It can then be used to invoke methods that don't require a Region reference. This
      // instantiation of the class on the Master side, even though it only makes sense on the
      // RegionServer side, is for Phoenix Local Indexing. Refer to HBASE-12583 for more
      // information.
      Class<? extends RegionSplitPolicy> clazz =
          RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration());
      this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration());
    }
  }
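
  // A minimal usage sketch (illustrative only; the local variable names below are assumptions,
  // not part of this class): on the master, the procedure is constructed -- which runs the
  // fail-fast checks above -- and then handed to the master's procedure executor, e.g.:
  //
  //   SplitTableRegionProcedure proc =
  //       new SplitTableRegionProcedure(env, regionToSplit, splitRow);
  //   long pid = env.getMasterServices().getMasterProcedureExecutor().submitProcedure(proc);
  //
  // The executor then drives executeFromState() through the SplitTableRegionState machine below.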
  /**
   * Check whether there are recovered.edits in the closed parent region.
   * @param env master env
   * @throws IOException IOException
   */
  static boolean hasRecoveredEdits(MasterProcedureEnv env, RegionInfo ri) throws IOException {
    return WALSplitter.hasRecoveredEdits(env.getMasterConfiguration(), ri);
  }

  /**
   * Check whether the region is splittable.
   * @param env MasterProcedureEnv
   * @param regionToSplit parent Region to be split
   * @param splitRow if splitRow is not specified, will first try to get bestSplitRow from the RS
   * @throws IOException
   */
  private void checkSplittable(final MasterProcedureEnv env,
      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
    // Ask the remote RS if this region is splittable.
    // If we get an IOE, report it along w/ the failure so we can see why we are not splittable
    // at this time.
    if (regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
      throw new IllegalArgumentException("Can't invoke split on non-default regions directly");
    }
    RegionStateNode node =
        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
    IOException splittableCheckIOE = null;
    boolean splittable = false;
    if (node != null) {
      try {
        if (bestSplitRow == null || bestSplitRow.length == 0) {
          LOG.info("splitKey isn't explicitly specified, will try to find a best split key from RS");
        }
        // Always set the bestSplitRow request as true here; we need to call Region#checkSplit
        // to check whether it is splittable or not.
        GetRegionInfoResponse response =
            Util.getRegionInfoResponse(env, node.getRegionLocation(), node.getRegionInfo(), true);
        if (bestSplitRow == null || bestSplitRow.length == 0) {
          bestSplitRow = response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
        }
        splittable = response.hasSplittable() && response.getSplittable();

        if (LOG.isDebugEnabled()) {
          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
        }
      } catch (IOException e) {
        splittableCheckIOE = e;
      }
    }

    if (!splittable) {
      IOException e =
          new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
      if (splittableCheckIOE != null) e.initCause(splittableCheckIOE);
      throw e;
    }

    if (bestSplitRow == null || bestSplitRow.length == 0) {
      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, "
          + "maybe table is too small for auto split. For force split, try specifying split row");
    }

    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
      throw new DoNotRetryIOException(
          "Split row is equal to startkey: " + Bytes.toStringBinary(splitRow));
    }

    if (!regionToSplit.containsRow(bestSplitRow)) {
      throw new DoNotRetryIOException(
          "Split row is not inside region key range splitKey:" + Bytes.toStringBinary(splitRow) +
          " region: " + regionToSplit);
    }
  }

  /**
   * Calculate the daughter regionid to use.
   * @param hri Parent {@link RegionInfo}
   * @return Daughter region id (timestamp) to use.
   */
  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
    long rid = EnvironmentEdgeManager.currentTime();
    // Regionid is a timestamp. It can't be less than that of the parent, else we will insert
    // at the wrong location in hbase:meta (See HBASE-710).
    if (rid < hri.getRegionId()) {
      LOG.warn("Clock skew; parent region's id is " + hri.getRegionId() +
          " but current time here is " + rid);
      rid = hri.getRegionId() + 1;
    }
    return rid;
  }
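
  // Worked example of the clock-skew guard above (values are hypothetical): if the parent's
  // regionId is 1600000000500 but the current time on this master reads 1600000000400, using the
  // current time would sort the daughters before the parent in hbase:meta, so the daughters are
  // given regionId 1600000000501 (parent id + 1) instead.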
  @Override
  protected Flow executeFromState(final MasterProcedureEnv env, final SplitTableRegionState state)
      throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_PREPARE:
          if (prepareSplitRegion(env)) {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
            break;
          } else {
            return Flow.NO_MORE_STATE;
          }
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          preSplitRegion(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          addChildProcedure(createUnassignProcedures(env, getRegionReplication(env)));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          if (hasRecoveredEdits(env, getRegion())) {
            // If there are recovered edits, reopen the parent region and then re-run the close by
            // going back to SPLIT_TABLE_REGION_CLOSE_PARENT_REGION. We might have to cycle here a
            // few times (TODO: Add being able to open a region in read-only mode). Open the
            // primary replica in this case only, where we just want to pick up the left-out
            // recovered.edits.
            LOG.info("Found recovered.edits under {}, reopen so we pickup these missed edits!",
                getRegion().getEncodedName());
            addChildProcedure(env.getAssignmentManager().createAssignProcedure(getParentRegion()));
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          } else {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
          }
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
          createDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
          break;
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          writeMaxSequenceIdFile(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          preSplitRegionBeforeMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
          break;
        case SPLIT_TABLE_REGION_UPDATE_META:
          updateMetaForDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
          preSplitRegionAfterMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
          break;
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
          addChildProcedure(createAssignProcedures(env, getRegionReplication(env)));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
          break;
        case SPLIT_TABLE_REGION_POST_OPERATION:
          postSplitRegion(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
      if (!isRollbackSupported(state)) {
        // We reached a state that cannot be rolled back. We just need to keep retrying.
        LOG.warn(msg, e);
      } else {
        LOG.error(msg, e);
        setFailure("master-split-regions", e);
      }
    }
    // if the split fails, we need to call ((HRegion)parent).clearSplit() when it is a force split
    return Flow.HAS_MORE_STATE;
  }

  /**
   * To roll back {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously
   * submitted for the parent region to be split (rollback doesn't wait on the completion of the
   * AssignProcedure). This can be improved by changing rollback() to support sub-procedures.
   * See HBASE-19851 for details.
   */
  @Override
  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
      throws IOException, InterruptedException {
    if (isTraceEnabled()) {
      LOG.trace(this + " rollback state=" + state);
    }

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_POST_OPERATION:
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
        case SPLIT_TABLE_REGION_UPDATE_META:
          // PONR
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          // Do nothing, as re-opening the parent region will clean up the daughter region
          // directories.
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          // Do nothing; in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION,
          // we will bring the parent region online.
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          openParentRegion(env);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          postRollBackSplitRegion(env);
          break;
        case SPLIT_TABLE_REGION_PREPARE:
          break; // nothing to do
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      // This will be retried. Unless there is a bug in the code,
      // this should be just a "temporary error" (e.g. network down)
      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state +
          " for splitting the region " + getParentRegion().getEncodedName() + " in table " +
          getTableName(), e);
      throw e;
    }
  }

  /*
   * Check whether we are in a state that can be rolled back.
   */
  @Override
  protected boolean isRollbackSupported(final SplitTableRegionState state) {
    switch (state) {
      case SPLIT_TABLE_REGION_POST_OPERATION:
      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
      case SPLIT_TABLE_REGION_UPDATE_META:
        // It is not safe to roll back once we reach these states.
        return false;
      default:
        break;
    }
    return true;
  }
  @Override
  protected SplitTableRegionState getState(final int stateId) {
    return SplitTableRegionState.forNumber(stateId);
  }

  @Override
  protected int getStateId(final SplitTableRegionState state) {
    return state.getNumber();
  }

  @Override
  protected SplitTableRegionState getInitialState() {
    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
  }

  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.serializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
        MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
            .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
            .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
            .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_1_RI))
            .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_2_RI));
    serializer.serialize(splitTableRegionMsg.build());
  }

  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.deserializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
        serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
    assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2);
    daughter_1_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
    daughter_2_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
  }
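
  // Note on recovery (a descriptive aside, not behavior added here): the serialized state above
  // (user info, parent RegionInfo, and the two daughter RegionInfos) is what a restarted master
  // reads back. The procedure framework re-creates this procedure through the no-arg constructor
  // and then calls deserializeStateData(), so the split resumes from its last persisted state.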
  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getClass().getSimpleName());
    sb.append(" table=");
    sb.append(getTableName());
    sb.append(", parent=");
    sb.append(getParentRegion().getShortNameToLog());
    sb.append(", daughterA=");
    sb.append(daughter_1_RI.getShortNameToLog());
    sb.append(", daughterB=");
    sb.append(daughter_2_RI.getShortNameToLog());
  }

  private RegionInfo getParentRegion() {
    return getRegion();
  }

  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SPLIT;
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
  }

  private byte[] getSplitRow() {
    return daughter_2_RI.getStartKey();
  }

  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };

  /**
   * Prepare to split the region.
   * @param env MasterProcedureEnv
   */
  @VisibleForTesting
  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
    // Fail if we are taking a snapshot of the given table
    if (env.getMasterServices().getSnapshotManager()
        .isTakingSnapshot(getParentRegion().getTable())) {
      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() +
          ", because we are taking a snapshot of the table " + getParentRegion().getTable()));
      return false;
    }
    // Check whether the region is splittable
    RegionStateNode node =
        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());

    if (node == null) {
      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
    }

    RegionInfo parentHRI = node.getRegionInfo();
    if (parentHRI == null) {
      LOG.info("Unsplittable; parent region is null; node={}", node);
      return false;
    }
    // Lookup the parent HRI state from the AM, which has the latest updated info.
    // Protect against the case where concurrent SPLIT requests came in and succeeded
    // just before us.
    if (node.isInState(State.SPLIT)) {
      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
      return false;
    }
    if (parentHRI.isSplit() || parentHRI.isOffline()) {
      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
      return false;
    }

    // expected parent to be online or closed
    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
      // We may have SPLIT already?
      setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() +
          " FAILED because state=" + node.getState() + "; expected " +
          Arrays.toString(EXPECTED_SPLIT_STATES)));
      return false;
    }

    // Since we have the lock and the master is coordinating the operation
    // we are always able to split the region
    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() +
          " failed due to split switch off"));
      return false;
    }

    // See HBASE-21395, for 2.0.x and 2.1.x only.
    // A safe fence here: if there is a table procedure going on, abort the split.
    // There are some cases that may lead to a table procedure rollback (more serious than
    // rolling back the split procedure here), or the split parent was brought online by the
    // table procedure because of the race between the split procedure and the table procedure.
    List<AbstractStateMachineTableProcedure> tableProcedures = env
        .getMasterServices().getProcedures().stream()
        .filter(p -> p instanceof AbstractStateMachineTableProcedure)
        .map(p -> (AbstractStateMachineTableProcedure) p)
        .filter(p -> p.getTableName().equals(getParentRegion().getTable()) &&
            !p.isFinished() && TableQueue.requireTableExclusiveLock(p))
        .collect(Collectors.toList());
    if (tableProcedures != null && tableProcedures.size() > 0) {
      throw new DoNotRetryIOException(tableProcedures.get(0).toString()
          + " is going on against the same table, abort the split of " + this.toString());
    }

    // set node state as SPLITTING
    node.setState(State.SPLITTING);

    return true;
  }
  /**
   * Action before splitting region in a table.
   * @param env MasterProcedureEnv
   */
  private void preSplitRegion(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
    }

    // TODO: Clean up split and merge. Currently all over the place.
    // Notify QuotaManager and RegionNormalizer
    try {
      env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion());
    } catch (QuotaExceededException e) {
      env.getMasterServices().getRegionNormalizer().planSkipped(this.getParentRegion(),
          NormalizationPlan.PlanType.SPLIT);
      throw e;
    }
  }

  /**
   * Action after rolling back a split table region action.
   * @param env MasterProcedureEnv
   */
  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postRollBackSplitRegionAction(getUser());
    }
  }

  /**
   * Roll back the close of the parent region.
   * @param env MasterProcedureEnv
   */
  private void openParentRegion(final MasterProcedureEnv env) throws IOException {
    // Check whether the region is closed; if so, open it in the same server
    final int regionReplication = getRegionReplication(env);
    final ServerName serverName = getParentRegionServerName(env);

    final AssignProcedure[] procs = new AssignProcedure[regionReplication];
    for (int i = 0; i < regionReplication; ++i) {
      final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i);
      procs[i] = env.getAssignmentManager().createAssignProcedure(hri, serverName);
    }
    env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs);
  }

  /**
   * Create daughter regions.
   * @param env MasterProcedureEnv
   */
  @VisibleForTesting
  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Path tabledir = FSUtils.getTableDir(mfs.getRootDir(), getTableName());
    final FileSystem fs = mfs.getFileSystem();
    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
        env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
    regionFs.createSplitsDir(daughter_1_RI, daughter_2_RI);

    Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);

    assertReferenceFileCount(fs, expectedReferences.getFirst(),
        regionFs.getSplitsDir(daughter_1_RI));
    // Move the files from the temporary .splits to the final /table/region directory
    regionFs.commitDaughterRegion(daughter_1_RI);
    assertReferenceFileCount(fs, expectedReferences.getFirst(),
        new Path(tabledir, daughter_1_RI.getEncodedName()));

    assertReferenceFileCount(fs, expectedReferences.getSecond(),
        regionFs.getSplitsDir(daughter_2_RI));
    regionFs.commitDaughterRegion(daughter_2_RI);
    assertReferenceFileCount(fs, expectedReferences.getSecond(),
        new Path(tabledir, daughter_2_RI.getEncodedName()));
  }
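
  // Rough on-disk layout during createDaughterRegions() (paths are illustrative, assuming the
  // default layout; names such as <table>, <parent>, <daughterA> are placeholders):
  //
  //   <rootdir>/data/<ns>/<table>/<parent>/.splits/<daughterA>/<family>/<reference files>
  //   <rootdir>/data/<ns>/<table>/<parent>/.splits/<daughterB>/<family>/<reference files>
  //
  // commitDaughterRegion() then moves each daughter directory out of the temporary .splits dir to
  // <rootdir>/data/<ns>/<table>/<daughter>, which is what the reference-file count assertions
  // above verify before and after the move.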
  /**
   * Split the parent region's store files, creating the daughter references under the parent's
   * split directory.
   * @param env MasterProcedureEnv
   */
  private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
      final HRegionFileSystem regionFs) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Configuration conf = env.getMasterConfiguration();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    // The following code sets up a thread pool executor with as many slots as
    // there are files to split. It then fires up everything, waits for
    // completion and finally checks for any exception
    //
    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir
    // Nothing to unroll here on failure -- re-running createSplitsDir will
    // clean this up.
    int nbFiles = 0;
    final Map<String, Collection<StoreFileInfo>> files =
        new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount());
    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
      String family = cfd.getNameAsString();
      Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family);
      if (sfis == null) {
        continue;
      }
      Collection<StoreFileInfo> filteredSfis = null;
      for (StoreFileInfo sfi : sfis) {
        // Filter. There is a lag cleaning up compacted reference files. They get cleared
        // after a delay in case outstanding Scanners still have references. Because of this,
        // the listing of the Store content may have straggler reference files. Skip these.
        // It should be safe to skip references at this point because we checked above with
        // the region if it thinks it is splittable and if we are here, it thinks it is
        // splittable.
        if (sfi.isReference()) {
          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
          continue;
        }
        if (filteredSfis == null) {
          filteredSfis = new ArrayList<StoreFileInfo>(sfis.size());
          files.put(family, filteredSfis);
        }
        filteredSfis.add(sfi);
        nbFiles++;
      }
    }
    if (nbFiles == 0) {
      // no files need to be split.
      return new Pair<Integer, Integer>(0, 0);
    }
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(
        conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
            conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
        nbFiles);
    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" +
        getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
        Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
    final List<Future<Pair<Path, Path>>> futures =
        new ArrayList<Future<Pair<Path, Path>>>(nbFiles);

    // Split each store file.
    for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) {
      byte[] familyName = Bytes.toBytes(e.getKey());
      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
      final Collection<StoreFileInfo> storeFiles = e.getValue();
      if (storeFiles != null && storeFiles.size() > 0) {
        final CacheConfig cacheConf = new CacheConfig(conf, hcd);
        for (StoreFileInfo storeFileInfo : storeFiles) {
          StoreFileSplitter sfs =
              new StoreFileSplitter(regionFs, familyName, new HStoreFile(mfs.getFileSystem(),
                  storeFileInfo, conf, cacheConf, hcd.getBloomFilterType(), true));
          futures.add(threadPool.submit(sfs));
        }
      }
    }
    // Shutdown the pool
    threadPool.shutdown();

    // Wait for all the tasks to finish.
    // When splits ran on the RegionServer, the how-long-to-wait configuration was named
    // hbase.regionserver.fileSplitTimeout. If set, use its value.
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
        conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
    try {
      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
      if (stillRunning) {
        threadPool.shutdownNow();
        // wait for the thread to shutdown completely.
        while (!threadPool.isTerminated()) {
          Thread.sleep(50);
        }
        throw new IOException(
            "Took too long to split the" + " files and create the references, aborting split");
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    int daughterA = 0;
    int daughterB = 0;
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
      try {
        Pair<Path, Path> p = future.get();
        daughterA += p.getFirst() != null ? 1 : 0;
        daughterB += p.getSecond() != null ? 1 : 0;
      } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
      } catch (ExecutionException e) {
        throw new IOException(e);
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " split storefiles for region " +
          getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA +
          " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<Integer, Integer>(daughterA, daughterB);
  }

  private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount,
      final Path dir) throws IOException {
    if (expectedReferenceFileCount != 0 &&
        expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) {
      throw new IOException("Failing split. Expected reference file count isn't equal.");
    }
  }

  private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf)
      throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting started for store file: " +
          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
    }

    final byte[] splitRow = getSplitRow();
    final String familyName = Bytes.toString(family);
    final Path path_first = regionFs.splitStoreFile(this.daughter_1_RI, familyName, sf, splitRow,
        false, splitPolicy);
    final Path path_second = regionFs.splitStoreFile(this.daughter_2_RI, familyName, sf, splitRow,
        true, splitPolicy);
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting complete for store file: " +
          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
    }
    return new Pair<Path, Path>(path_first, path_second);
  }
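
  // Note (descriptive only, not behavior added here): splitStoreFile() does not rewrite any data.
  // Each call to HRegionFileSystem.splitStoreFile() writes a small reference file pointing at the
  // parent's store file; the false/true argument selects the bottom half (first daughter) or the
  // top half (second daughter) relative to the split row. The daughters serve reads through these
  // references until their own compactions rewrite the data, after which the parent region can be
  // archived.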
  /**
   * Utility class used to do the file splitting / reference writing
   * in parallel instead of sequentially.
   */
  private class StoreFileSplitter implements Callable<Pair<Path, Path>> {
    private final HRegionFileSystem regionFs;
    private final byte[] family;
    private final HStoreFile sf;

    /**
     * Constructor that takes what it needs to split
     * @param regionFs the file system
     * @param family Family that contains the store file
     * @param sf which file
     */
    public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) {
      this.regionFs = regionFs;
      this.sf = sf;
      this.family = family;
    }

    @Override
    public Pair<Path, Path> call() throws IOException {
      return splitStoreFile(regionFs, family, sf);
    }
  }

  /**
   * Pre split region actions before the Point-of-No-Return step
   * @param env MasterProcedureEnv
   **/
  private void preSplitRegionBeforeMETA(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final List<Mutation> metaEntries = new ArrayList<Mutation>();
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser());
      try {
        for (Mutation p : metaEntries) {
          RegionInfo.parseRegionName(p.getRow());
        }
      } catch (IOException e) {
        LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor is not parsable "
            + "as a region name. Mutations from coprocessors should only be for the hbase:meta "
            + "table.");
        throw e;
      }
    }
  }

  /**
   * Add daughter regions to META
   * @param env MasterProcedureEnv
   */
  private void updateMetaForDaughterRegions(final MasterProcedureEnv env) throws IOException {
    env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
        daughter_1_RI, daughter_2_RI);
  }

  /**
   * Pre split region actions after the Point-of-No-Return step
   * @param env MasterProcedureEnv
   **/
  private void preSplitRegionAfterMETA(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitAfterMETAAction(getUser());
    }
  }

  /**
   * Post split region actions
   * @param env MasterProcedureEnv
   **/
  private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postCompletedSplitRegionAction(daughter_1_RI, daughter_2_RI, getUser());
    }
  }

  private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
    return env.getMasterServices().getAssignmentManager()
        .getRegionStates().getRegionServerOfRegion(getParentRegion());
  }
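
  // The two helpers below fan out over region replicas: the parent is unassigned once per
  // replica, while the daughters are assigned with 2 * regionReplication procedures (one per
  // daughter per replica), all targeted at the server that hosted the parent.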
  private UnassignProcedure[] createUnassignProcedures(final MasterProcedureEnv env,
      final int regionReplication) {
    final UnassignProcedure[] procs = new UnassignProcedure[regionReplication];
    for (int i = 0; i < procs.length; ++i) {
      final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i);
      procs[i] = env.getAssignmentManager()
          .createUnassignProcedure(hri, null, true, !RegionReplicaUtil.isDefaultReplica(hri));
    }
    return procs;
  }

  private AssignProcedure[] createAssignProcedures(final MasterProcedureEnv env,
      final int regionReplication) {
    final ServerName targetServer = getParentRegionServerName(env);
    final AssignProcedure[] procs = new AssignProcedure[regionReplication * 2];
    int procsIdx = 0;
    for (int i = 0; i < regionReplication; ++i) {
      final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(daughter_1_RI, i);
      procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, targetServer);
    }
    for (int i = 0; i < regionReplication; ++i) {
      final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(daughter_2_RI, i);
      procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, targetServer);
    }
    return procs;
  }

  private int getRegionReplication(final MasterProcedureEnv env) throws IOException {
    final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    return htd.getRegionReplication();
  }

  private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
    MasterFileSystem fs = env.getMasterFileSystem();
    long maxSequenceId = WALSplitter.getMaxRegionSequenceId(env.getMasterConfiguration(),
        getParentRegion(), fs::getFileSystem, fs::getWALFileSystem);
    if (maxSequenceId > 0) {
      WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(),
          getWALRegionDir(env, daughter_1_RI), maxSequenceId);
      WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(),
          getWALRegionDir(env, daughter_2_RI), maxSequenceId);
    }
  }

  /**
   * The procedure could be restarted from a different machine. If the variable is null, we need to
   * retrieve it.
   * @return traceEnabled
   */
  private boolean isTraceEnabled() {
    if (traceEnabled == null) {
      traceEnabled = LOG.isTraceEnabled();
    }
    return traceEnabled;
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // Abort means rollback. We can't roll back all steps. HBASE-18018 added abort to all
    // Procedures. Here is a Procedure that has a PONR and cannot be aborted once it enters this
    // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022.
    return isRollbackSupported(getCurrentState()) ? super.abort(env) : false;
  }
}