001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.master.assignment;
020
021import java.io.IOException;
022import java.io.InterruptedIOException;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.Collection;
026import java.util.Collections;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030import java.util.concurrent.Callable;
031import java.util.concurrent.ExecutionException;
032import java.util.concurrent.ExecutorService;
033import java.util.concurrent.Executors;
034import java.util.concurrent.Future;
035import java.util.concurrent.TimeUnit;
036import java.util.stream.Collectors;
037import org.apache.hadoop.conf.Configuration;
038import org.apache.hadoop.fs.FileSystem;
039import org.apache.hadoop.fs.Path;
040import org.apache.hadoop.hbase.DoNotRetryIOException;
041import org.apache.hadoop.hbase.HConstants;
042import org.apache.hadoop.hbase.ServerName;
043import org.apache.hadoop.hbase.TableName;
044import org.apache.hadoop.hbase.UnknownRegionException;
045import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
046import org.apache.hadoop.hbase.client.MasterSwitchType;
047import org.apache.hadoop.hbase.client.Mutation;
048import org.apache.hadoop.hbase.client.RegionInfo;
049import org.apache.hadoop.hbase.client.RegionInfoBuilder;
050import org.apache.hadoop.hbase.client.RegionReplicaUtil;
051import org.apache.hadoop.hbase.client.TableDescriptor;
052import org.apache.hadoop.hbase.io.hfile.CacheConfig;
053import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
054import org.apache.hadoop.hbase.master.MasterFileSystem;
055import org.apache.hadoop.hbase.master.RegionState.State;
056import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
057import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
058import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
059import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure;
060import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
061import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
062import org.apache.hadoop.hbase.master.procedure.TableQueue;
063import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
064import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
065import org.apache.hadoop.hbase.quotas.QuotaExceededException;
066import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
067import org.apache.hadoop.hbase.regionserver.HStore;
068import org.apache.hadoop.hbase.regionserver.HStoreFile;
069import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
070import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
071import org.apache.hadoop.hbase.util.Bytes;
072import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
073import org.apache.hadoop.hbase.util.FSUtils;
074import org.apache.hadoop.hbase.util.Pair;
075import org.apache.hadoop.hbase.util.Threads;
076import org.apache.hadoop.hbase.wal.WALSplitter;
077import org.apache.hadoop.util.ReflectionUtils;
078import org.apache.yetus.audience.InterfaceAudience;
079import org.slf4j.Logger;
080import org.slf4j.LoggerFactory;
081
082import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
083
084import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
085import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
086import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
087import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;
088
089/**
090 * The procedure to split a region in a table.
091 * Takes lock on the parent region.
092 * It holds the lock for the life of the procedure.
 * <p>Throws an exception on construction if it determines the context is hostile to a split
 * (cluster going down, master shutting down, or table disabled).</p>
095 */
096@InterfaceAudience.Private
097public class SplitTableRegionProcedure
098    extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
099  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
100  private RegionInfo daughter_1_RI;
101  private RegionInfo daughter_2_RI;
102  private byte[] bestSplitRow;
103  private RegionSplitPolicy splitPolicy;
104
105  public SplitTableRegionProcedure() {
106    // Required by the Procedure framework to create the procedure on replay
107  }
108
109  public SplitTableRegionProcedure(final MasterProcedureEnv env,
110      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
111    super(env, regionToSplit);
112    preflightChecks(env, true);
113    // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here
114    // we fail-fast on construction. There it skips the split with just a warning.
115    checkOnline(env, regionToSplit);
116    this.bestSplitRow = splitRow;
117    checkSplittable(env, regionToSplit, bestSplitRow);
118    final TableName table = regionToSplit.getTable();
119    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
120    this.daughter_1_RI = RegionInfoBuilder.newBuilder(table)
121        .setStartKey(regionToSplit.getStartKey())
122        .setEndKey(bestSplitRow)
123        .setSplit(false)
124        .setRegionId(rid)
125        .build();
126    this.daughter_2_RI = RegionInfoBuilder.newBuilder(table)
127        .setStartKey(bestSplitRow)
128        .setEndKey(regionToSplit.getEndKey())
129        .setSplit(false)
130        .setRegionId(rid)
131        .build();
132    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
133    if(htd.getRegionSplitPolicyClassName() != null) {
134      // Since we don't have region reference here, creating the split policy instance without it.
135      // This can be used to invoke methods which don't require Region reference. This instantiation
136      // of a class on Master-side though it only makes sense on the RegionServer-side is
137      // for Phoenix Local Indexing. Refer HBASE-12583 for more information.
138      Class<? extends RegionSplitPolicy> clazz =
139          RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration());
140      this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration());
141    }
142  }
143
144  /**
145   * Check whether there are recovered.edits in the parent closed region.
146   * @param env master env
147   * @throws IOException IOException
148   */
149  static boolean hasRecoveredEdits(MasterProcedureEnv env, RegionInfo ri) throws IOException {
150    return WALSplitter.hasRecoveredEdits(env.getMasterConfiguration(), ri);
151  }
152
153  /**
154   * Check whether the region is splittable
155   * @param env MasterProcedureEnv
156   * @param regionToSplit parent Region to be split
157   * @param splitRow if splitRow is not specified, will first try to get bestSplitRow from RS
158   * @throws IOException
159   */
160  private void checkSplittable(final MasterProcedureEnv env,
161      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
162    // Ask the remote RS if this region is splittable.
163    // If we get an IOE, report it along w/ the failure so can see why we are not splittable at this time.
164    if(regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
165      throw new IllegalArgumentException ("Can't invoke split on non-default regions directly");
166    }
167    RegionStateNode node =
168        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
169    IOException splittableCheckIOE = null;
170    boolean splittable = false;
171    if (node != null) {
172      try {
173        if (bestSplitRow == null || bestSplitRow.length == 0) {
174          LOG.info("splitKey isn't explicitly specified, " + " will try to find a best split key from RS");
175        }
176        // Always set bestSplitRow request as true here,
177        // need to call Region#checkSplit to check it splittable or not
178        GetRegionInfoResponse response =
179            Util.getRegionInfoResponse(env, node.getRegionLocation(), node.getRegionInfo(), true);
180        if(bestSplitRow == null || bestSplitRow.length == 0) {
181          bestSplitRow = response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
182        }
183        splittable = response.hasSplittable() && response.getSplittable();
184
185        if (LOG.isDebugEnabled()) {
186          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
187        }
188      } catch (IOException e) {
189        splittableCheckIOE = e;
190      }
191    }
192
193    if (!splittable) {
194      IOException e = new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
195      if (splittableCheckIOE != null) e.initCause(splittableCheckIOE);
196      throw e;
197    }
198
199    if(bestSplitRow == null || bestSplitRow.length == 0) {
200      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, "
201          + "maybe table is too small for auto split. For force split, try specifying split row");
202    }
203
204    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
205      throw new DoNotRetryIOException(
206        "Split row is equal to startkey: " + Bytes.toStringBinary(splitRow));
207    }
208
209    if (!regionToSplit.containsRow(bestSplitRow)) {
210      throw new DoNotRetryIOException(
211        "Split row is not inside region key range splitKey:" + Bytes.toStringBinary(splitRow) +
212        " region: " + regionToSplit);
213    }
214  }
215
216  /**
217   * Calculate daughter regionid to use.
218   * @param hri Parent {@link RegionInfo}
219   * @return Daughter region id (timestamp) to use.
220   */
221  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
222    long rid = EnvironmentEdgeManager.currentTime();
223    // Regionid is timestamp.  Can't be less than that of parent else will insert
224    // at wrong location in hbase:meta (See HBASE-710).
225    if (rid < hri.getRegionId()) {
226      LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
227        " but current time here is " + rid);
228      rid = hri.getRegionId() + 1;
229    }
230    return rid;
231  }
232
233  @Override
234  protected Flow executeFromState(final MasterProcedureEnv env, final SplitTableRegionState state)
235      throws InterruptedException {
236    LOG.trace("{} execute state={}", this, state);
237
238    try {
239      switch (state) {
240        case SPLIT_TABLE_REGION_PREPARE:
241          if (prepareSplitRegion(env)) {
242            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
243            break;
244          } else {
245            return Flow.NO_MORE_STATE;
246          }
247        case SPLIT_TABLE_REGION_PRE_OPERATION:
248          preSplitRegion(env);
249          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
250          break;
251        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
252          addChildProcedure(createUnassignProcedures(env, getRegionReplication(env)));
253          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
254          break;
255        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
256          if (hasRecoveredEdits(env, getRegion())) {
257            // If recovered edits, reopen parent region and then re-run the close by going back to
258            // SPLIT_TABLE_REGION_CLOSE_PARENT_REGION. We might have to cycle here a few times
259            // (TODO: Add being able to open a region in read-only mode). Open the primary replica
260            // in this case only where we just want to pickup the left-out replicated.edits.
261            LOG.info("Found recovered.edits under {}, reopen so we pickup these missed edits!",
262                getRegion().getEncodedName());
263            addChildProcedure(env.getAssignmentManager().createAssignProcedure(getParentRegion()));
264            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
265          } else {
266            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
267          }
268          break;
269        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
270          createDaughterRegions(env);
271          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
272          break;
273        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
274          writeMaxSequenceIdFile(env);
275          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
276          break;
277        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
278          preSplitRegionBeforeMETA(env);
279          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
280          break;
281        case SPLIT_TABLE_REGION_UPDATE_META:
282          updateMeta(env);
283          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
284          break;
285        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
286          preSplitRegionAfterMETA(env);
287          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
288          break;
289        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
290          addChildProcedure(createAssignProcedures(env, getRegionReplication(env)));
291          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
292          break;
293        case SPLIT_TABLE_REGION_POST_OPERATION:
294          postSplitRegion(env);
295          return Flow.NO_MORE_STATE;
296        default:
297          throw new UnsupportedOperationException(this + " unhandled state=" + state);
298      }
299    } catch (IOException e) {
300      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
301      if (!isRollbackSupported(state)) {
302        // We reach a state that cannot be rolled back. We just need to keep retrying.
303        LOG.warn(msg, e);
304      } else {
305        LOG.error(msg, e);
306        setFailure("master-split-regions", e);
307      }
308    }
309    // if split fails,  need to call ((HRegion)parent).clearSplit() when it is a force split
310    return Flow.HAS_MORE_STATE;
311  }
312
313  /**
314   * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously
315   * submitted for parent region to be split (rollback doesn't wait on the completion of the
316   * AssignProcedure) . This can be improved by changing rollback() to support sub-procedures.
317   * See HBASE-19851 for details.
318   */
319  @Override
320  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
321      throws IOException, InterruptedException {
322    LOG.trace("{} rollback state={}", this, state);
323
324    try {
325      switch (state) {
326        case SPLIT_TABLE_REGION_POST_OPERATION:
327        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
328        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
329        case SPLIT_TABLE_REGION_UPDATE_META:
330          // PONR
331          throw new UnsupportedOperationException(this + " unhandled state=" + state);
332        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
333          break;
334        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
335        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
336          // Doing nothing, as re-open parent region would clean up daughter region directories.
337          break;
338        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
339          // Doing nothing, in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION,
340          // we will bring parent region online
341          break;
342        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
343          openParentRegion(env);
344          break;
345        case SPLIT_TABLE_REGION_PRE_OPERATION:
346          postRollBackSplitRegion(env);
347          break;
348        case SPLIT_TABLE_REGION_PREPARE:
349          break; // nothing to do
350        default:
351          throw new UnsupportedOperationException(this + " unhandled state=" + state);
352      }
353    } catch (IOException e) {
354      // This will be retried. Unless there is a bug in the code,
355      // this should be just a "temporary error" (e.g. network down)
356      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state +
357          " for splitting the region "
358        + getParentRegion().getEncodedName() + " in table " + getTableName(), e);
359      throw e;
360    }
361  }
362
363  /*
364   * Check whether we are in the state that can be rollback
365   */
366  @Override
367  protected boolean isRollbackSupported(final SplitTableRegionState state) {
368    switch (state) {
369      case SPLIT_TABLE_REGION_POST_OPERATION:
370      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
371      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
372      case SPLIT_TABLE_REGION_UPDATE_META:
373        // It is not safe to rollback if we reach to these states.
374        return false;
375      default:
376        break;
377    }
378    return true;
379  }
380
381  @Override
382  protected SplitTableRegionState getState(final int stateId) {
383    return SplitTableRegionState.forNumber(stateId);
384  }
385
386  @Override
387  protected int getStateId(final SplitTableRegionState state) {
388    return state.getNumber();
389  }
390
391  @Override
392  protected SplitTableRegionState getInitialState() {
393    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
394  }
395
396  @Override
397  protected void serializeStateData(ProcedureStateSerializer serializer)
398      throws IOException {
399    super.serializeStateData(serializer);
400
401    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
402        MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
403        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
404        .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
405        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_1_RI))
406        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_2_RI));
407    serializer.serialize(splitTableRegionMsg.build());
408  }
409
410  @Override
411  protected void deserializeStateData(ProcedureStateSerializer serializer)
412      throws IOException {
413    super.deserializeStateData(serializer);
414
415    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
416        serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
417    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
418    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
419    assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2);
420    daughter_1_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
421    daughter_2_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
422  }
423
424  @Override
425  public void toStringClassDetails(StringBuilder sb) {
426    sb.append(getClass().getSimpleName());
427    sb.append(" table=");
428    sb.append(getTableName());
429    sb.append(", parent=");
430    sb.append(getParentRegion().getShortNameToLog());
431    sb.append(", daughterA=");
432    sb.append(daughter_1_RI.getShortNameToLog());
433    sb.append(", daughterB=");
434    sb.append(daughter_2_RI.getShortNameToLog());
435  }
436
437  private RegionInfo getParentRegion() {
438    return getRegion();
439  }
440
441  @Override
442  public TableOperationType getTableOperationType() {
443    return TableOperationType.REGION_SPLIT;
444  }
445
446  @Override
447  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
448    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
449  }
450
451  private byte[] getSplitRow() {
452    return daughter_2_RI.getStartKey();
453  }
454
455  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };
456
457  /**
458   * Prepare to Split region.
459   * @param env MasterProcedureEnv
460   */
461  @VisibleForTesting
462  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
463    // Fail if we are taking snapshot for the given table
464    if (env.getMasterServices().getSnapshotManager()
465      .isTakingSnapshot(getParentRegion().getTable())) {
466      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() +
467        ", because we are taking snapshot for the table " + getParentRegion().getTable()));
468      return false;
469    }
470    // Check whether the region is splittable
471    RegionStateNode node =
472        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
473
474    if (node == null) {
475      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
476    }
477
478    RegionInfo parentHRI = node.getRegionInfo();
479    if (parentHRI == null) {
480      LOG.info("Unsplittable; parent region is null; node={}", node);
481      return false;
482    }
483    // Lookup the parent HRI state from the AM, which has the latest updated info.
484    // Protect against the case where concurrent SPLIT requests came in and succeeded
485    // just before us.
486    if (node.isInState(State.SPLIT)) {
487      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
488      return false;
489    }
490    if (parentHRI.isSplit() || parentHRI.isOffline()) {
491      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
492      return false;
493    }
494
495    // expected parent to be online or closed
496    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
497      // We may have SPLIT already?
498      setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() +
499          " FAILED because state=" + node.getState() + "; expected " +
500          Arrays.toString(EXPECTED_SPLIT_STATES)));
501      return false;
502    }
503
504    // Since we have the lock and the master is coordinating the operation
505    // we are always able to split the region
506    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
507      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
508      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() +
509          " failed due to split switch off"));
510      return false;
511    }
512
513    // See HBASE-21395, for 2.0.x and 2.1.x only.
514    // A safe fence here, if there is a table procedure going on, abort the split.
515    // There some cases that may lead to table procedure roll back (more serious
516    // than roll back the split procedure here), or the split parent was brought online
517    // by the table procedure because of the race between split procedure and table procedure
518    List<AbstractStateMachineTableProcedure> tableProcedures = env
519        .getMasterServices().getProcedures().stream()
520        .filter(p -> p instanceof AbstractStateMachineTableProcedure)
521        .map(p -> (AbstractStateMachineTableProcedure) p)
522        .filter(p -> p.getTableName().equals(getParentRegion().getTable()) &&
523            !p.isFinished() && TableQueue.requireTableExclusiveLock(p))
524        .collect(Collectors.toList());
525    if (tableProcedures != null && tableProcedures.size() > 0) {
526      throw new DoNotRetryIOException(tableProcedures.get(0).toString()
527          + " is going on against the same table, abort the split of " + this
528          .toString());
529    }
530
531    // set node state as SPLITTING
532    node.setState(State.SPLITTING);
533
534    return true;
535  }
536
537  /**
538   * Action before splitting region in a table.
539   * @param env MasterProcedureEnv
540   */
541  private void preSplitRegion(final MasterProcedureEnv env)
542      throws IOException, InterruptedException {
543    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
544    if (cpHost != null) {
545      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
546    }
547
548    // TODO: Clean up split and merge. Currently all over the place.
549    // Notify QuotaManager and RegionNormalizer
550    try {
551      env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion());
552    } catch (QuotaExceededException e) {
553      env.getMasterServices().getRegionNormalizer().planSkipped(this.getParentRegion(),
554          NormalizationPlan.PlanType.SPLIT);
555      throw e;
556    }
557  }
558
559  /**
560   * Action after rollback a split table region action.
561   * @param env MasterProcedureEnv
562   */
563  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
564    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
565    if (cpHost != null) {
566      cpHost.postRollBackSplitRegionAction(getUser());
567    }
568  }
569
570  /**
571   * Rollback close parent region
572   * @param env MasterProcedureEnv
573   */
574  private void openParentRegion(final MasterProcedureEnv env) throws IOException {
575    // Check whether the region is closed; if so, open it in the same server
576    final int regionReplication = getRegionReplication(env);
577    final ServerName serverName = getParentRegionServerName(env);
578
579    final AssignProcedure[] procs = createAssignProcedures(regionReplication, env,
580      Collections.singletonList(getParentRegion()), serverName);
581    env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs);
582  }
583
584  /**
585   * Create daughter regions
586   * @param env MasterProcedureEnv
587   */
588  @VisibleForTesting
589  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
590    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
591    final Path tabledir = FSUtils.getTableDir(mfs.getRootDir(), getTableName());
592    final FileSystem fs = mfs.getFileSystem();
593    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
594      env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
595    regionFs.createSplitsDir(daughter_1_RI, daughter_2_RI);
596
597    Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);
598
599    assertReferenceFileCount(fs, expectedReferences.getFirst(),
600      regionFs.getSplitsDir(daughter_1_RI));
601    //Move the files from the temporary .splits to the final /table/region directory
602    regionFs.commitDaughterRegion(daughter_1_RI);
603    assertReferenceFileCount(fs, expectedReferences.getFirst(),
604      new Path(tabledir, daughter_1_RI.getEncodedName()));
605
606    assertReferenceFileCount(fs, expectedReferences.getSecond(),
607      regionFs.getSplitsDir(daughter_2_RI));
608    regionFs.commitDaughterRegion(daughter_2_RI);
609    assertReferenceFileCount(fs, expectedReferences.getSecond(),
610      new Path(tabledir, daughter_2_RI.getEncodedName()));
611  }
612
613  /**
614   * Create Split directory
615   * @param env MasterProcedureEnv
616   */
617  private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
618      final HRegionFileSystem regionFs) throws IOException {
619    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
620    final Configuration conf = env.getMasterConfiguration();
621    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
622    // The following code sets up a thread pool executor with as many slots as
623    // there's files to split. It then fires up everything, waits for
624    // completion and finally checks for any exception
625    //
626    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir
627    // Nothing to unroll here if failure -- re-run createSplitsDir will
628    // clean this up.
629    int nbFiles = 0;
630    final Map<String, Collection<StoreFileInfo>> files =
631        new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount());
632    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
633      String family = cfd.getNameAsString();
634      Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family);
635      if (sfis == null) {
636        continue;
637      }
638      Collection<StoreFileInfo> filteredSfis = null;
639      for (StoreFileInfo sfi : sfis) {
640        // Filter. There is a lag cleaning up compacted reference files. They get cleared
641        // after a delay in case outstanding Scanners still have references. Because of this,
642        // the listing of the Store content may have straggler reference files. Skip these.
643        // It should be safe to skip references at this point because we checked above with
644        // the region if it thinks it is splittable and if we are here, it thinks it is
645        // splitable.
646        if (sfi.isReference()) {
647          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
648          continue;
649        }
650        if (filteredSfis == null) {
651          filteredSfis = new ArrayList<StoreFileInfo>(sfis.size());
652          files.put(family, filteredSfis);
653        }
654        filteredSfis.add(sfi);
655        nbFiles++;
656      }
657    }
658    if (nbFiles == 0) {
659      // no file needs to be splitted.
660      return new Pair<Integer, Integer>(0, 0);
661    }
662    // Max #threads is the smaller of the number of storefiles or the default max determined above.
663    int maxThreads = Math.min(
664      conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
665        conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
666      nbFiles);
667    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" +
668        getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
669    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
670      Threads.newDaemonThreadFactory("StoreFileSplitter-%1$d"));
671    final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);
672
673    // Split each store file.
674    for (Map.Entry<String, Collection<StoreFileInfo>>e: files.entrySet()) {
675      byte [] familyName = Bytes.toBytes(e.getKey());
676      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
677      final Collection<StoreFileInfo> storeFiles = e.getValue();
678      if (storeFiles != null && storeFiles.size() > 0) {
679        final CacheConfig cacheConf = new CacheConfig(conf, hcd);
680        for (StoreFileInfo storeFileInfo: storeFiles) {
681          StoreFileSplitter sfs =
682              new StoreFileSplitter(regionFs, familyName, new HStoreFile(mfs.getFileSystem(),
683                  storeFileInfo, conf, cacheConf, hcd.getBloomFilterType(), true));
684          futures.add(threadPool.submit(sfs));
685        }
686      }
687    }
688    // Shutdown the pool
689    threadPool.shutdown();
690
691    // Wait for all the tasks to finish.
692    // When splits ran on the RegionServer, how-long-to-wait-configuration was named
693    // hbase.regionserver.fileSplitTimeout. If set, use its value.
694    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
695      conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
696    try {
697      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
698      if (stillRunning) {
699        threadPool.shutdownNow();
700        // wait for the thread to shutdown completely.
701        while (!threadPool.isTerminated()) {
702          Thread.sleep(50);
703        }
704        throw new IOException(
705            "Took too long to split the" + " files and create the references, aborting split");
706      }
707    } catch (InterruptedException e) {
708      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
709    }
710
711    int daughterA = 0;
712    int daughterB = 0;
713    // Look for any exception
714    for (Future<Pair<Path, Path>> future : futures) {
715      try {
716        Pair<Path, Path> p = future.get();
717        daughterA += p.getFirst() != null ? 1 : 0;
718        daughterB += p.getSecond() != null ? 1 : 0;
719      } catch (InterruptedException e) {
720        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
721      } catch (ExecutionException e) {
722        throw new IOException(e);
723      }
724    }
725
726    if (LOG.isDebugEnabled()) {
727      LOG.debug("pid=" + getProcId() + " split storefiles for region " +
728          getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA +
729          " storefiles, Daughter B: " + daughterB + " storefiles.");
730    }
731    return new Pair<Integer, Integer>(daughterA, daughterB);
732  }
733
734  private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount,
735      final Path dir) throws IOException {
736    if (expectedReferenceFileCount != 0 &&
737        expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) {
738      throw new IOException("Failing split. Expected reference file count isn't equal.");
739    }
740  }
741
742  private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf)
743    throws IOException {
744    if (LOG.isDebugEnabled()) {
745      LOG.debug("pid=" + getProcId() + " splitting started for store file: " +
746          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
747    }
748
749    final byte[] splitRow = getSplitRow();
750    final String familyName = Bytes.toString(family);
751    final Path path_first = regionFs.splitStoreFile(this.daughter_1_RI, familyName, sf, splitRow,
752        false, splitPolicy);
753    final Path path_second = regionFs.splitStoreFile(this.daughter_2_RI, familyName, sf, splitRow,
754       true, splitPolicy);
755    if (LOG.isDebugEnabled()) {
756      LOG.debug("pid=" + getProcId() + " splitting complete for store file: " +
757          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
758    }
759    return new Pair<Path,Path>(path_first, path_second);
760  }
761
762  /**
763   * Utility class used to do the file splitting / reference writing
764   * in parallel instead of sequentially.
765   */
766  private class StoreFileSplitter implements Callable<Pair<Path,Path>> {
767    private final HRegionFileSystem regionFs;
768    private final byte[] family;
769    private final HStoreFile sf;
770
771    /**
772     * Constructor that takes what it needs to split
773     * @param regionFs the file system
774     * @param family Family that contains the store file
775     * @param sf which file
776     */
777    public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) {
778      this.regionFs = regionFs;
779      this.sf = sf;
780      this.family = family;
781    }
782
783    @Override
784    public Pair<Path,Path> call() throws IOException {
785      return splitStoreFile(regionFs, family, sf);
786    }
787  }
788
789  /**
790   * Post split region actions before the Point-of-No-Return step
791   * @param env MasterProcedureEnv
792   **/
793  private void preSplitRegionBeforeMETA(final MasterProcedureEnv env)
794      throws IOException, InterruptedException {
795    final List<Mutation> metaEntries = new ArrayList<Mutation>();
796    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
797    if (cpHost != null) {
798      cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser());
799      try {
800        for (Mutation p : metaEntries) {
801          RegionInfo.parseRegionName(p.getRow());
802        }
803      } catch (IOException e) {
804        LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as "
805            + "region name."
806            + "Mutations from coprocessor should only for hbase:meta table.");
807        throw e;
808      }
809    }
810  }
811
812  /**
813   * Add daughter regions to META
814   * @param env MasterProcedureEnv
815   */
816  private void updateMeta(final MasterProcedureEnv env) throws IOException {
817    env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
818      daughter_1_RI, daughter_2_RI);
819  }
820
821  /**
822   * Pre split region actions after the Point-of-No-Return step
823   * @param env MasterProcedureEnv
824   **/
825  private void preSplitRegionAfterMETA(final MasterProcedureEnv env)
826      throws IOException, InterruptedException {
827    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
828    if (cpHost != null) {
829      cpHost.preSplitAfterMETAAction(getUser());
830    }
831  }
832
833  /**
834   * Post split region actions
835   * @param env MasterProcedureEnv
836   **/
837  private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
838    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
839    if (cpHost != null) {
840      cpHost.postCompletedSplitRegionAction(daughter_1_RI, daughter_2_RI, getUser());
841    }
842  }
843
844  private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
845    return env.getMasterServices().getAssignmentManager()
846      .getRegionStates().getRegionServerOfRegion(getParentRegion());
847  }
848
849  private UnassignProcedure[] createUnassignProcedures(final MasterProcedureEnv env,
850      final int regionReplication) {
851    final UnassignProcedure[] procs = new UnassignProcedure[regionReplication];
852    for (int i = 0; i < procs.length; ++i) {
853      final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i);
854      procs[i] = env.getAssignmentManager().
855          createUnassignProcedure(hri, null, true, !RegionReplicaUtil.isDefaultReplica(hri));
856    }
857    return procs;
858  }
859
860  private AssignProcedure[] createAssignProcedures(final MasterProcedureEnv env,
861      final int regionReplication) {
862    final ServerName targetServer = getParentRegionServerName(env);
863    List<RegionInfo> daughterRegions = new ArrayList<RegionInfo>(2);
864    daughterRegions.add(daughter_1_RI);
865    daughterRegions.add(daughter_2_RI);
866    return createAssignProcedures(regionReplication, env, daughterRegions, targetServer);
867  }
868
869  private AssignProcedure[] createAssignProcedures(final int regionReplication,
870      final MasterProcedureEnv env, final List<RegionInfo> hris, final ServerName serverName) {
871    final AssignProcedure[] procs = new AssignProcedure[hris.size() * regionReplication];
872    int procsIdx = 0;
873    for (int i = 0; i < hris.size(); ++i) {
874      // create procs for the primary region with the target server.
875      final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), 0);
876      procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, serverName);
877    }
878    if (regionReplication > 1) {
879      List<RegionInfo> regionReplicas =
880          new ArrayList<RegionInfo>(hris.size() * (regionReplication - 1));
881      for (int i = 0; i < hris.size(); ++i) {
882        // We don't include primary replica here
883        for (int j = 1; j < regionReplication; ++j) {
884          regionReplicas.add(RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j));
885        }
886      }
887      // for the replica regions exclude the primary region's server and call LB's roundRobin
888      // assignment
889      AssignProcedure[] replicaAssignProcs = env.getAssignmentManager()
890          .createRoundRobinAssignProcedures(regionReplicas, Collections.singletonList(serverName));
891      for (AssignProcedure proc : replicaAssignProcs) {
892        procs[procsIdx++] = proc;
893      }
894    }
895    return procs;
896  }
897
898  private int getRegionReplication(final MasterProcedureEnv env) throws IOException {
899    final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
900    return htd.getRegionReplication();
901  }
902
903  private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
904    MasterFileSystem fs = env.getMasterFileSystem();
905    long maxSequenceId = WALSplitter.getMaxRegionSequenceId(env.getMasterConfiguration(),
906      getParentRegion(), fs::getFileSystem, fs::getWALFileSystem);
907    if (maxSequenceId > 0) {
908      WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(),
909        getWALRegionDir(env, daughter_1_RI), maxSequenceId);
910      WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(),
911        getWALRegionDir(env, daughter_2_RI), maxSequenceId);
912    }
913  }
914
915  @Override
916  protected boolean abort(MasterProcedureEnv env) {
917    // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all
918    // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this
919    // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022.
920    return isRollbackSupported(getCurrentState())? super.abort(env): false;
921  }
922}