001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.master.assignment;
020
021import java.io.IOException;
022import java.io.InterruptedIOException;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.Collection;
026import java.util.Collections;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030import java.util.concurrent.Callable;
031import java.util.concurrent.ExecutionException;
032import java.util.concurrent.ExecutorService;
033import java.util.concurrent.Executors;
034import java.util.concurrent.Future;
035import java.util.concurrent.TimeUnit;
036import java.util.stream.Collectors;
037
038import org.apache.hadoop.conf.Configuration;
039import org.apache.hadoop.fs.FileSystem;
040import org.apache.hadoop.fs.Path;
041import org.apache.hadoop.hbase.DoNotRetryIOException;
042import org.apache.hadoop.hbase.HConstants;
043import org.apache.hadoop.hbase.ServerName;
044import org.apache.hadoop.hbase.TableName;
045import org.apache.hadoop.hbase.UnknownRegionException;
046import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
047import org.apache.hadoop.hbase.client.MasterSwitchType;
048import org.apache.hadoop.hbase.client.Mutation;
049import org.apache.hadoop.hbase.client.RegionInfo;
050import org.apache.hadoop.hbase.client.RegionInfoBuilder;
051import org.apache.hadoop.hbase.client.RegionReplicaUtil;
052import org.apache.hadoop.hbase.client.TableDescriptor;
053import org.apache.hadoop.hbase.io.hfile.CacheConfig;
054import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
055import org.apache.hadoop.hbase.master.MasterFileSystem;
056import org.apache.hadoop.hbase.master.RegionState.State;
057import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
058import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
059import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
060import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure;
061import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
062import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
063import org.apache.hadoop.hbase.master.procedure.TableQueue;
064import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
065import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
066import org.apache.hadoop.hbase.quotas.QuotaExceededException;
067import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
068import org.apache.hadoop.hbase.regionserver.HStore;
069import org.apache.hadoop.hbase.regionserver.HStoreFile;
070import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
071import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
072import org.apache.hadoop.hbase.util.Bytes;
073import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
074import org.apache.hadoop.hbase.util.FSUtils;
075import org.apache.hadoop.hbase.util.Pair;
076import org.apache.hadoop.hbase.util.Threads;
077import org.apache.hadoop.hbase.wal.WALSplitter;
078import org.apache.hadoop.util.ReflectionUtils;
079import org.apache.yetus.audience.InterfaceAudience;
080import org.slf4j.Logger;
081import org.slf4j.LoggerFactory;
082
083import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
084
085import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
086import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
087import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
088import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;
089
090/**
091 * The procedure to split a region in a table.
092 * Takes lock on the parent region.
093 * It holds the lock for the life of the procedure.
 * <p>Throws exception on construction if determines context hostile to split (cluster going
095 * down or master is shutting down or table is disabled).</p>
096 */
097@InterfaceAudience.Private
098public class SplitTableRegionProcedure
099    extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
  // Lazily-initialized cache used by the trace-logging helper; presumably caches
  // LOG.isTraceEnabled() — the helper's definition is elsewhere in this file. TODO confirm.
  private Boolean traceEnabled = null;
  // The two daughter regions: daughter_1 covers [parent start, bestSplitRow),
  // daughter_2 covers [bestSplitRow, parent end). Set in the constructor or on deserialization.
  private RegionInfo daughter_1_RI;
  private RegionInfo daughter_2_RI;
  // Row to split the parent at; validated (and possibly computed by the RS) in checkSplittable().
  private byte[] bestSplitRow;
  // Split policy instantiated from the table descriptor when one is configured; else null.
  private RegionSplitPolicy splitPolicy;
106
  /**
   * No-arg constructor; required by the Procedure framework to create the procedure on replay.
   * State is restored via {@link #deserializeStateData(ProcedureStateSerializer)}.
   */
  public SplitTableRegionProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }
110
  /**
   * Constructs a split procedure for {@code regionToSplit} at {@code splitRow}.
   * <p>Fails fast: the online and splittable checks run here at construction time; when the
   * prepare step later re-runs them it only warns and skips.</p>
   * @param env master procedure environment
   * @param regionToSplit the parent region to split
   * @param splitRow requested split point; may be null/empty, in which case the hosting
   *   RegionServer is asked for the best split row (see {@code checkSplittable})
   * @throws IOException if the context is hostile to a split (region offline, not splittable,
   *   unusable split row, preflight checks fail)
   */
  public SplitTableRegionProcedure(final MasterProcedureEnv env,
      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
    super(env, regionToSplit);
    preflightChecks(env, true);
    // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here
    // we fail-fast on construction. There it skips the split with just a warning.
    checkOnline(env, regionToSplit);
    this.bestSplitRow = splitRow;
    // May replace this.bestSplitRow with a split point computed by the RegionServer.
    checkSplittable(env, regionToSplit, bestSplitRow);
    final TableName table = regionToSplit.getTable();
    // Both daughters share a single region id (a timestamp >= the parent's id).
    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
    // Daughter A: [parent start, split row)
    this.daughter_1_RI = RegionInfoBuilder.newBuilder(table)
        .setStartKey(regionToSplit.getStartKey())
        .setEndKey(bestSplitRow)
        .setSplit(false)
        .setRegionId(rid)
        .build();
    // Daughter B: [split row, parent end)
    this.daughter_2_RI = RegionInfoBuilder.newBuilder(table)
        .setStartKey(bestSplitRow)
        .setEndKey(regionToSplit.getEndKey())
        .setSplit(false)
        .setRegionId(rid)
        .build();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    if(htd.getRegionSplitPolicyClassName() != null) {
      // Since we don't have region reference here, creating the split policy instance without it.
      // This can be used to invoke methods which don't require Region reference. This instantiation
      // of a class on Master-side though it only makes sense on the RegionServer-side is
      // for Phoenix Local Indexing. Refer HBASE-12583 for more information.
      Class<? extends RegionSplitPolicy> clazz =
          RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration());
      this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration());
    }
  }
145
146  /**
147   * Check whether there are recovered.edits in the parent closed region.
148   * @param env master env
149   * @throws IOException IOException
150   */
151  static boolean hasRecoveredEdits(MasterProcedureEnv env, RegionInfo ri) throws IOException {
152    return WALSplitter.hasRecoveredEdits(env.getMasterConfiguration(), ri);
153  }
154
155  /**
156   * Check whether the region is splittable
157   * @param env MasterProcedureEnv
158   * @param regionToSplit parent Region to be split
159   * @param splitRow if splitRow is not specified, will first try to get bestSplitRow from RS
160   * @throws IOException
161   */
162  private void checkSplittable(final MasterProcedureEnv env,
163      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
164    // Ask the remote RS if this region is splittable.
165    // If we get an IOE, report it along w/ the failure so can see why we are not splittable at this time.
166    if(regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
167      throw new IllegalArgumentException ("Can't invoke split on non-default regions directly");
168    }
169    RegionStateNode node =
170        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
171    IOException splittableCheckIOE = null;
172    boolean splittable = false;
173    if (node != null) {
174      try {
175        if (bestSplitRow == null || bestSplitRow.length == 0) {
176          LOG.info("splitKey isn't explicitly specified, " + " will try to find a best split key from RS");
177        }
178        // Always set bestSplitRow request as true here,
179        // need to call Region#checkSplit to check it splittable or not
180        GetRegionInfoResponse response =
181            Util.getRegionInfoResponse(env, node.getRegionLocation(), node.getRegionInfo(), true);
182        if(bestSplitRow == null || bestSplitRow.length == 0) {
183          bestSplitRow = response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
184        }
185        splittable = response.hasSplittable() && response.getSplittable();
186
187        if (LOG.isDebugEnabled()) {
188          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
189        }
190      } catch (IOException e) {
191        splittableCheckIOE = e;
192      }
193    }
194
195    if (!splittable) {
196      IOException e = new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
197      if (splittableCheckIOE != null) e.initCause(splittableCheckIOE);
198      throw e;
199    }
200
201    if(bestSplitRow == null || bestSplitRow.length == 0) {
202      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, "
203          + "maybe table is too small for auto split. For force split, try specifying split row");
204    }
205
206    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
207      throw new DoNotRetryIOException(
208        "Split row is equal to startkey: " + Bytes.toStringBinary(splitRow));
209    }
210
211    if (!regionToSplit.containsRow(bestSplitRow)) {
212      throw new DoNotRetryIOException(
213        "Split row is not inside region key range splitKey:" + Bytes.toStringBinary(splitRow) +
214        " region: " + regionToSplit);
215    }
216  }
217
218  /**
219   * Calculate daughter regionid to use.
220   * @param hri Parent {@link RegionInfo}
221   * @return Daughter region id (timestamp) to use.
222   */
223  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
224    long rid = EnvironmentEdgeManager.currentTime();
225    // Regionid is timestamp.  Can't be less than that of parent else will insert
226    // at wrong location in hbase:meta (See HBASE-710).
227    if (rid < hri.getRegionId()) {
228      LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
229        " but current time here is " + rid);
230      rid = hri.getRegionId() + 1;
231    }
232    return rid;
233  }
234
  /**
   * State-machine driver for the split: prepare, close parent, check parent for recovered
   * edits, create daughter regions on the filesystem, update hbase:meta, open daughters.
   * <p>On IOException: if the state supports rollback the procedure is failed; otherwise the
   * error is logged and the state is retried.</p>
   * @return Flow.HAS_MORE_STATE while more states remain; Flow.NO_MORE_STATE on completion
   *   or when prepare decides the split should not proceed.
   */
  @Override
  protected Flow executeFromState(final MasterProcedureEnv env, final SplitTableRegionState state)
      throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_PREPARE:
          if (prepareSplitRegion(env)) {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
            break;
          } else {
            // Prepare decided not to split (e.g. already split, switch off).
            return Flow.NO_MORE_STATE;
          }
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          preSplitRegion(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          addChildProcedure(createUnassignProcedures(env, getRegionReplication(env)));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          if (hasRecoveredEdits(env, getRegion())) {
            // If recovered edits, reopen parent region and then re-run the close by going back to
            // SPLIT_TABLE_REGION_CLOSE_PARENT_REGION. We might have to cycle here a few times
            // (TODO: Add being able to open a region in read-only mode). Open the primary replica
            // in this case only where we just want to pickup the left-out recovered.edits.
            LOG.info("Found recovered.edits under {}, reopen so we pickup these missed edits!",
                getRegion().getEncodedName());
            addChildProcedure(env.getAssignmentManager().createAssignProcedure(getParentRegion()));
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          } else {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
          }
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
          createDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
          break;
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          writeMaxSequenceIdFile(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          preSplitRegionBeforeMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
          break;
        case SPLIT_TABLE_REGION_UPDATE_META:
          // Point of no return: from here on, rollback is unsupported (see isRollbackSupported).
          updateMeta(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
          preSplitRegionAfterMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
          break;
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
          addChildProcedure(createAssignProcedures(env, getRegionReplication(env)));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
          break;
        case SPLIT_TABLE_REGION_POST_OPERATION:
          postSplitRegion(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
      if (!isRollbackSupported(state)) {
        // We reach a state that cannot be rolled back. We just need to keep retrying.
        LOG.warn(msg, e);
      } else {
        LOG.error(msg, e);
        setFailure("master-split-regions", e);
      }
    }
    // if split fails,  need to call ((HRegion)parent).clearSplit() when it is a force split
    return Flow.HAS_MORE_STATE;
  }
314
315  /**
316   * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously
317   * submitted for parent region to be split (rollback doesn't wait on the completion of the
318   * AssignProcedure) . This can be improved by changing rollback() to support sub-procedures.
319   * See HBASE-19851 for details.
320   */
321  @Override
322  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
323      throws IOException, InterruptedException {
324    if (isTraceEnabled()) {
325      LOG.trace(this + " rollback state=" + state);
326    }
327
328    try {
329      switch (state) {
330        case SPLIT_TABLE_REGION_POST_OPERATION:
331        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
332        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
333        case SPLIT_TABLE_REGION_UPDATE_META:
334          // PONR
335          throw new UnsupportedOperationException(this + " unhandled state=" + state);
336        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
337          break;
338        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
339        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
340          // Doing nothing, as re-open parent region would clean up daughter region directories.
341          break;
342        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
343          // Doing nothing, in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION,
344          // we will bring parent region online
345          break;
346        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
347          openParentRegion(env);
348          break;
349        case SPLIT_TABLE_REGION_PRE_OPERATION:
350          postRollBackSplitRegion(env);
351          break;
352        case SPLIT_TABLE_REGION_PREPARE:
353          break; // nothing to do
354        default:
355          throw new UnsupportedOperationException(this + " unhandled state=" + state);
356      }
357    } catch (IOException e) {
358      // This will be retried. Unless there is a bug in the code,
359      // this should be just a "temporary error" (e.g. network down)
360      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state +
361          " for splitting the region "
362        + getParentRegion().getEncodedName() + " in table " + getTableName(), e);
363      throw e;
364    }
365  }
366
367  /*
368   * Check whether we are in the state that can be rollback
369   */
370  @Override
371  protected boolean isRollbackSupported(final SplitTableRegionState state) {
372    switch (state) {
373      case SPLIT_TABLE_REGION_POST_OPERATION:
374      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
375      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
376      case SPLIT_TABLE_REGION_UPDATE_META:
377        // It is not safe to rollback if we reach to these states.
378        return false;
379      default:
380        break;
381    }
382    return true;
383  }
384
  /**
   * Convert a persisted state id (protobuf enum number) back to the enum value.
   */
  @Override
  protected SplitTableRegionState getState(final int stateId) {
    return SplitTableRegionState.forNumber(stateId);
  }
389
  /**
   * Convert a state enum value to its protobuf enum number for persistence.
   */
  @Override
  protected int getStateId(final SplitTableRegionState state) {
    return state.getNumber();
  }
394
  /**
   * First state of the split state machine.
   */
  @Override
  protected SplitTableRegionState getInitialState() {
    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
  }
399
  /**
   * Persist user info, the parent region, and both daughter regions so the procedure can be
   * rebuilt on replay.
   */
  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.serializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
        MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
        .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
        // Order matters: deserializeStateData reads daughter A at index 0, B at index 1.
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_1_RI))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_2_RI));
    serializer.serialize(splitTableRegionMsg.build());
  }
413
  /**
   * Restore user info, the parent region, and the two daughter regions written by
   * {@link #serializeStateData(ProcedureStateSerializer)}.
   */
  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.deserializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
        serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
    // Serialization always writes exactly two daughters: index 0 is A, index 1 is B.
    assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2);
    daughter_1_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
    daughter_2_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
  }
427
428  @Override
429  public void toStringClassDetails(StringBuilder sb) {
430    sb.append(getClass().getSimpleName());
431    sb.append(" table=");
432    sb.append(getTableName());
433    sb.append(", parent=");
434    sb.append(getParentRegion().getShortNameToLog());
435    sb.append(", daughterA=");
436    sb.append(daughter_1_RI.getShortNameToLog());
437    sb.append(", daughterB=");
438    sb.append(daughter_2_RI.getShortNameToLog());
439  }
440
  /**
   * @return the parent (to-be-split) region; alias for {@link #getRegion()} for readability.
   */
  private RegionInfo getParentRegion() {
    return getRegion();
  }
444
  /**
   * @return the table operation this procedure performs: REGION_SPLIT.
   */
  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SPLIT;
  }
449
  /**
   * @return the split-procedure metrics tracked by the AssignmentManager.
   */
  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
  }
454
  /**
   * @return the split row, i.e. daughter B's start key.
   */
  private byte[] getSplitRow() {
    return daughter_2_RI.getStartKey();
  }
458
  // States the parent region may be in for the split to proceed: online, or already closed
  // (checked against the region state node in prepareSplitRegion).
  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };
460
461  /**
462   * Prepare to Split region.
463   * @param env MasterProcedureEnv
464   */
465  @VisibleForTesting
466  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
467    // Fail if we are taking snapshot for the given table
468    if (env.getMasterServices().getSnapshotManager()
469      .isTakingSnapshot(getParentRegion().getTable())) {
470      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() +
471        ", because we are taking snapshot for the table " + getParentRegion().getTable()));
472      return false;
473    }
474    // Check whether the region is splittable
475    RegionStateNode node =
476        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
477
478    if (node == null) {
479      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
480    }
481
482    RegionInfo parentHRI = node.getRegionInfo();
483    if (parentHRI == null) {
484      LOG.info("Unsplittable; parent region is null; node={}", node);
485      return false;
486    }
487    // Lookup the parent HRI state from the AM, which has the latest updated info.
488    // Protect against the case where concurrent SPLIT requests came in and succeeded
489    // just before us.
490    if (node.isInState(State.SPLIT)) {
491      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
492      return false;
493    }
494    if (parentHRI.isSplit() || parentHRI.isOffline()) {
495      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
496      return false;
497    }
498
499    // expected parent to be online or closed
500    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
501      // We may have SPLIT already?
502      setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() +
503          " FAILED because state=" + node.getState() + "; expected " +
504          Arrays.toString(EXPECTED_SPLIT_STATES)));
505      return false;
506    }
507
508    // Since we have the lock and the master is coordinating the operation
509    // we are always able to split the region
510    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
511      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
512      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() +
513          " failed due to split switch off"));
514      return false;
515    }
516
517    // See HBASE-21395, for 2.0.x and 2.1.x only.
518    // A safe fence here, if there is a table procedure going on, abort the split.
519    // There some cases that may lead to table procedure roll back (more serious
520    // than roll back the split procedure here), or the split parent was brought online
521    // by the table procedure because of the race between split procedure and table procedure
522    List<AbstractStateMachineTableProcedure> tableProcedures = env
523        .getMasterServices().getProcedures().stream()
524        .filter(p -> p instanceof AbstractStateMachineTableProcedure)
525        .map(p -> (AbstractStateMachineTableProcedure) p)
526        .filter(p -> p.getTableName().equals(getParentRegion().getTable()) &&
527            !p.isFinished() && TableQueue.requireTableExclusiveLock(p))
528        .collect(Collectors.toList());
529    if (tableProcedures != null && tableProcedures.size() > 0) {
530      throw new DoNotRetryIOException(tableProcedures.get(0).toString()
531          + " is going on against the same table, abort the split of " + this
532          .toString());
533    }
534
535    // set node state as SPLITTING
536    node.setState(State.SPLITTING);
537
538    return true;
539  }
540
541  /**
542   * Action before splitting region in a table.
543   * @param env MasterProcedureEnv
544   */
545  private void preSplitRegion(final MasterProcedureEnv env)
546      throws IOException, InterruptedException {
547    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
548    if (cpHost != null) {
549      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
550    }
551
552    // TODO: Clean up split and merge. Currently all over the place.
553    // Notify QuotaManager and RegionNormalizer
554    try {
555      env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion());
556    } catch (QuotaExceededException e) {
557      env.getMasterServices().getRegionNormalizer().planSkipped(this.getParentRegion(),
558          NormalizationPlan.PlanType.SPLIT);
559      throw e;
560    }
561  }
562
563  /**
564   * Action after rollback a split table region action.
565   * @param env MasterProcedureEnv
566   */
567  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
568    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
569    if (cpHost != null) {
570      cpHost.postRollBackSplitRegionAction(getUser());
571    }
572  }
573
574  /**
575   * Rollback close parent region
576   * @param env MasterProcedureEnv
577   */
578  private void openParentRegion(final MasterProcedureEnv env) throws IOException {
579    // Check whether the region is closed; if so, open it in the same server
580    final int regionReplication = getRegionReplication(env);
581    final ServerName serverName = getParentRegionServerName(env);
582
583    final AssignProcedure[] procs = createAssignProcedures(regionReplication, env,
584      Collections.singletonList(getParentRegion()), serverName);
585    env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs);
586  }
587
588  /**
589   * Create daughter regions
590   * @param env MasterProcedureEnv
591   */
592  @VisibleForTesting
593  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
594    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
595    final Path tabledir = FSUtils.getTableDir(mfs.getRootDir(), getTableName());
596    final FileSystem fs = mfs.getFileSystem();
597    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
598      env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
599    regionFs.createSplitsDir();
600
601    Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);
602
603    assertReferenceFileCount(fs, expectedReferences.getFirst(),
604      regionFs.getSplitsDir(daughter_1_RI));
605    //Move the files from the temporary .splits to the final /table/region directory
606    regionFs.commitDaughterRegion(daughter_1_RI);
607    assertReferenceFileCount(fs, expectedReferences.getFirst(),
608      new Path(tabledir, daughter_1_RI.getEncodedName()));
609
610    assertReferenceFileCount(fs, expectedReferences.getSecond(),
611      regionFs.getSplitsDir(daughter_2_RI));
612    regionFs.commitDaughterRegion(daughter_2_RI);
613    assertReferenceFileCount(fs, expectedReferences.getSecond(),
614      new Path(tabledir, daughter_2_RI.getEncodedName()));
615  }
616
617  /**
618   * Create Split directory
619   * @param env MasterProcedureEnv
620   */
621  private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
622      final HRegionFileSystem regionFs) throws IOException {
623    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
624    final Configuration conf = env.getMasterConfiguration();
625    // The following code sets up a thread pool executor with as many slots as
626    // there's files to split. It then fires up everything, waits for
627    // completion and finally checks for any exception
628    //
629    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir
630    // Nothing to unroll here if failure -- re-run createSplitsDir will
631    // clean this up.
632    int nbFiles = 0;
633    final Map<String, Collection<StoreFileInfo>> files =
634      new HashMap<String, Collection<StoreFileInfo>>(regionFs.getFamilies().size());
635    for (String family: regionFs.getFamilies()) {
636      Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family);
637      if (sfis == null) continue;
638      Collection<StoreFileInfo> filteredSfis = null;
639      for (StoreFileInfo sfi: sfis) {
640        // Filter. There is a lag cleaning up compacted reference files. They get cleared
641        // after a delay in case outstanding Scanners still have references. Because of this,
642        // the listing of the Store content may have straggler reference files. Skip these.
643        // It should be safe to skip references at this point because we checked above with
644        // the region if it thinks it is splittable and if we are here, it thinks it is
645        // splitable.
646        if (sfi.isReference()) {
647          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
648          continue;
649        }
650        if (filteredSfis == null) {
651          filteredSfis = new ArrayList<StoreFileInfo>(sfis.size());
652          files.put(family, filteredSfis);
653        }
654        filteredSfis.add(sfi);
655        nbFiles++;
656      }
657    }
658    if (nbFiles == 0) {
659      // no file needs to be splitted.
660      return new Pair<Integer, Integer>(0,0);
661    }
662    // Max #threads is the smaller of the number of storefiles or the default max determined above.
663    int maxThreads = Math.min(
664      conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
665        conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
666      nbFiles);
667    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" +
668      getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
669    final ExecutorService threadPool = Executors.newFixedThreadPool(
670      maxThreads, Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
671    final List<Future<Pair<Path,Path>>> futures = new ArrayList<Future<Pair<Path,Path>>>(nbFiles);
672
673    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
674    // Split each store file.
675    for (Map.Entry<String, Collection<StoreFileInfo>>e: files.entrySet()) {
676      byte [] familyName = Bytes.toBytes(e.getKey());
677      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
678      final Collection<StoreFileInfo> storeFiles = e.getValue();
679      if (storeFiles != null && storeFiles.size() > 0) {
680        final CacheConfig cacheConf = new CacheConfig(conf, hcd);
681        for (StoreFileInfo storeFileInfo: storeFiles) {
682          StoreFileSplitter sfs =
683              new StoreFileSplitter(regionFs, familyName, new HStoreFile(mfs.getFileSystem(),
684                  storeFileInfo, conf, cacheConf, hcd.getBloomFilterType(), true));
685          futures.add(threadPool.submit(sfs));
686        }
687      }
688    }
689    // Shutdown the pool
690    threadPool.shutdown();
691
692    // Wait for all the tasks to finish.
693    // When splits ran on the RegionServer, how-long-to-wait-configuration was named
694    // hbase.regionserver.fileSplitTimeout. If set, use its value.
695    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
696        conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
697    try {
698      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
699      if (stillRunning) {
700        threadPool.shutdownNow();
701        // wait for the thread to shutdown completely.
702        while (!threadPool.isTerminated()) {
703          Thread.sleep(50);
704        }
705        throw new IOException("Took too long to split the" +
706            " files and create the references, aborting split");
707      }
708    } catch (InterruptedException e) {
709      throw (InterruptedIOException)new InterruptedIOException().initCause(e);
710    }
711
712    int daughterA = 0;
713    int daughterB = 0;
714    // Look for any exception
715    for (Future<Pair<Path, Path>> future : futures) {
716      try {
717        Pair<Path, Path> p = future.get();
718        daughterA += p.getFirst() != null ? 1 : 0;
719        daughterB += p.getSecond() != null ? 1 : 0;
720      } catch (InterruptedException e) {
721        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
722      } catch (ExecutionException e) {
723        throw new IOException(e);
724      }
725    }
726
727    if (LOG.isDebugEnabled()) {
728      LOG.debug("pid=" + getProcId() + " split storefiles for region " +
729        getParentRegion().getShortNameToLog() +
730          " Daughter A: " + daughterA + " storefiles, Daughter B: " +
731          daughterB + " storefiles.");
732    }
733    return new Pair<Integer, Integer>(daughterA, daughterB);
734  }
735
736  private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount,
737      final Path dir) throws IOException {
738    if (expectedReferenceFileCount != 0 &&
739        expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) {
740      throw new IOException("Failing split. Expected reference file count isn't equal.");
741    }
742  }
743
744  private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf)
745    throws IOException {
746    if (LOG.isDebugEnabled()) {
747      LOG.debug("pid=" + getProcId() + " splitting started for store file: " +
748          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
749    }
750
751    final byte[] splitRow = getSplitRow();
752    final String familyName = Bytes.toString(family);
753    final Path path_first = regionFs.splitStoreFile(this.daughter_1_RI, familyName, sf, splitRow,
754        false, splitPolicy);
755    final Path path_second = regionFs.splitStoreFile(this.daughter_2_RI, familyName, sf, splitRow,
756       true, splitPolicy);
757    if (LOG.isDebugEnabled()) {
758      LOG.debug("pid=" + getProcId() + " splitting complete for store file: " +
759          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
760    }
761    return new Pair<Path,Path>(path_first, path_second);
762  }
763
764  /**
765   * Utility class used to do the file splitting / reference writing
766   * in parallel instead of sequentially.
767   */
768  private class StoreFileSplitter implements Callable<Pair<Path,Path>> {
769    private final HRegionFileSystem regionFs;
770    private final byte[] family;
771    private final HStoreFile sf;
772
773    /**
774     * Constructor that takes what it needs to split
775     * @param regionFs the file system
776     * @param family Family that contains the store file
777     * @param sf which file
778     */
779    public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) {
780      this.regionFs = regionFs;
781      this.sf = sf;
782      this.family = family;
783    }
784
785    @Override
786    public Pair<Path,Path> call() throws IOException {
787      return splitStoreFile(regionFs, family, sf);
788    }
789  }
790
791  /**
792   * Post split region actions before the Point-of-No-Return step
793   * @param env MasterProcedureEnv
794   **/
795  private void preSplitRegionBeforeMETA(final MasterProcedureEnv env)
796      throws IOException, InterruptedException {
797    final List<Mutation> metaEntries = new ArrayList<Mutation>();
798    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
799    if (cpHost != null) {
800      cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser());
801      try {
802        for (Mutation p : metaEntries) {
803          RegionInfo.parseRegionName(p.getRow());
804        }
805      } catch (IOException e) {
806        LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as "
807            + "region name."
808            + "Mutations from coprocessor should only for hbase:meta table.");
809        throw e;
810      }
811    }
812  }
813
814  /**
815   * Add daughter regions to META
816   * @param env MasterProcedureEnv
817   */
818  private void updateMeta(final MasterProcedureEnv env) throws IOException {
819    env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
820      daughter_1_RI, daughter_2_RI);
821  }
822
823  /**
824   * Pre split region actions after the Point-of-No-Return step
825   * @param env MasterProcedureEnv
826   **/
827  private void preSplitRegionAfterMETA(final MasterProcedureEnv env)
828      throws IOException, InterruptedException {
829    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
830    if (cpHost != null) {
831      cpHost.preSplitAfterMETAAction(getUser());
832    }
833  }
834
835  /**
836   * Post split region actions
837   * @param env MasterProcedureEnv
838   **/
839  private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
840    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
841    if (cpHost != null) {
842      cpHost.postCompletedSplitRegionAction(daughter_1_RI, daughter_2_RI, getUser());
843    }
844  }
845
846  private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
847    return env.getMasterServices().getAssignmentManager()
848      .getRegionStates().getRegionServerOfRegion(getParentRegion());
849  }
850
851  private UnassignProcedure[] createUnassignProcedures(final MasterProcedureEnv env,
852      final int regionReplication) {
853    final UnassignProcedure[] procs = new UnassignProcedure[regionReplication];
854    for (int i = 0; i < procs.length; ++i) {
855      final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i);
856      procs[i] = env.getAssignmentManager().
857          createUnassignProcedure(hri, null, true, !RegionReplicaUtil.isDefaultReplica(hri));
858    }
859    return procs;
860  }
861
862  private AssignProcedure[] createAssignProcedures(final MasterProcedureEnv env,
863      final int regionReplication) {
864    final ServerName targetServer = getParentRegionServerName(env);
865    List<RegionInfo> daughterRegions = new ArrayList<RegionInfo>(2);
866    daughterRegions.add(daughter_1_RI);
867    daughterRegions.add(daughter_2_RI);
868    return createAssignProcedures(regionReplication, env, daughterRegions, targetServer);
869  }
870
871  private AssignProcedure[] createAssignProcedures(final int regionReplication,
872      final MasterProcedureEnv env, final List<RegionInfo> hris, final ServerName serverName) {
873    final AssignProcedure[] procs = new AssignProcedure[hris.size() * regionReplication];
874    int procsIdx = 0;
875    for (int i = 0; i < hris.size(); ++i) {
876      // create procs for the primary region with the target server.
877      final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), 0);
878      procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, serverName);
879    }
880    if (regionReplication > 1) {
881      List<RegionInfo> regionReplicas =
882          new ArrayList<RegionInfo>(hris.size() * (regionReplication - 1));
883      for (int i = 0; i < hris.size(); ++i) {
884        // We don't include primary replica here
885        for (int j = 1; j < regionReplication; ++j) {
886          regionReplicas.add(RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j));
887        }
888      }
889      // for the replica regions exclude the primary region's server and call LB's roundRobin
890      // assignment
891      AssignProcedure[] replicaAssignProcs = env.getAssignmentManager()
892          .createRoundRobinAssignProcedures(regionReplicas, Collections.singletonList(serverName));
893      for (AssignProcedure proc : replicaAssignProcs) {
894        procs[procsIdx++] = proc;
895      }
896    }
897    return procs;
898  }
899
900  private int getRegionReplication(final MasterProcedureEnv env) throws IOException {
901    final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
902    return htd.getRegionReplication();
903  }
904
905  private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
906    FileSystem walFS = env.getMasterServices().getMasterWalManager().getFileSystem();
907    long maxSequenceId =
908      WALSplitter.getMaxRegionSequenceId(walFS, getWALRegionDir(env, getParentRegion()));
909    if (maxSequenceId > 0) {
910      WALSplitter.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, daughter_1_RI),
911          maxSequenceId);
912      WALSplitter.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, daughter_2_RI),
913          maxSequenceId);
914    }
915  }
916
917  /**
918   * The procedure could be restarted from a different machine. If the variable is null, we need to
919   * retrieve it.
920   * @return traceEnabled
921   */
922  private boolean isTraceEnabled() {
923    if (traceEnabled == null) {
924      traceEnabled = LOG.isTraceEnabled();
925    }
926    return traceEnabled;
927  }
928
929  @Override
930  protected boolean abort(MasterProcedureEnv env) {
931    // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all
932    // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this
933    // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022.
934    return isRollbackSupported(getCurrentState())? super.abort(env): false;
935  }
936}