001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import java.io.IOException;
021import java.io.InterruptedIOException;
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.Collection;
025import java.util.Collections;
026import java.util.HashMap;
027import java.util.List;
028import java.util.Map;
029import java.util.concurrent.Callable;
030import java.util.concurrent.ExecutionException;
031import java.util.concurrent.ExecutorService;
032import java.util.concurrent.Executors;
033import java.util.concurrent.Future;
034import java.util.concurrent.TimeUnit;
035import java.util.stream.Stream;
036import org.apache.hadoop.conf.Configuration;
037import org.apache.hadoop.fs.FileSystem;
038import org.apache.hadoop.fs.Path;
039import org.apache.hadoop.hbase.DoNotRetryIOException;
040import org.apache.hadoop.hbase.HConstants;
041import org.apache.hadoop.hbase.ServerName;
042import org.apache.hadoop.hbase.TableName;
043import org.apache.hadoop.hbase.UnknownRegionException;
044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
045import org.apache.hadoop.hbase.client.MasterSwitchType;
046import org.apache.hadoop.hbase.client.Mutation;
047import org.apache.hadoop.hbase.client.RegionInfo;
048import org.apache.hadoop.hbase.client.RegionInfoBuilder;
049import org.apache.hadoop.hbase.client.TableDescriptor;
050import org.apache.hadoop.hbase.io.hfile.CacheConfig;
051import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
052import org.apache.hadoop.hbase.master.MasterFileSystem;
053import org.apache.hadoop.hbase.master.RegionState.State;
054import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
055import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
057import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
058import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
059import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
060import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
061import org.apache.hadoop.hbase.quotas.QuotaExceededException;
062import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
063import org.apache.hadoop.hbase.regionserver.HStore;
064import org.apache.hadoop.hbase.regionserver.HStoreFile;
065import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
066import org.apache.hadoop.hbase.regionserver.RegionSplitRestriction;
067import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
068import org.apache.hadoop.hbase.regionserver.StoreUtils;
069import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
070import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
071import org.apache.hadoop.hbase.util.Bytes;
072import org.apache.hadoop.hbase.util.CommonFSUtils;
073import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
074import org.apache.hadoop.hbase.util.FSUtils;
075import org.apache.hadoop.hbase.util.Pair;
076import org.apache.hadoop.hbase.util.Threads;
077import org.apache.hadoop.hbase.wal.WALSplitUtil;
078import org.apache.hadoop.util.ReflectionUtils;
079import org.apache.yetus.audience.InterfaceAudience;
080import org.slf4j.Logger;
081import org.slf4j.LoggerFactory;
082
083import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
084
085import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
086import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
087import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
088import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;
089
090/**
091 * The procedure to split a region in a table. Takes lock on the parent region. It holds the lock
092 * for the life of the procedure.
093 * <p>
 * Throws an exception on construction if it determines the context is hostile to a split (cluster
 * going down, master shutting down, or table disabled).
096 * </p>
097 */
098@InterfaceAudience.Private
099public class SplitTableRegionProcedure
100  extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
  // First daughter: spans the parent's start key up to the split row. Persisted with the procedure
  // state.
  private RegionInfo daughterOneRI;
  // Second daughter: spans the split row up to the parent's end key. Persisted with the procedure
  // state.
  private RegionInfo daughterTwoRI;
  // Split point. Either supplied by the caller (possibly adjusted by the table's split restriction)
  // or taken from the RegionServer's response in checkSplittable(). Not part of the serialized
  // state.
  private byte[] bestSplitRow;
  // Only instantiated in the constructor when the table descriptor names a split policy class.
  // Not part of the serialized state.
  private RegionSplitPolicy splitPolicy;
106
  /**
   * No-argument constructor. Leaves every field unset; the daughter regions are populated later by
   * {@link #deserializeStateData(ProcedureStateSerializer)}.
   */
  public SplitTableRegionProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }
110
  /**
   * Builds a split procedure for {@code regionToSplit}, failing fast if the split cannot proceed.
   * <p>
   * After the pre-flight and online checks, a non-empty {@code splitRow} is passed through the
   * table's {@link RegionSplitRestriction}; the restricted point replaces it when the two differ.
   * The region is then checked for splittability (which also picks a split row when none was
   * given), the two daughter {@link RegionInfo}s are derived from the final split row, and, if the
   * table descriptor names a split policy class, an instance of that policy is created.
   * </p>
   * @param env           MasterProcedureEnv
   * @param regionToSplit parent region to be split
   * @param splitRow      requested split point; {@code null} or empty to have one chosen
   * @throws IOException if one of the checks fails, e.g. the region is not splittable or no valid
   *                     split row is available
   */
  public SplitTableRegionProcedure(final MasterProcedureEnv env, final RegionInfo regionToSplit,
    final byte[] splitRow) throws IOException {
    super(env, regionToSplit);
    preflightChecks(env, true);
    // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here
    // we fail-fast on construction. There it skips the split with just a warning.
    checkOnline(env, regionToSplit);
    this.bestSplitRow = splitRow;
    TableDescriptor tableDescriptor =
      env.getMasterServices().getTableDescriptors().get(getTableName());
    Configuration conf = env.getMasterConfiguration();
    if (hasBestSplitRow()) {
      // Apply the split restriction for the table to the user-specified split point
      RegionSplitRestriction splitRestriction =
        RegionSplitRestriction.create(tableDescriptor, conf);
      byte[] restrictedSplitRow = splitRestriction.getRestrictedSplitPoint(bestSplitRow);
      if (!Bytes.equals(bestSplitRow, restrictedSplitRow)) {
        LOG.warn(
          "The specified split point {} violates the split restriction of the table. "
            + "Using {} as a split point.",
          Bytes.toStringBinary(bestSplitRow), Bytes.toStringBinary(restrictedSplitRow));
        bestSplitRow = restrictedSplitRow;
      }
    }
    // May replace bestSplitRow with the RegionServer's suggestion; throws if there is no usable
    // split row, so bestSplitRow is non-empty from here on.
    checkSplittable(env, regionToSplit);
    final TableName table = regionToSplit.getTable();
    // Both daughters share one region id.
    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
    this.daughterOneRI =
      RegionInfoBuilder.newBuilder(table).setStartKey(regionToSplit.getStartKey())
        .setEndKey(bestSplitRow).setSplit(false).setRegionId(rid).build();
    this.daughterTwoRI = RegionInfoBuilder.newBuilder(table).setStartKey(bestSplitRow)
      .setEndKey(regionToSplit.getEndKey()).setSplit(false).setRegionId(rid).build();

    if (tableDescriptor.getRegionSplitPolicyClassName() != null) {
      // Since we don't have region reference here, creating the split policy instance without it.
      // This can be used to invoke methods which don't require Region reference. This instantiation
      // of a class on Master-side though it only makes sense on the RegionServer-side is
      // for Phoenix Local Indexing. Refer HBASE-12583 for more information.
      Class<? extends RegionSplitPolicy> clazz =
        RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf);
      this.splitPolicy = ReflectionUtils.newInstance(clazz, conf);
    }
  }
154
155  @Override
156  protected LockState acquireLock(final MasterProcedureEnv env) {
157    if (
158      env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(),
159        daughterOneRI, daughterTwoRI)
160    ) {
161      try {
162        LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks());
163      } catch (IOException e) {
164        // Ignore, just for logging
165      }
166      return LockState.LOCK_EVENT_WAIT;
167    }
168    return LockState.LOCK_ACQUIRED;
169  }
170
171  @Override
172  protected void releaseLock(final MasterProcedureEnv env) {
173    env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI,
174      daughterTwoRI);
175  }
176
177  public RegionInfo getDaughterOneRI() {
178    return daughterOneRI;
179  }
180
181  public RegionInfo getDaughterTwoRI() {
182    return daughterTwoRI;
183  }
184
185  private boolean hasBestSplitRow() {
186    return bestSplitRow != null && bestSplitRow.length > 0;
187  }
188
  /**
   * Check whether the region is splittable.
   * <p>
   * Asks the RegionServer hosting the region whether it is splittable. When no split row has been
   * set yet, the request also asks for a best split row and the answer is stored in
   * {@code bestSplitRow}. Afterwards the split row is validated: it must be present, must differ
   * from the region's start key and must lie inside the region's key range.
   * </p>
   * @param env           MasterProcedureEnv
   * @param regionToSplit parent Region to be split
   * @throws IllegalArgumentException if {@code regionToSplit} is not the default replica
   * @throws IOException              a {@link DoNotRetryIOException} if the region is not
   *                                  splittable (including when it has no region state node) or
   *                                  the split row is missing or invalid
   */
  private void checkSplittable(final MasterProcedureEnv env, final RegionInfo regionToSplit)
    throws IOException {
    // Ask the remote RS if this region is splittable.
    // If we get an IOE, report it along w/ the failure so can see why we are not splittable at
    // this time.
    if (regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
      throw new IllegalArgumentException("Can't invoke split on non-default regions directly");
    }
    RegionStateNode node =
      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
    IOException splittableCheckIOE = null;
    boolean splittable = false;
    // Without a node the region is treated as not splittable (splittable stays false).
    if (node != null) {
      try {
        GetRegionInfoResponse response;
        if (!hasBestSplitRow()) {
          LOG.info(
            "{} splitKey isn't explicitly specified, will try to find a best split key from RS {}",
            node.getRegionInfo().getRegionNameAsString(), node.getRegionLocation());
          // Last argument true: also request a best split row from the RegionServer.
          response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(),
            node.getRegionInfo(), true);
          bestSplitRow =
            response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
        } else {
          response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(),
            node.getRegionInfo(), false);
        }
        splittable = response.hasSplittable() && response.getSplittable();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
        }
      } catch (IOException e) {
        // Remember the failure; it becomes the cause of the "NOT splittable" exception below.
        splittableCheckIOE = e;
      }
    }

    if (!splittable) {
      IOException e =
        new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
      if (splittableCheckIOE != null) {
        e.initCause(splittableCheckIOE);
      }
      throw e;
    }

    if (!hasBestSplitRow()) {
      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, "
        + "maybe table is too small for auto split. For force split, try specifying split row");
    }

    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
      throw new DoNotRetryIOException(
        "Split row is equal to startkey: " + Bytes.toStringBinary(bestSplitRow));
    }

    if (!regionToSplit.containsRow(bestSplitRow)) {
      throw new DoNotRetryIOException("Split row is not inside region key range splitKey:"
        + Bytes.toStringBinary(bestSplitRow) + " region: " + regionToSplit);
    }
  }
254
255  /**
256   * Calculate daughter regionid to use.
257   * @param hri Parent {@link RegionInfo}
258   * @return Daughter region id (timestamp) to use.
259   */
260  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
261    long rid = EnvironmentEdgeManager.currentTime();
262    // Regionid is timestamp. Can't be less than that of parent else will insert
263    // at wrong location in hbase:meta (See HBASE-710).
264    if (rid < hri.getRegionId()) {
265      LOG.warn("Clock skew; parent regions id is " + hri.getRegionId()
266        + " but current time here is " + rid);
267      rid = hri.getRegionId() + 1;
268    }
269    return rid;
270  }
271
272  private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException {
273    AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()),
274      getRegionReplication(env));
275  }
276
277  private void checkClosedRegions(MasterProcedureEnv env) throws IOException {
278    // theoretically this should not happen any more after we use TRSP, but anyway let's add a check
279    // here
280    AssignmentManagerUtil.checkClosedRegion(env, getParentRegion());
281  }
282
  /**
   * Runs the step that belongs to {@code state} and selects the state to run next.
   * <p>
   * An {@link IOException} raised by a step is handled here rather than propagated: if rollback is
   * supported for the state the procedure is marked as failed, otherwise the error is only logged.
   * In both cases {@code HAS_MORE_STATE} is returned.
   * </p>
   * @param env   MasterProcedureEnv
   * @param state the state to execute
   * @return {@code NO_MORE_STATE} when the prepare step declines the split or after the
   *         post-operation step; {@code HAS_MORE_STATE} otherwise
   * @throws UnsupportedOperationException for a state this procedure does not handle
   */
  @Override
  protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state)
    throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_PREPARE:
          // prepareSplitRegion() returning false ends the procedure here; whether that counts as
          // a failure depends on whether it called setFailure() first.
          if (prepareSplitRegion(env)) {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
            break;
          } else {
            return Flow.NO_MORE_STATE;
          }
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          preSplitRegion(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          addChildProcedure(createUnassignProcedures(env));
          // createUnassignProcedures() can throw out IOException. If this happens,
          // it wont reach state SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGION and no parent regions
          // is closed as all created UnassignProcedures are rolled back. If it rolls back with
          // state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call openParentRegion(),
          // otherwise, it will result in OpenRegionProcedure for an already open region.
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          checkClosedRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
          removeNonDefaultReplicas(env);
          createDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
          break;
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          writeMaxSequenceIdFile(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          preSplitRegionBeforeMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
          break;
        case SPLIT_TABLE_REGION_UPDATE_META:
          // From this state onwards isRollbackSupported() returns false.
          updateMeta(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
          preSplitRegionAfterMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
          break;
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
          addChildProcedure(createAssignProcedures(env));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
          break;
        case SPLIT_TABLE_REGION_POST_OPERATION:
          postSplitRegion(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
      if (!isRollbackSupported(state)) {
        // We reach a state that cannot be rolled back. We just need to keep retrying.
        LOG.warn(msg, e);
      } else {
        LOG.error(msg, e);
        setFailure("master-split-regions", e);
      }
    }
    // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split
    return Flow.HAS_MORE_STATE;
  }
358
  /**
   * Undoes the work of a single state.
   * <p>
   * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously submitted
   * for the parent region to be split (rollback doesn't wait on the completion of the
   * AssignProcedure). This can be improved by changing rollback() to support sub-procedures. See
   * HBASE-19851 for details.
   * </p>
   * @param env   MasterProcedureEnv
   * @param state the state to roll back
   * @throws IOException                   if the rollback step fails; it is logged and rethrown
   * @throws UnsupportedOperationException for the states at or past the meta update (the point of
   *                                       no return) and for unknown states
   */
  @Override
  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
    throws IOException, InterruptedException {
    LOG.trace("{} rollback state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_POST_OPERATION:
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
        case SPLIT_TABLE_REGION_UPDATE_META:
          // PONR
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          // Nothing to undo for this state.
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          deleteDaughterRegions(env);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          openParentRegion(env);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          // If it rolls back with state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call
          // openParentRegion(), otherwise, it will result in OpenRegionProcedure for an
          // already open region.
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          postRollBackSplitRegion(env);
          break;
        case SPLIT_TABLE_REGION_PREPARE:
          rollbackPrepareSplit(env);
          break;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      // This will be retried. Unless there is a bug in the code,
      // this should be just a "temporary error" (e.g. network down)
      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state
        + " for splitting the region " + getParentRegion().getEncodedName() + " in table "
        + getTableName(), e);
      throw e;
    }
  }
410
411  /*
412   * Check whether we are in the state that can be rollback
413   */
414  @Override
415  protected boolean isRollbackSupported(final SplitTableRegionState state) {
416    switch (state) {
417      case SPLIT_TABLE_REGION_POST_OPERATION:
418      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
419      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
420      case SPLIT_TABLE_REGION_UPDATE_META:
421        // It is not safe to rollback if we reach to these states.
422        return false;
423      default:
424        break;
425    }
426    return true;
427  }
428
429  @Override
430  protected SplitTableRegionState getState(final int stateId) {
431    return SplitTableRegionState.forNumber(stateId);
432  }
433
434  @Override
435  protected int getStateId(final SplitTableRegionState state) {
436    return state.getNumber();
437  }
438
  /** Returns the first state of the split state machine, the prepare step. */
  @Override
  protected SplitTableRegionState getInitialState() {
    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
  }
443
444  @Override
445  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
446    super.serializeStateData(serializer);
447
448    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
449      MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
450        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
451        .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
452        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI))
453        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI));
454    serializer.serialize(splitTableRegionMsg.build());
455  }
456
457  @Override
458  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
459    super.deserializeStateData(serializer);
460
461    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
462      serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
463    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
464    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
465    assert (splitTableRegionsMsg.getChildRegionInfoCount() == 2);
466    daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
467    daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
468  }
469
470  @Override
471  public void toStringClassDetails(StringBuilder sb) {
472    sb.append(getClass().getSimpleName());
473    sb.append(" table=");
474    sb.append(getTableName());
475    sb.append(", parent=");
476    sb.append(getParentRegion().getShortNameToLog());
477    sb.append(", daughterA=");
478    sb.append(daughterOneRI.getShortNameToLog());
479    sb.append(", daughterB=");
480    sb.append(daughterTwoRI.getShortNameToLog());
481  }
482
483  private RegionInfo getParentRegion() {
484    return getRegion();
485  }
486
  /** Identifies this procedure as a region split operation. */
  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SPLIT;
  }
491
492  @Override
493  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
494    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
495  }
496
497  private byte[] getSplitRow() {
498    return daughterTwoRI.getStartKey();
499  }
500
  // Region states in which prepareSplitRegion() lets a split go ahead.
  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };
502
503  /**
504   * Prepare to Split region.
505   * @param env MasterProcedureEnv
506   */
507  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
508    // Fail if we are taking snapshot for the given table
509    if (
510      env.getMasterServices().getSnapshotManager()
511        .isTableTakingAnySnapshot(getParentRegion().getTable())
512    ) {
513      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog()
514        + ", because we are taking snapshot for the table " + getParentRegion().getTable()));
515      return false;
516    }
517    // Check whether the region is splittable
518    RegionStateNode node =
519      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
520
521    if (node == null) {
522      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
523    }
524
525    RegionInfo parentHRI = node.getRegionInfo();
526    if (parentHRI == null) {
527      LOG.info("Unsplittable; parent region is null; node={}", node);
528      return false;
529    }
530    // Lookup the parent HRI state from the AM, which has the latest updated info.
531    // Protect against the case where concurrent SPLIT requests came in and succeeded
532    // just before us.
533    if (node.isInState(State.SPLIT)) {
534      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
535      return false;
536    }
537    if (parentHRI.isSplit() || parentHRI.isOffline()) {
538      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
539      return false;
540    }
541
542    // expected parent to be online or closed
543    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
544      // We may have SPLIT already?
545      setFailure(
546        new IOException("Split " + parentHRI.getRegionNameAsString() + " FAILED because state="
547          + node.getState() + "; expected " + Arrays.toString(EXPECTED_SPLIT_STATES)));
548      return false;
549    }
550
551    // Mostly the below two checks are not used because we already check the switches before
552    // submitting the split procedure. Just for safety, we are checking the switch again here.
553    // Also, in case the switch was set to false after submission, this procedure can be rollbacked,
554    // thanks to this double check!
555    // case 1: check for cluster level switch
556    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
557      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
558      setFailure(new IOException(
559        "Split region " + parentHRI.getRegionNameAsString() + " failed due to split switch off"));
560      return false;
561    }
562    // case 2: check for table level switch
563    if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) {
564      LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(),
565        parentHRI);
566      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString()
567        + " failed as region split is disabled for the table"));
568      return false;
569    }
570
571    // set node state as SPLITTING
572    node.setState(State.SPLITTING);
573
574    // Since we have the lock and the master is coordinating the operation
575    // we are always able to split the region
576    return true;
577  }
578
579  /**
580   * Rollback prepare split region
581   * @param env MasterProcedureEnv
582   */
583  private void rollbackPrepareSplit(final MasterProcedureEnv env) {
584    RegionStateNode parentRegionStateNode =
585      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
586    if (parentRegionStateNode.getState() == State.SPLITTING) {
587      parentRegionStateNode.setState(State.OPEN);
588    }
589  }
590
591  /**
592   * Action before splitting region in a table.
593   * @param env MasterProcedureEnv
594   */
595  private void preSplitRegion(final MasterProcedureEnv env)
596    throws IOException, InterruptedException {
597    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
598    if (cpHost != null) {
599      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
600    }
601
602    // TODO: Clean up split and merge. Currently all over the place.
603    // Notify QuotaManager and RegionNormalizer
604    try {
605      MasterQuotaManager masterQuotaManager = env.getMasterServices().getMasterQuotaManager();
606      if (masterQuotaManager != null) {
607        masterQuotaManager.onRegionSplit(this.getParentRegion());
608      }
609    } catch (QuotaExceededException e) {
610      // TODO: why is this here? split requests can be submitted by actors other than the normalizer
611      env.getMasterServices().getRegionNormalizerManager()
612        .planSkipped(NormalizationPlan.PlanType.SPLIT);
613      throw e;
614    }
615  }
616
617  /**
618   * Action after rollback a split table region action.
619   * @param env MasterProcedureEnv
620   */
621  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
622    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
623    if (cpHost != null) {
624      cpHost.postRollBackSplitRegionAction(getUser());
625    }
626  }
627
628  /**
629   * Rollback close parent region
630   */
631  private void openParentRegion(MasterProcedureEnv env) throws IOException {
632    AssignmentManagerUtil.reopenRegionsForRollback(env,
633      Collections.singletonList((getParentRegion())), getRegionReplication(env),
634      getParentRegionServerName(env));
635  }
636
637  /**
638   * Create daughter regions
639   */
640  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
641    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
642    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
643    final FileSystem fs = mfs.getFileSystem();
644    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
645      env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
646    regionFs.createSplitsDir(daughterOneRI, daughterTwoRI);
647
648    Pair<List<Path>, List<Path>> expectedReferences = splitStoreFiles(env, regionFs);
649
650    assertSplitResultFilesCount(fs, expectedReferences.getFirst().size(),
651      regionFs.getSplitsDir(daughterOneRI));
652    regionFs.commitDaughterRegion(daughterOneRI, expectedReferences.getFirst(), env);
653    assertSplitResultFilesCount(fs, expectedReferences.getFirst().size(),
654      new Path(tabledir, daughterOneRI.getEncodedName()));
655
656    assertSplitResultFilesCount(fs, expectedReferences.getSecond().size(),
657      regionFs.getSplitsDir(daughterTwoRI));
658    regionFs.commitDaughterRegion(daughterTwoRI, expectedReferences.getSecond(), env);
659    assertSplitResultFilesCount(fs, expectedReferences.getSecond().size(),
660      new Path(tabledir, daughterTwoRI.getEncodedName()));
661  }
662
663  private void deleteDaughterRegions(final MasterProcedureEnv env) throws IOException {
664    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
665    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
666    HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
667      tabledir, daughterOneRI);
668    HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
669      tabledir, daughterTwoRI);
670  }
671
672  /**
673   * Create Split directory
674   * @param env MasterProcedureEnv
675   */
676  private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv env,
677    final HRegionFileSystem regionFs) throws IOException {
678    final Configuration conf = env.getMasterConfiguration();
679    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
680    // The following code sets up a thread pool executor with as many slots as
681    // there's files to split. It then fires up everything, waits for
682    // completion and finally checks for any exception
683    //
684    // Note: From HBASE-26187, splitStoreFiles now creates daughter region dirs straight under the
685    // table dir. In case of failure, the proc would go through this again, already existing
686    // region dirs and split files would just be ignored, new split files should get created.
687    int nbFiles = 0;
688    final Map<String, Pair<Collection<StoreFileInfo>, StoreFileTracker>> files =
689      new HashMap<String, Pair<Collection<StoreFileInfo>, StoreFileTracker>>(
690        htd.getColumnFamilyCount());
691    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
692      String family = cfd.getNameAsString();
693      StoreFileTracker tracker =
694        StoreFileTrackerFactory.create(env.getMasterConfiguration(), htd, cfd, regionFs);
695      Collection<StoreFileInfo> sfis = tracker.load();
696      if (sfis == null) {
697        continue;
698      }
699      Collection<StoreFileInfo> filteredSfis = null;
700      for (StoreFileInfo sfi : sfis) {
701        // Filter. There is a lag cleaning up compacted reference files. They get cleared
702        // after a delay in case outstanding Scanners still have references. Because of this,
703        // the listing of the Store content may have straggler reference files. Skip these.
704        // It should be safe to skip references at this point because we checked above with
705        // the region if it thinks it is splittable and if we are here, it thinks it is
706        // splitable.
707        if (sfi.isReference()) {
708          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
709          continue;
710        }
711        if (filteredSfis == null) {
712          filteredSfis = new ArrayList<StoreFileInfo>(sfis.size());
713          files.put(family, new Pair(filteredSfis, tracker));
714        }
715        filteredSfis.add(sfi);
716        nbFiles++;
717      }
718    }
719    if (nbFiles == 0) {
720      // no file needs to be splitted.
721      return new Pair<>(Collections.emptyList(), Collections.emptyList());
722    }
723    // Max #threads is the smaller of the number of storefiles or the default max determined above.
724    int maxThreads = Math.min(
725      conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
726        conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
727      nbFiles);
728    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region="
729      + getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
730    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
731      new ThreadFactoryBuilder().setNameFormat("StoreFileSplitter-pool-%d").setDaemon(true)
732        .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
733    final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);
734
735    // Split each store file.
736    for (Map.Entry<String, Pair<Collection<StoreFileInfo>, StoreFileTracker>> e : files
737      .entrySet()) {
738      byte[] familyName = Bytes.toBytes(e.getKey());
739      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
740      Pair<Collection<StoreFileInfo>, StoreFileTracker> storeFilesAndTracker = e.getValue();
741      final Collection<StoreFileInfo> storeFiles = storeFilesAndTracker.getFirst();
742      if (storeFiles != null && storeFiles.size() > 0) {
743        final Configuration storeConfiguration =
744          StoreUtils.createStoreConfiguration(env.getMasterConfiguration(), htd, hcd);
745        for (StoreFileInfo storeFileInfo : storeFiles) {
746          // As this procedure is running on master, use CacheConfig.DISABLED means
747          // don't cache any block.
748          // We also need to pass through a suitable CompoundConfiguration as if this
749          // is running in a regionserver's Store context, or we might not be able
750          // to read the hfiles.
751          storeFileInfo.setConf(storeConfiguration);
752          StoreFileSplitter sfs =
753            new StoreFileSplitter(regionFs, storeFilesAndTracker.getSecond(), familyName,
754              new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED));
755          futures.add(threadPool.submit(sfs));
756        }
757      }
758    }
759    // Shutdown the pool
760    threadPool.shutdown();
761
762    // Wait for all the tasks to finish.
763    // When splits ran on the RegionServer, how-long-to-wait-configuration was named
764    // hbase.regionserver.fileSplitTimeout. If set, use its value.
765    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
766      conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
767    try {
768      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
769      if (stillRunning) {
770        threadPool.shutdownNow();
771        // wait for the thread to shutdown completely.
772        while (!threadPool.isTerminated()) {
773          Thread.sleep(50);
774        }
775        throw new IOException(
776          "Took too long to split the" + " files and create the references, aborting split");
777      }
778    } catch (InterruptedException e) {
779      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
780    }
781
782    List<Path> daughterA = new ArrayList<>();
783    List<Path> daughterB = new ArrayList<>();
784    // Look for any exception
785    for (Future<Pair<Path, Path>> future : futures) {
786      try {
787        Pair<Path, Path> p = future.get();
788        if (p.getFirst() != null) {
789          daughterA.add(p.getFirst());
790        }
791        if (p.getSecond() != null) {
792          daughterB.add(p.getSecond());
793        }
794      } catch (InterruptedException e) {
795        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
796      } catch (ExecutionException e) {
797        throw new IOException(e);
798      }
799    }
800
801    if (LOG.isDebugEnabled()) {
802      LOG.debug("pid=" + getProcId() + " split storefiles for region "
803        + getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA
804        + " storefiles, Daughter B: " + daughterB + " storefiles.");
805    }
806    return new Pair<>(daughterA, daughterB);
807  }
808
809  private void assertSplitResultFilesCount(final FileSystem fs,
810    final int expectedSplitResultFileCount, Path dir) throws IOException {
811    if (expectedSplitResultFileCount != 0) {
812      int resultFileCount = FSUtils.getRegionReferenceAndLinkFileCount(fs, dir);
813      if (expectedSplitResultFileCount != resultFileCount) {
814        throw new IOException("Failing split. Didn't have expected reference and HFileLink files"
815          + ", expected=" + expectedSplitResultFileCount + ", actual=" + resultFileCount);
816      }
817    }
818  }
819
820  private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, StoreFileTracker tracker,
821    byte[] family, HStoreFile sf) throws IOException {
822    if (LOG.isDebugEnabled()) {
823      LOG.debug("pid=" + getProcId() + " splitting started for store file: " + sf.getPath()
824        + " for region: " + getParentRegion().getShortNameToLog());
825    }
826
827    final byte[] splitRow = getSplitRow();
828    final String familyName = Bytes.toString(family);
829    final Path path_first = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow,
830      false, splitPolicy, tracker);
831    final Path path_second = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow,
832      true, splitPolicy, tracker);
833    if (LOG.isDebugEnabled()) {
834      LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + sf.getPath()
835        + " for region: " + getParentRegion().getShortNameToLog());
836    }
837    return new Pair<Path, Path>(path_first, path_second);
838  }
839
840  /**
841   * Utility class used to do the file splitting / reference writing in parallel instead of
842   * sequentially.
843   */
844  private class StoreFileSplitter implements Callable<Pair<Path, Path>> {
845    private final HRegionFileSystem regionFs;
846    private final byte[] family;
847    private final HStoreFile sf;
848    private final StoreFileTracker tracker;
849
850    /**
851     * Constructor that takes what it needs to split
852     * @param regionFs the file system
853     * @param family   Family that contains the store file
854     * @param sf       which file
855     */
856    public StoreFileSplitter(HRegionFileSystem regionFs, StoreFileTracker tracker, byte[] family,
857      HStoreFile sf) {
858      this.regionFs = regionFs;
859      this.sf = sf;
860      this.family = family;
861      this.tracker = tracker;
862    }
863
864    @Override
865    public Pair<Path, Path> call() throws IOException {
866      return splitStoreFile(regionFs, tracker, family, sf);
867    }
868  }
869
870  /**
871   * Post split region actions before the Point-of-No-Return step
872   * @param env MasterProcedureEnv
873   **/
874  private void preSplitRegionBeforeMETA(final MasterProcedureEnv env)
875    throws IOException, InterruptedException {
876    final List<Mutation> metaEntries = new ArrayList<Mutation>();
877    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
878    if (cpHost != null) {
879      cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser());
880      try {
881        for (Mutation p : metaEntries) {
882          RegionInfo.parseRegionName(p.getRow());
883        }
884      } catch (IOException e) {
885        LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as "
886          + "region name." + "Mutations from coprocessor should only for hbase:meta table.");
887        throw e;
888      }
889    }
890  }
891
892  /**
893   * Add daughter regions to META
894   * @param env MasterProcedureEnv
895   */
896  private void updateMeta(final MasterProcedureEnv env) throws IOException {
897    env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
898      daughterOneRI, daughterTwoRI);
899  }
900
901  /**
902   * Pre split region actions after the Point-of-No-Return step
903   * @param env MasterProcedureEnv
904   **/
905  private void preSplitRegionAfterMETA(final MasterProcedureEnv env)
906    throws IOException, InterruptedException {
907    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
908    if (cpHost != null) {
909      cpHost.preSplitAfterMETAAction(getUser());
910    }
911  }
912
913  /**
914   * Post split region actions
915   * @param env MasterProcedureEnv
916   **/
917  private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
918    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
919    if (cpHost != null) {
920      cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser());
921    }
922  }
923
924  private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
925    return env.getMasterServices().getAssignmentManager().getRegionStates()
926      .getRegionServerOfRegion(getParentRegion());
927  }
928
929  private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env)
930    throws IOException {
931    return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env,
932      Stream.of(getParentRegion()), getRegionReplication(env));
933  }
934
935  private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env)
936    throws IOException {
937    List<RegionInfo> hris = new ArrayList<RegionInfo>(2);
938    hris.add(daughterOneRI);
939    hris.add(daughterTwoRI);
940    return AssignmentManagerUtil.createAssignProceduresForSplitDaughters(env, hris,
941      getRegionReplication(env), getParentRegionServerName(env));
942  }
943
944  private int getRegionReplication(final MasterProcedureEnv env) throws IOException {
945    final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
946    return htd.getRegionReplication();
947  }
948
949  private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
950    MasterFileSystem fs = env.getMasterFileSystem();
951    long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(),
952      getParentRegion(), fs::getFileSystem, fs::getWALFileSystem);
953    if (maxSequenceId > 0) {
954      WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
955        getWALRegionDir(env, daughterOneRI), maxSequenceId);
956      WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
957        getWALRegionDir(env, daughterTwoRI), maxSequenceId);
958    }
959  }
960
961  @Override
962  protected boolean abort(MasterProcedureEnv env) {
963    // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all
964    // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this
965    // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022.
966    return isRollbackSupported(getCurrentState()) ? super.abort(env) : false;
967  }
968}