/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.assignment;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
import org.apache.hadoop.hbase.quotas.QuotaExceededException;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.RegionSplitRestriction;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.StoreUtils;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WALSplitUtil;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;

/**
 * The procedure to split a region in a table. Takes a lock on the parent region and holds it for
 * the life of the procedure.
 * <p>
 * Throws an exception on construction if it determines the context is hostile to the split
 * (cluster going down, master shutting down, or table disabled).
 * </p>
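 * <p>
 * Illustrative submission sketch (a minimal example only; real call sites typically wrap the
 * submission in nonce handling and permission checks, and {@code procExec} here stands for the
 * master's {@code ProcedureExecutor<MasterProcedureEnv>}):
 * </p>
 * <pre>
 *   SplitTableRegionProcedure proc =
 *     new SplitTableRegionProcedure(procExec.getEnvironment(), regionToSplit, splitRow);
 *   long procId = procExec.submitProcedure(proc);
 * </pre>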
 */
@InterfaceAudience.Private
public class SplitTableRegionProcedure
  extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
  private RegionInfo daughterOneRI;
  private RegionInfo daughterTwoRI;
  private byte[] bestSplitRow;
  private RegionSplitPolicy splitPolicy;

  public SplitTableRegionProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }

  public SplitTableRegionProcedure(final MasterProcedureEnv env, final RegionInfo regionToSplit,
    final byte[] splitRow) throws IOException {
    super(env, regionToSplit);
    preflightChecks(env, true);
    // When the procedure runs its prepare step, it does these checkOnline checks again; there it
    // skips the split with just a warning. Here we fail fast on construction.
    checkOnline(env, regionToSplit);
    this.bestSplitRow = splitRow;
    TableDescriptor tableDescriptor =
      env.getMasterServices().getTableDescriptors().get(getTableName());
    Configuration conf = env.getMasterConfiguration();
    if (hasBestSplitRow()) {
      // Apply the split restriction for the table to the user-specified split point
      RegionSplitRestriction splitRestriction =
        RegionSplitRestriction.create(tableDescriptor, conf);
      byte[] restrictedSplitRow = splitRestriction.getRestrictedSplitPoint(bestSplitRow);
      if (!Bytes.equals(bestSplitRow, restrictedSplitRow)) {
        LOG.warn(
          "The specified split point {} violates the split restriction of the table. "
            + "Using {} as a split point.",
          Bytes.toStringBinary(bestSplitRow), Bytes.toStringBinary(restrictedSplitRow));
        bestSplitRow = restrictedSplitRow;
      }
    }
    checkSplittable(env, regionToSplit);
    final TableName table = regionToSplit.getTable();
    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
    this.daughterOneRI =
      RegionInfoBuilder.newBuilder(table).setStartKey(regionToSplit.getStartKey())
        .setEndKey(bestSplitRow).setSplit(false).setRegionId(rid).build();
    this.daughterTwoRI = RegionInfoBuilder.newBuilder(table).setStartKey(bestSplitRow)
      .setEndKey(regionToSplit.getEndKey()).setSplit(false).setRegionId(rid).build();

    if (tableDescriptor.getRegionSplitPolicyClassName() != null) {
      // Since we don't have a Region reference here, create the split policy instance without
      // one. It can then be used to invoke methods that don't require a Region reference.
      // Instantiating this class on the Master side, even though it only makes sense on the
      // RegionServer side, is done for Phoenix Local Indexing. See HBASE-12583 for more
      // information.
      Class<? extends RegionSplitPolicy> clazz =
        RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf);
      this.splitPolicy = ReflectionUtils.newInstance(clazz, conf);
    }
  }

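  // Lock the parent region and both pre-computed daughter regions for the life of the procedure;
  // if the scheduler asks us to wait for them, report LOCK_EVENT_WAIT and retry later.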
  @Override
  protected LockState acquireLock(final MasterProcedureEnv env) {
    if (
      env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(),
        daughterOneRI, daughterTwoRI)
    ) {
      try {
        LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks());
      } catch (IOException e) {
        // Ignore, just for logging
      }
      return LockState.LOCK_EVENT_WAIT;
    }
    return LockState.LOCK_ACQUIRED;
  }

  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI,
      daughterTwoRI);
  }

  public RegionInfo getDaughterOneRI() {
    return daughterOneRI;
  }

  public RegionInfo getDaughterTwoRI() {
    return daughterTwoRI;
  }

  private boolean hasBestSplitRow() {
    return bestSplitRow != null && bestSplitRow.length > 0;
  }

  /**
   * Check whether the region is splittable.
   * @param env           MasterProcedureEnv
   * @param regionToSplit parent Region to be split
   */
  private void checkSplittable(final MasterProcedureEnv env, final RegionInfo regionToSplit)
    throws IOException {
    // Ask the remote RS if this region is splittable.
    // If we get an IOE, report it along with the failure so we can see why we are not splittable
    // at this time.
    if (regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
      throw new IllegalArgumentException("Can't invoke split on non-default regions directly");
    }
    RegionStateNode node =
      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
    IOException splittableCheckIOE = null;
    boolean splittable = false;
    if (node != null) {
      try {
        GetRegionInfoResponse response;
        if (!hasBestSplitRow()) {
          LOG.info(
            "{} splitKey isn't explicitly specified, will try to find a best split key from RS {}",
            node.getRegionInfo().getRegionNameAsString(), node.getRegionLocation());
          response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(),
            node.getRegionInfo(), true);
          bestSplitRow =
            response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
        } else {
          response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(),
            node.getRegionInfo(), false);
        }
        splittable = response.hasSplittable() && response.getSplittable();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
        }
      } catch (IOException e) {
        splittableCheckIOE = e;
      }
    }

    if (!splittable) {
      IOException e =
        new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
      if (splittableCheckIOE != null) {
        e.initCause(splittableCheckIOE);
      }
      throw e;
    }

    if (!hasBestSplitRow()) {
      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, "
        + "maybe table is too small for auto split. For force split, try specifying split row");
    }

    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
      throw new DoNotRetryIOException(
        "Split row is equal to startkey: " + Bytes.toStringBinary(bestSplitRow));
    }

    if (!regionToSplit.containsRow(bestSplitRow)) {
      throw new DoNotRetryIOException("Split row is not inside region key range splitKey:"
        + Bytes.toStringBinary(bestSplitRow) + " region: " + regionToSplit);
    }
  }

  /**
   * Calculate the daughter region id to use.
   * @param hri Parent {@link RegionInfo}
   * @return Daughter region id (timestamp) to use.
   */
  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
    long rid = EnvironmentEdgeManager.currentTime();
    // The region id is a timestamp. It can't be less than that of the parent, else it will insert
    // at the wrong location in hbase:meta (see HBASE-710).
    if (rid < hri.getRegionId()) {
      LOG.warn("Clock skew; parent region's id is " + hri.getRegionId()
        + " but current time here is " + rid);
      rid = hri.getRegionId() + 1;
    }
    return rid;
  }

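  // Remove the parent region's non-default replicas before the daughter regions are created;
  // daughter replicas are handled later when the daughters are assigned.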
  private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException {
    AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()),
      getRegionReplication(env));
  }

  private void checkClosedRegions(MasterProcedureEnv env) throws IOException {
    // Theoretically this should not happen any more now that we use TRSP, but let's add a check
    // here anyway.
    AssignmentManagerUtil.checkClosedRegion(env, getParentRegion());
  }

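  // Overall flow of the state machine: PREPARE -> PRE_OPERATION -> CLOSE_PARENT_REGION ->
  // CHECK_CLOSED_REGIONS -> CREATE_DAUGHTER_REGIONS -> WRITE_MAX_SEQUENCE_ID_FILE ->
  // PRE_OPERATION_BEFORE_META -> UPDATE_META (the point of no return) ->
  // PRE_OPERATION_AFTER_META -> OPEN_CHILD_REGIONS -> POST_OPERATION.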
  @Override
  protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state)
    throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_PREPARE:
          if (prepareSplitRegion(env)) {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
            break;
          } else {
            return Flow.NO_MORE_STATE;
          }
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          preSplitRegion(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          addChildProcedure(createUnassignProcedures(env));
          // createUnassignProcedures() can throw an IOException. If this happens, the procedure
          // won't reach state SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS and no parent region is
          // closed, as all created UnassignProcedures are rolled back. If it rolls back with
          // state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, there is no need to call
          // openParentRegion(); otherwise, it would result in an OpenRegionProcedure for an
          // already open region.
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          checkClosedRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
          removeNonDefaultReplicas(env);
          createDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
          break;
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          writeMaxSequenceIdFile(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          preSplitRegionBeforeMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
          break;
        case SPLIT_TABLE_REGION_UPDATE_META:
          updateMeta(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
          preSplitRegionAfterMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
          break;
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
          addChildProcedure(createAssignProcedures(env));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
          break;
        case SPLIT_TABLE_REGION_POST_OPERATION:
          postSplitRegion(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
      if (!isRollbackSupported(state)) {
        // We have reached a state that cannot be rolled back. We just need to keep retrying.
        LOG.warn(msg, e);
      } else {
        LOG.error(msg, e);
        setFailure("master-split-regions", e);
      }
    }
    // if the split fails, we need to call ((HRegion)parent).clearSplit() when it is a force split
    return Flow.HAS_MORE_STATE;
  }

  /**
   * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously submitted
   * for the parent region to be split (rollback doesn't wait on the completion of the
   * AssignProcedure). This can be improved by changing rollback() to support sub-procedures. See
   * HBASE-19851 for details.
   */
  @Override
  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
    throws IOException, InterruptedException {
    LOG.trace("{} rollback state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_POST_OPERATION:
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
        case SPLIT_TABLE_REGION_UPDATE_META:
          // PONR
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          deleteDaughterRegions(env);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          openParentRegion(env);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          // If it rolls back with state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, there is no need
          // to call openParentRegion(); otherwise, it would result in an OpenRegionProcedure for
          // an already open region.
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          postRollBackSplitRegion(env);
          break;
        case SPLIT_TABLE_REGION_PREPARE:
          rollbackPrepareSplit(env);
          break;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      // This will be retried. Unless there is a bug in the code,
      // this should be just a "temporary error" (e.g. network down)
      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state
        + " for splitting the region " + getParentRegion().getEncodedName() + " in table "
        + getTableName(), e);
      throw e;
    }
  }

  /*
   * Check whether we are in a state that can be rolled back.
   */
  @Override
  protected boolean isRollbackSupported(final SplitTableRegionState state) {
    switch (state) {
      case SPLIT_TABLE_REGION_POST_OPERATION:
      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
      case SPLIT_TABLE_REGION_UPDATE_META:
        // It is not safe to roll back if we have reached these states.
        return false;
      default:
        break;
    }
    return true;
  }

  @Override
  protected SplitTableRegionState getState(final int stateId) {
    return SplitTableRegionState.forNumber(stateId);
  }

  @Override
  protected int getStateId(final SplitTableRegionState state) {
    return state.getNumber();
  }

  @Override
  protected SplitTableRegionState getInitialState() {
    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
  }

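  // Serialization note: we persist the user info, the parent RegionInfo and both daughter
  // RegionInfos so that the exact same daughter regions are reconstructed on replay.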
  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.serializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
      MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
        .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI));
    serializer.serialize(splitTableRegionMsg.build());
  }

  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.deserializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
      serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
    assert (splitTableRegionsMsg.getChildRegionInfoCount() == 2);
    daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
    daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getClass().getSimpleName());
    sb.append(" table=");
    sb.append(getTableName());
    sb.append(", parent=");
    sb.append(getParentRegion().getShortNameToLog());
    sb.append(", daughterA=");
    sb.append(daughterOneRI.getShortNameToLog());
    sb.append(", daughterB=");
    sb.append(daughterTwoRI.getShortNameToLog());
  }

  private RegionInfo getParentRegion() {
    return getRegion();
  }

  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SPLIT;
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
  }

  private byte[] getSplitRow() {
    return daughterTwoRI.getStartKey();
  }

  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };

  /**
   * Prepare to split the region.
   * @param env MasterProcedureEnv
   */
  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
    // Fail if we are taking a snapshot of the given table
    if (
      env.getMasterServices().getSnapshotManager()
        .isTableTakingAnySnapshot(getParentRegion().getTable())
    ) {
      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog()
        + ", because we are taking snapshot for the table " + getParentRegion().getTable()));
      return false;
    }
    // Check whether the region is splittable
    RegionStateNode node =
      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());

    if (node == null) {
      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
    }

    RegionInfo parentHRI = node.getRegionInfo();
    if (parentHRI == null) {
      LOG.info("Unsplittable; parent region is null; node={}", node);
      return false;
    }
    // Lookup the parent HRI state from the AM, which has the latest updated info.
    // Protect against the case where concurrent SPLIT requests came in and succeeded
    // just before us.
    if (node.isInState(State.SPLIT)) {
      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
      return false;
    }
    if (parentHRI.isSplit() || parentHRI.isOffline()) {
      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
      return false;
    }

    // We expect the parent to be online or closed
    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
      // We may have SPLIT already?
      setFailure(
        new IOException("Split " + parentHRI.getRegionNameAsString() + " FAILED because state="
          + node.getState() + "; expected " + Arrays.toString(EXPECTED_SPLIT_STATES)));
      return false;
    }

    // Mostly the two checks below are not needed because we already check the switches before
    // submitting the split procedure. Just for safety, we check the switches again here. Also,
    // in case a switch was set to false after submission, this procedure can be rolled back,
    // thanks to this double check!
    // case 1: check the cluster level switch
    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
      setFailure(new IOException(
        "Split region " + parentHRI.getRegionNameAsString() + " failed due to split switch off"));
      return false;
    }
    // case 2: check the table level switch
    if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) {
      LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(),
        parentHRI);
      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString()
        + " failed as region split is disabled for the table"));
      return false;
    }

    // set the node state to SPLITTING
    node.setState(State.SPLITTING);

    // Since we have the lock and the master is coordinating the operation,
    // we are always able to split the region
    return true;
  }

  /**
   * Roll back the prepare-split step.
   * @param env MasterProcedureEnv
   */
  private void rollbackPrepareSplit(final MasterProcedureEnv env) {
    RegionStateNode parentRegionStateNode =
      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
    if (parentRegionStateNode.getState() == State.SPLITTING) {
      parentRegionStateNode.setState(State.OPEN);
    }
  }

  /**
   * Action before splitting a region in a table.
   * @param env MasterProcedureEnv
   */
  private void preSplitRegion(final MasterProcedureEnv env)
    throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
    }

    // TODO: Clean up split and merge. Currently all over the place.
    // Notify QuotaManager and RegionNormalizer
    try {
      MasterQuotaManager masterQuotaManager = env.getMasterServices().getMasterQuotaManager();
      if (masterQuotaManager != null) {
        masterQuotaManager.onRegionSplit(this.getParentRegion());
      }
    } catch (QuotaExceededException e) {
      // TODO: why is this here? split requests can be submitted by actors other than the normalizer
      env.getMasterServices().getRegionNormalizerManager()
        .planSkipped(NormalizationPlan.PlanType.SPLIT);
      throw e;
    }
  }

  /**
   * Action after rolling back a split table region action.
   * @param env MasterProcedureEnv
   */
  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postRollBackSplitRegionAction(getUser());
    }
  }

  /**
   * Roll back the close of the parent region by reopening it.
   */
  private void openParentRegion(MasterProcedureEnv env) throws IOException {
    AssignmentManagerUtil.reopenRegionsForRollback(env,
      Collections.singletonList((getParentRegion())), getRegionReplication(env),
      getParentRegionServerName(env));
  }

  /**
   * Create the daughter regions.
   */
  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
    final FileSystem fs = mfs.getFileSystem();
    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
      env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
    regionFs.createSplitsDir(daughterOneRI, daughterTwoRI);

    Pair<List<Path>, List<Path>> expectedReferences = splitStoreFiles(env, regionFs);

    assertSplitResultFilesCount(fs, expectedReferences.getFirst().size(),
      regionFs.getSplitsDir(daughterOneRI));
    regionFs.commitDaughterRegion(daughterOneRI, expectedReferences.getFirst(), env);
    assertSplitResultFilesCount(fs, expectedReferences.getFirst().size(),
      new Path(tabledir, daughterOneRI.getEncodedName()));

    assertSplitResultFilesCount(fs, expectedReferences.getSecond().size(),
      regionFs.getSplitsDir(daughterTwoRI));
    regionFs.commitDaughterRegion(daughterTwoRI, expectedReferences.getSecond(), env);
    assertSplitResultFilesCount(fs, expectedReferences.getSecond().size(),
      new Path(tabledir, daughterTwoRI.getEncodedName()));
  }

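  // Rollback helper: remove any daughter region directories that createDaughterRegions may have
  // left on the filesystem.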
  private void deleteDaughterRegions(final MasterProcedureEnv env) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
    HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
      tabledir, daughterOneRI);
    HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
      tabledir, daughterTwoRI);
  }

  /**
   * Split the parent region's store files, creating the reference files for both daughters.
   * @param env MasterProcedureEnv
   * @return the lists of reference file paths created for daughter A and daughter B
   */
  private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv env,
    final HRegionFileSystem regionFs) throws IOException {
    final Configuration conf = env.getMasterConfiguration();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    // The following code sets up a thread pool executor with as many slots as there are files to
    // split. It then fires up everything, waits for completion and finally checks for any
    // exception.
    //
    // Note: From HBASE-26187, splitStoreFiles now creates daughter region dirs straight under the
    // table dir. In case of failure, the procedure goes through this step again; already existing
    // region dirs and split files are just ignored, and new split files get created.
    int nbFiles = 0;
    final Map<String, Collection<StoreFileInfo>> files =
      new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount());
    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
      String family = cfd.getNameAsString();
      StoreFileTracker tracker =
        StoreFileTrackerFactory.create(env.getMasterConfiguration(), htd, cfd, regionFs);
      Collection<StoreFileInfo> sfis = tracker.load();
      if (sfis == null) {
        continue;
      }
      Collection<StoreFileInfo> filteredSfis = null;
      for (StoreFileInfo sfi : sfis) {
        // Filter. There is a lag cleaning up compacted reference files. They get cleared
        // after a delay in case outstanding Scanners still have references. Because of this,
        // the listing of the Store content may have straggler reference files. Skip these.
        // It should be safe to skip references at this point because we asked the region above
        // whether it thinks it is splittable, and if we are here, it thinks it is splittable.
        if (sfi.isReference()) {
          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
          continue;
        }
        if (filteredSfis == null) {
          filteredSfis = new ArrayList<StoreFileInfo>(sfis.size());
          files.put(family, filteredSfis);
        }
        filteredSfis.add(sfi);
        nbFiles++;
      }
    }
    if (nbFiles == 0) {
      // no file needs to be split.
      return new Pair<>(Collections.emptyList(), Collections.emptyList());
    }
    // Max #threads is the smaller of the number of store files and the configured maximum.
    int maxThreads = Math.min(
      conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
        conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
      nbFiles);
    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region="
      + getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
      new ThreadFactoryBuilder().setNameFormat("StoreFileSplitter-pool-%d").setDaemon(true)
        .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
    final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);

    // Split each store file.
    for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) {
      byte[] familyName = Bytes.toBytes(e.getKey());
      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
      Collection<StoreFileInfo> storeFileInfos = e.getValue();
      final Collection<StoreFileInfo> storeFiles = storeFileInfos;
      if (storeFiles != null && storeFiles.size() > 0) {
        final Configuration storeConfiguration =
          StoreUtils.createStoreConfiguration(env.getMasterConfiguration(), htd, hcd);
        for (StoreFileInfo storeFileInfo : storeFiles) {
          // As this procedure runs on the master, use CacheConfig.DISABLED so that no blocks are
          // cached. We also need to pass through a suitable CompoundConfiguration, as if this
          // were running in a regionserver's Store context, or we might not be able to read the
          // hfiles.
          storeFileInfo.setConf(storeConfiguration);
          StoreFileSplitter sfs = new StoreFileSplitter(regionFs, htd, hcd,
            new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED));
          futures.add(threadPool.submit(sfs));
        }
      }
    }
    // Shutdown the pool
    threadPool.shutdown();

    // Wait for all the tasks to finish.
    // When splits ran on the RegionServer, the how-long-to-wait configuration was named
    // hbase.regionserver.fileSplitTimeout. If set, use its value.
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
      conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
    try {
      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
      if (stillRunning) {
        threadPool.shutdownNow();
        // wait for the thread to shutdown completely.
        while (!threadPool.isTerminated()) {
          Thread.sleep(50);
        }
        throw new IOException(
          "Took too long to split the" + " files and create the references, aborting split");
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    List<Path> daughterA = new ArrayList<>();
    List<Path> daughterB = new ArrayList<>();
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
      try {
        Pair<Path, Path> p = future.get();
        if (p.getFirst() != null) {
          daughterA.add(p.getFirst());
        }
        if (p.getSecond() != null) {
          daughterB.add(p.getSecond());
        }
      } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
      } catch (ExecutionException e) {
        throw new IOException(e);
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " split storefiles for region "
        + getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA
        + " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<>(daughterA, daughterB);
  }

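  // Sanity check: the number of reference/HFileLink files found under the given directory must
  // match what splitStoreFiles() reported, otherwise fail the split.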
  private void assertSplitResultFilesCount(final FileSystem fs,
    final int expectedSplitResultFileCount, Path dir) throws IOException {
    if (expectedSplitResultFileCount != 0) {
      int resultFileCount = FSUtils.getRegionReferenceAndLinkFileCount(fs, dir);
      if (expectedSplitResultFileCount != resultFileCount) {
        throw new IOException("Failing split. Didn't have expected reference and HFileLink files"
          + ", expected=" + expectedSplitResultFileCount + ", actual=" + resultFileCount);
      }
    }
  }

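  /**
   * Split a single store file into two reference files, one per daughter region, using the store
   * file trackers created for the daughters. Either path may be null if the split row makes the
   * corresponding reference unnecessary.
   */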
  private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, TableDescriptor htd,
    ColumnFamilyDescriptor hcd, HStoreFile sf) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting started for store file: " + sf.getPath()
        + " for region: " + getParentRegion().getShortNameToLog());
    }

    final byte[] splitRow = getSplitRow();
    final String familyName = hcd.getNameAsString();
    StoreFileTracker daughterOneSft =
      StoreFileTrackerFactory.create(regionFs.getFileSystem().getConf(), htd, hcd,
        HRegionFileSystem.create(regionFs.getFileSystem().getConf(), regionFs.getFileSystem(),
          regionFs.getTableDir(), daughterOneRI));
    StoreFileTracker daughterTwoSft =
      StoreFileTrackerFactory.create(regionFs.getFileSystem().getConf(), htd, hcd,
        HRegionFileSystem.create(regionFs.getFileSystem().getConf(), regionFs.getFileSystem(),
          regionFs.getTableDir(), daughterTwoRI));
    final Path path_first = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow,
      false, splitPolicy, daughterOneSft);
    final Path path_second = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow,
      true, splitPolicy, daughterTwoSft);
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + sf.getPath()
        + " for region: " + getParentRegion().getShortNameToLog());
    }
    return new Pair<Path, Path>(path_first, path_second);
  }

  /**
   * Utility class used to do the file splitting / reference writing in parallel instead of
   * sequentially.
   */
  private class StoreFileSplitter implements Callable<Pair<Path, Path>> {
    private final HRegionFileSystem regionFs;
    private final ColumnFamilyDescriptor hcd;
    private final HStoreFile sf;
    private final TableDescriptor htd;

    /**
     * Constructor that takes what it needs to split
     * @param regionFs the file system
     * @param htd      Descriptor of the table that contains the store file
     * @param hcd      Family that contains the store file
     * @param sf       which file
     */
    public StoreFileSplitter(HRegionFileSystem regionFs, TableDescriptor htd,
      ColumnFamilyDescriptor hcd, HStoreFile sf) {
      this.regionFs = regionFs;
      this.sf = sf;
      this.hcd = hcd;
      this.htd = htd;
    }

    @Override
    public Pair<Path, Path> call() throws IOException {
      return splitStoreFile(regionFs, htd, hcd, sf);
    }
  }

  /**
   * Pre split region actions before the Point-of-No-Return step
   * @param env MasterProcedureEnv
   **/
  private void preSplitRegionBeforeMETA(final MasterProcedureEnv env)
    throws IOException, InterruptedException {
    final List<Mutation> metaEntries = new ArrayList<Mutation>();
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser());
      try {
        for (Mutation p : metaEntries) {
          RegionInfo.parseRegionName(p.getRow());
        }
      } catch (IOException e) {
        LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor is not parsable "
          + "as a region name. Mutations from coprocessors should only be for the hbase:meta table.");
        throw e;
      }
    }
  }

  /**
   * Add daughter regions to META
   * @param env MasterProcedureEnv
   */
  private void updateMeta(final MasterProcedureEnv env) throws IOException {
    env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
      daughterOneRI, daughterTwoRI);
  }

  /**
   * Pre split region actions after the Point-of-No-Return step
   * @param env MasterProcedureEnv
   **/
  private void preSplitRegionAfterMETA(final MasterProcedureEnv env)
    throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitAfterMETAAction(getUser());
    }
  }

  /**
   * Post split region actions
   * @param env MasterProcedureEnv
   **/
  private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser());
    }
  }

  private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
    return env.getMasterServices().getAssignmentManager().getRegionStates()
      .getRegionServerOfRegion(getParentRegion());
  }

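  // Unassign (close) the parent region and its replicas; these run as child procedures of the
  // SPLIT_TABLE_REGION_CLOSE_PARENT_REGION step.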
  private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env)
    throws IOException {
    return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env,
      Stream.of(getParentRegion()), getRegionReplication(env));
  }

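  // Assign both daughter regions (and their replicas), targeting the server that hosted the
  // parent region.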
  private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env)
    throws IOException {
    List<RegionInfo> hris = new ArrayList<RegionInfo>(2);
    hris.add(daughterOneRI);
    hris.add(daughterTwoRI);
    return AssignmentManagerUtil.createAssignProceduresForSplitDaughters(env, hris,
      getRegionReplication(env), getParentRegionServerName(env));
  }

  private int getRegionReplication(final MasterProcedureEnv env) throws IOException {
    final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    return htd.getRegionReplication();
  }

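  // Propagate the parent region's max sequence id to both daughters' WAL region directories so
  // that their sequence ids continue above the parent's.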
  private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
    MasterFileSystem fs = env.getMasterFileSystem();
    long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(),
      getParentRegion(), fs::getFileSystem, fs::getWALFileSystem);
    if (maxSequenceId > 0) {
      WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
        getWALRegionDir(env, daughterOneRI), maxSequenceId);
      WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
        getWALRegionDir(env, daughterTwoRI), maxSequenceId);
    }
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // Abort means rollback. We can't roll back all steps. HBASE-18018 added abort to all
    // Procedures. Here is a Procedure that has a PONR and cannot be aborted once it enters this
    // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022.
    return isRollbackSupported(getCurrentState()) ? super.abort(env) : false;
  }
}