001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import java.io.IOException;
021import java.io.InterruptedIOException;
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.Collection;
025import java.util.Collections;
026import java.util.HashMap;
027import java.util.List;
028import java.util.Map;
029import java.util.concurrent.Callable;
030import java.util.concurrent.ExecutionException;
031import java.util.concurrent.ExecutorService;
032import java.util.concurrent.Executors;
033import java.util.concurrent.Future;
034import java.util.concurrent.TimeUnit;
035import java.util.stream.Stream;
036import org.apache.hadoop.conf.Configuration;
037import org.apache.hadoop.fs.FileSystem;
038import org.apache.hadoop.fs.Path;
039import org.apache.hadoop.hbase.DoNotRetryIOException;
040import org.apache.hadoop.hbase.HConstants;
041import org.apache.hadoop.hbase.ServerName;
042import org.apache.hadoop.hbase.TableName;
043import org.apache.hadoop.hbase.UnknownRegionException;
044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
045import org.apache.hadoop.hbase.client.MasterSwitchType;
046import org.apache.hadoop.hbase.client.Mutation;
047import org.apache.hadoop.hbase.client.RegionInfo;
048import org.apache.hadoop.hbase.client.RegionInfoBuilder;
049import org.apache.hadoop.hbase.client.TableDescriptor;
050import org.apache.hadoop.hbase.io.hfile.CacheConfig;
051import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
052import org.apache.hadoop.hbase.master.MasterFileSystem;
053import org.apache.hadoop.hbase.master.RegionState.State;
054import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
055import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
057import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
058import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
059import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
060import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
061import org.apache.hadoop.hbase.quotas.QuotaExceededException;
062import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
063import org.apache.hadoop.hbase.regionserver.HStore;
064import org.apache.hadoop.hbase.regionserver.HStoreFile;
065import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
066import org.apache.hadoop.hbase.regionserver.RegionSplitRestriction;
067import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
068import org.apache.hadoop.hbase.regionserver.StoreUtils;
069import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
070import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
071import org.apache.hadoop.hbase.util.Bytes;
072import org.apache.hadoop.hbase.util.CommonFSUtils;
073import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
074import org.apache.hadoop.hbase.util.Pair;
075import org.apache.hadoop.hbase.util.Threads;
076import org.apache.hadoop.hbase.wal.WALSplitUtil;
077import org.apache.hadoop.util.ReflectionUtils;
078import org.apache.yetus.audience.InterfaceAudience;
079import org.slf4j.Logger;
080import org.slf4j.LoggerFactory;
081
082import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
083
084import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
085import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
086import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
087import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;
088
089/**
090 * The procedure to split a region in a table. Takes lock on the parent region. It holds the lock
091 * for the life of the procedure.
092 * <p>
 * Throws exception on construction if determines context hostile to split (cluster going down or
094 * master is shutting down or table is disabled).
095 * </p>
096 */
097@InterfaceAudience.Private
098public class SplitTableRegionProcedure
099  extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
  // Daughter regions computed at construction (or restored on deserialization). Daughter one
  // covers [parent startKey, bestSplitRow); daughter two covers [bestSplitRow, parent endKey).
  private RegionInfo daughterOneRI;
  private RegionInfo daughterTwoRI;
  // The row to split on. May be adjusted by a RegionSplitRestriction at construction, or filled
  // in by asking the RegionServer for its best split row (see checkSplittable).
  private byte[] bestSplitRow;
  // Only instantiated when the table declares a custom split policy class; see the constructor
  // for why this is created master-side (HBASE-12583).
  private RegionSplitPolicy splitPolicy;
  // exposed for unit testing
  boolean checkTableModifyInProgress = true;
107
  /**
   * No-arg constructor required by the Procedure framework to re-create the procedure on replay;
   * state is restored afterwards via {@link #deserializeStateData(ProcedureStateSerializer)}.
   */
  public SplitTableRegionProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }
111
  /**
   * Construct a split procedure for the given parent region.
   * <p>
   * Fails fast on construction (preflight checks, region online check, splittable check), applies
   * any table-level {@link RegionSplitRestriction} to the requested split row, and precomputes the
   * two daughter {@link RegionInfo}s.
   * @param env           the master procedure environment
   * @param regionToSplit parent region to split
   * @param splitRow      requested split row; may be null/empty, in which case the RegionServer is
   *                      asked for its best split row in {@link #checkSplittable}
   * @throws IOException if the context is hostile to a split (see class comment)
   */
  public SplitTableRegionProcedure(final MasterProcedureEnv env, final RegionInfo regionToSplit,
    final byte[] splitRow) throws IOException {
    super(env, regionToSplit);
    preflightChecks(env, true);
    // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here
    // we fail-fast on construction. There it skips the split with just a warning.
    checkOnline(env, regionToSplit);
    this.bestSplitRow = splitRow;
    TableDescriptor tableDescriptor =
      env.getMasterServices().getTableDescriptors().get(getTableName());
    Configuration conf = env.getMasterConfiguration();
    if (hasBestSplitRow()) {
      // Apply the split restriction for the table to the user-specified split point
      RegionSplitRestriction splitRestriction =
        RegionSplitRestriction.create(tableDescriptor, conf);
      byte[] restrictedSplitRow = splitRestriction.getRestrictedSplitPoint(bestSplitRow);
      if (!Bytes.equals(bestSplitRow, restrictedSplitRow)) {
        LOG.warn(
          "The specified split point {} violates the split restriction of the table. "
            + "Using {} as a split point.",
          Bytes.toStringBinary(bestSplitRow), Bytes.toStringBinary(restrictedSplitRow));
        bestSplitRow = restrictedSplitRow;
      }
    }
    checkSplittable(env, regionToSplit);
    final TableName table = regionToSplit.getTable();
    // Both daughters share the same region id (a timestamp >= the parent's; see
    // getDaughterRegionIdTimestamp).
    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
    this.daughterOneRI =
      RegionInfoBuilder.newBuilder(table).setStartKey(regionToSplit.getStartKey())
        .setEndKey(bestSplitRow).setSplit(false).setRegionId(rid).build();
    this.daughterTwoRI = RegionInfoBuilder.newBuilder(table).setStartKey(bestSplitRow)
      .setEndKey(regionToSplit.getEndKey()).setSplit(false).setRegionId(rid).build();

    if (tableDescriptor.getRegionSplitPolicyClassName() != null) {
      // Since we don't have region reference here, creating the split policy instance without it.
      // This can be used to invoke methods which don't require Region reference. This instantiation
      // of a class on Master-side though it only makes sense on the RegionServer-side is
      // for Phoenix Local Indexing. Refer HBASE-12583 for more information.
      Class<? extends RegionSplitPolicy> clazz =
        RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf);
      this.splitPolicy = ReflectionUtils.newInstance(clazz, conf);
    }
  }
155
156  @Override
157  protected LockState acquireLock(final MasterProcedureEnv env) {
158    if (
159      env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(),
160        daughterOneRI, daughterTwoRI)
161    ) {
162      try {
163        LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks());
164      } catch (IOException e) {
165        // Ignore, just for logging
166      }
167      return LockState.LOCK_EVENT_WAIT;
168    }
169    return LockState.LOCK_ACQUIRED;
170  }
171
  /**
   * Release the scheduler lock taken in {@link #acquireLock} on the parent and both daughters.
   */
  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI,
      daughterTwoRI);
  }
177
  /** Returns the first daughter region: parent startKey up to the split row. */
  public RegionInfo getDaughterOneRI() {
    return daughterOneRI;
  }
181
  /** Returns the second daughter region: split row up to the parent endKey. */
  public RegionInfo getDaughterTwoRI() {
    return daughterTwoRI;
  }
185
186  private boolean hasBestSplitRow() {
187    return bestSplitRow != null && bestSplitRow.length > 0;
188  }
189
190  /**
191   * Check whether the region is splittable
192   * @param env           MasterProcedureEnv
193   * @param regionToSplit parent Region to be split
194   */
195  private void checkSplittable(final MasterProcedureEnv env, final RegionInfo regionToSplit)
196    throws IOException {
197    // Ask the remote RS if this region is splittable.
198    // If we get an IOE, report it along w/ the failure so can see why we are not splittable at
199    // this time.
200    if (regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
201      throw new IllegalArgumentException("Can't invoke split on non-default regions directly");
202    }
203    RegionStateNode node =
204      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
205    IOException splittableCheckIOE = null;
206    boolean splittable = false;
207    if (node != null) {
208      try {
209        GetRegionInfoResponse response;
210        if (!hasBestSplitRow()) {
211          LOG.info(
212            "{} splitKey isn't explicitly specified, will try to find a best split key from RS {}",
213            node.getRegionInfo().getRegionNameAsString(), node.getRegionLocation());
214          response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(),
215            node.getRegionInfo(), true);
216          bestSplitRow =
217            response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
218        } else {
219          response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(),
220            node.getRegionInfo(), false);
221        }
222        splittable = response.hasSplittable() && response.getSplittable();
223        if (LOG.isDebugEnabled()) {
224          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
225        }
226      } catch (IOException e) {
227        splittableCheckIOE = e;
228      }
229    }
230
231    if (!splittable) {
232      IOException e =
233        new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
234      if (splittableCheckIOE != null) {
235        e.initCause(splittableCheckIOE);
236      }
237      throw e;
238    }
239
240    if (!hasBestSplitRow()) {
241      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, "
242        + "maybe table is too small for auto split. For force split, try specifying split row");
243    }
244
245    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
246      throw new DoNotRetryIOException(
247        "Split row is equal to startkey: " + Bytes.toStringBinary(bestSplitRow));
248    }
249
250    if (!regionToSplit.containsRow(bestSplitRow)) {
251      throw new DoNotRetryIOException("Split row is not inside region key range splitKey:"
252        + Bytes.toStringBinary(bestSplitRow) + " region: " + regionToSplit);
253    }
254  }
255
256  /**
257   * Calculate daughter regionid to use.
258   * @param hri Parent {@link RegionInfo}
259   * @return Daughter region id (timestamp) to use.
260   */
261  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
262    long rid = EnvironmentEdgeManager.currentTime();
263    // Regionid is timestamp. Can't be less than that of parent else will insert
264    // at wrong location in hbase:meta (See HBASE-710).
265    if (rid < hri.getRegionId()) {
266      LOG.warn("Clock skew; parent regions id is " + hri.getRegionId()
267        + " but current time here is " + rid);
268      rid = hri.getRegionId() + 1;
269    }
270    return rid;
271  }
272
  /**
   * Remove the parent region's non-default replicas before creating the daughter regions.
   */
  private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException {
    AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()),
      getRegionReplication(env));
  }
277
  /**
   * Verify the parent region really is closed before proceeding with the split.
   */
  private void checkClosedRegions(MasterProcedureEnv env) throws IOException {
    // theoretically this should not happen any more after we use TRSP, but anyway let's add a check
    // here
    AssignmentManagerUtil.checkClosedRegion(env, getParentRegion());
  }
283
  /**
   * Drive the split state machine: prepare, pre-split coprocessors, close the parent, verify it
   * closed, create the daughter region dirs (splitting store files into references), write max
   * sequence-id files, update hbase:meta, open the daughters, then post-split coprocessors.
   * @return {@code Flow.NO_MORE_STATE} when done (or when prepare decided to skip the split),
   *         {@code Flow.HAS_MORE_STATE} otherwise (including on retryable IOException).
   */
  @Override
  protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state)
    throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_PREPARE:
          if (prepareSplitRegion(env)) {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
            break;
          } else {
            // prepare decided to skip the split (snapshot running, switch off, etc.).
            return Flow.NO_MORE_STATE;
          }
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          preSplitRegion(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          addChildProcedure(createUnassignProcedures(env));
          // createUnassignProcedures() can throw out IOException. If this happens,
          // it wont reach state SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGION and no parent regions
          // is closed as all created UnassignProcedures are rolled back. If it rolls back with
          // state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call openParentRegion(),
          // otherwise, it will result in OpenRegionProcedure for an already open region.
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          checkClosedRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
          removeNonDefaultReplicas(env);
          createDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
          break;
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          writeMaxSequenceIdFile(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          preSplitRegionBeforeMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
          break;
        case SPLIT_TABLE_REGION_UPDATE_META:
          // Point of no return: from here on, rollback is unsupported (see isRollbackSupported).
          updateMeta(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
          preSplitRegionAfterMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
          break;
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
          addChildProcedure(createAssignProcedures(env));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
          break;
        case SPLIT_TABLE_REGION_POST_OPERATION:
          postSplitRegion(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
      if (!isRollbackSupported(state)) {
        // We reach a state that cannot be rolled back. We just need to keep retrying.
        LOG.warn(msg, e);
      } else {
        LOG.error(msg, e);
        setFailure("master-split-regions", e);
      }
    }
    // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split
    return Flow.HAS_MORE_STATE;
  }
359
360  /**
361   * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously submitted
362   * for parent region to be split (rollback doesn't wait on the completion of the AssignProcedure)
363   * . This can be improved by changing rollback() to support sub-procedures. See HBASE-19851 for
364   * details.
365   */
366  @Override
367  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
368    throws IOException, InterruptedException {
369    LOG.trace("{} rollback state={}", this, state);
370
371    try {
372      switch (state) {
373        case SPLIT_TABLE_REGION_POST_OPERATION:
374        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
375        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
376        case SPLIT_TABLE_REGION_UPDATE_META:
377          // PONR
378          throw new UnsupportedOperationException(this + " unhandled state=" + state);
379        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
380          break;
381        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
382        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
383          deleteDaughterRegions(env);
384          break;
385        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
386          openParentRegion(env);
387          break;
388        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
389          // If it rolls back with state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call
390          // openParentRegion(), otherwise, it will result in OpenRegionProcedure for an
391          // already open region.
392          break;
393        case SPLIT_TABLE_REGION_PRE_OPERATION:
394          postRollBackSplitRegion(env);
395          break;
396        case SPLIT_TABLE_REGION_PREPARE:
397          rollbackPrepareSplit(env);
398          break;
399        default:
400          throw new UnsupportedOperationException(this + " unhandled state=" + state);
401      }
402    } catch (IOException e) {
403      // This will be retried. Unless there is a bug in the code,
404      // this should be just a "temporary error" (e.g. network down)
405      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state
406        + " for splitting the region " + getParentRegion().getEncodedName() + " in table "
407        + getTableName(), e);
408      throw e;
409    }
410  }
411
412  /*
413   * Check whether we are in the state that can be rollback
414   */
415  @Override
416  protected boolean isRollbackSupported(final SplitTableRegionState state) {
417    switch (state) {
418      case SPLIT_TABLE_REGION_POST_OPERATION:
419      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
420      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
421      case SPLIT_TABLE_REGION_UPDATE_META:
422        // It is not safe to rollback if we reach to these states.
423        return false;
424      default:
425        break;
426    }
427    return true;
428  }
429
  /** Map a serialized numeric state id back to the state enum. */
  @Override
  protected SplitTableRegionState getState(final int stateId) {
    return SplitTableRegionState.forNumber(stateId);
  }
434
  /** Map a state enum to its numeric id for serialization. */
  @Override
  protected int getStateId(final SplitTableRegionState state) {
    return state.getNumber();
  }
439
  /** The state machine starts at the PREPARE step. */
  @Override
  protected SplitTableRegionState getInitialState() {
    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
  }
444
  /**
   * Persist procedure state: the acting user, the parent region, and the two daughter regions
   * (added in order: daughter one, then daughter two).
   */
  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.serializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
      MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
        .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI));
    serializer.serialize(splitTableRegionMsg.build());
  }
457
  /**
   * Restore the state written by {@link #serializeStateData}: acting user, parent region, and the
   * two daughter regions.
   */
  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.deserializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
      serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
    // serializeStateData always writes exactly two children: daughter one, then daughter two.
    assert (splitTableRegionsMsg.getChildRegionInfoCount() == 2);
    daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
    daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
  }
470
471  @Override
472  public void toStringClassDetails(StringBuilder sb) {
473    sb.append(getClass().getSimpleName());
474    sb.append(" table=");
475    sb.append(getTableName());
476    sb.append(", parent=");
477    sb.append(getParentRegion().getShortNameToLog());
478    sb.append(", daughterA=");
479    sb.append(daughterOneRI.getShortNameToLog());
480    sb.append(", daughterB=");
481    sb.append(daughterTwoRI.getShortNameToLog());
482  }
483
  /** Readability alias: the region held by this procedure is the split's parent. */
  private RegionInfo getParentRegion() {
    return getRegion();
  }
487
  /** This procedure performs a region split; used by the scheduler for queueing/locking. */
  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SPLIT;
  }
492
  /** Report split-procedure metrics via the assignment manager's metrics source. */
  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
  }
497
  /** The split row is, by construction, the start key of the second daughter. */
  private byte[] getSplitRow() {
    return daughterTwoRI.getStartKey();
  }
501
  // States the parent region may be in for the split to proceed (see prepareSplitRegion).
  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };
503
504  /**
505   * Prepare to Split region.
506   * @param env MasterProcedureEnv
507   */
508  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
509    // Fail if we are taking snapshot for the given table
510    if (
511      env.getMasterServices().getSnapshotManager()
512        .isTableTakingAnySnapshot(getParentRegion().getTable())
513    ) {
514      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog()
515        + ", because we are taking snapshot for the table " + getParentRegion().getTable()));
516      return false;
517    }
518
519    /*
520     * Sometimes a ModifyTableProcedure has edited a table descriptor to change the number of region
521     * replicas for a table, but it has not yet opened/closed the new replicas. The
522     * ModifyTableProcedure assumes that nobody else will do the opening/closing of the new
523     * replicas, but a concurrent SplitTableRegionProcedure would violate that assumption.
524     */
525    if (checkTableModifyInProgress && isTableModificationInProgress(env)) {
526      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog()
527        + ", because there is an active procedure that is modifying the table "
528        + getParentRegion().getTable()));
529      return false;
530    }
531
532    // Check whether the region is splittable
533    RegionStateNode node =
534      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
535
536    if (node == null) {
537      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
538    }
539
540    RegionInfo parentHRI = node.getRegionInfo();
541    if (parentHRI == null) {
542      LOG.info("Unsplittable; parent region is null; node={}", node);
543      return false;
544    }
545    // Lookup the parent HRI state from the AM, which has the latest updated info.
546    // Protect against the case where concurrent SPLIT requests came in and succeeded
547    // just before us.
548    if (node.isInState(State.SPLIT)) {
549      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
550      return false;
551    }
552    if (parentHRI.isSplit() || parentHRI.isOffline()) {
553      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
554      return false;
555    }
556
557    // expected parent to be online or closed
558    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
559      // We may have SPLIT already?
560      setFailure(
561        new IOException("Split " + parentHRI.getRegionNameAsString() + " FAILED because state="
562          + node.getState() + "; expected " + Arrays.toString(EXPECTED_SPLIT_STATES)));
563      return false;
564    }
565
566    // Mostly the below two checks are not used because we already check the switches before
567    // submitting the split procedure. Just for safety, we are checking the switch again here.
568    // Also, in case the switch was set to false after submission, this procedure can be rollbacked,
569    // thanks to this double check!
570    // case 1: check for cluster level switch
571    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
572      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
573      setFailure(new IOException(
574        "Split region " + parentHRI.getRegionNameAsString() + " failed due to split switch off"));
575      return false;
576    }
577    // case 2: check for table level switch
578    if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) {
579      LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(),
580        parentHRI);
581      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString()
582        + " failed as region split is disabled for the table"));
583      return false;
584    }
585
586    // set node state as SPLITTING
587    node.setState(State.SPLITTING);
588
589    // Since we have the lock and the master is coordinating the operation
590    // we are always able to split the region
591    return true;
592  }
593
594  /**
595   * Rollback prepare split region
596   * @param env MasterProcedureEnv
597   */
598  private void rollbackPrepareSplit(final MasterProcedureEnv env) {
599    RegionStateNode parentRegionStateNode =
600      env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
601    if (parentRegionStateNode.getState() == State.SPLITTING) {
602      parentRegionStateNode.setState(State.OPEN);
603    }
604  }
605
606  /**
607   * Action before splitting region in a table.
608   * @param env MasterProcedureEnv
609   */
610  private void preSplitRegion(final MasterProcedureEnv env)
611    throws IOException, InterruptedException {
612    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
613    if (cpHost != null) {
614      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
615    }
616
617    // TODO: Clean up split and merge. Currently all over the place.
618    // Notify QuotaManager and RegionNormalizer
619    try {
620      MasterQuotaManager masterQuotaManager = env.getMasterServices().getMasterQuotaManager();
621      if (masterQuotaManager != null) {
622        masterQuotaManager.onRegionSplit(this.getParentRegion());
623      }
624    } catch (QuotaExceededException e) {
625      // TODO: why is this here? split requests can be submitted by actors other than the normalizer
626      env.getMasterServices().getRegionNormalizerManager()
627        .planSkipped(NormalizationPlan.PlanType.SPLIT);
628      throw e;
629    }
630  }
631
632  /**
633   * Action after rollback a split table region action.
634   * @param env MasterProcedureEnv
635   */
636  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
637    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
638    if (cpHost != null) {
639      cpHost.postRollBackSplitRegionAction(getUser());
640    }
641  }
642
643  /**
644   * Rollback close parent region
645   */
646  private void openParentRegion(MasterProcedureEnv env) throws IOException {
647    AssignmentManagerUtil.reopenRegionsForRollback(env,
648      Collections.singletonList((getParentRegion())), getRegionReplication(env),
649      getParentRegionServerName(env));
650  }
651
652  /**
653   * Create daughter regions
654   */
655  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
656    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
657    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
658    final FileSystem fs = mfs.getFileSystem();
659    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
660      env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
661    regionFs.createSplitsDir(daughterOneRI, daughterTwoRI);
662    Pair<List<StoreFileInfo>, List<StoreFileInfo>> expectedReferences =
663      splitStoreFiles(env, regionFs);
664    final ExecutorService threadPool = Executors.newFixedThreadPool(2,
665      new ThreadFactoryBuilder().setNameFormat("RegionCommitter-pool-%d").setDaemon(true)
666        .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
667    Future<Path> futureOne = threadPool.submit(new Callable<Path>() {
668      @Override
669      public Path call() throws IOException {
670        return regionFs.commitDaughterRegion(daughterOneRI, expectedReferences.getFirst(), env);
671      }
672    });
673    Future<Path> futureTwo = threadPool.submit(new Callable<Path>() {
674      @Override
675      public Path call() throws IOException {
676        return regionFs.commitDaughterRegion(daughterTwoRI, expectedReferences.getSecond(), env);
677      }
678    });
679    handleThreadPoolShutdown(threadPool, env.getMasterConfiguration());
680
681    try {
682      futureOne.get();
683      futureTwo.get();
684    } catch (InterruptedException e) {
685      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
686    } catch (ExecutionException e) {
687      throw new IOException("Daughter region commit failed", e);
688    }
689  }
690
691  private void handleThreadPoolShutdown(ExecutorService threadPool, Configuration conf)
692    throws IOException {
693    threadPool.shutdown();
694    // Wait for all the tasks to finish.
695    // When splits ran on the RegionServer, how-long-to-wait-configuration was named
696    // fileSplitTimeout. If set, use its value.
697    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
698      conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
699    try {
700      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
701      if (stillRunning) {
702        threadPool.shutdownNow();
703        // wait for the thread to shutdown completely.
704        while (!threadPool.isTerminated()) {
705          Thread.sleep(50);
706        }
707        throw new IOException(
708          "Took too long to split the files and create the references, aborting split");
709      }
710    } catch (InterruptedException e) {
711      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
712    }
713  }
714
715  private void deleteDaughterRegions(final MasterProcedureEnv env) throws IOException {
716    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
717    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
718    HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
719      tabledir, daughterOneRI);
720    HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
721      tabledir, daughterTwoRI);
722  }
723
724  /**
725   * Create Split directory
726   * @param env MasterProcedureEnv
727   */
728  private Pair<List<StoreFileInfo>, List<StoreFileInfo>> splitStoreFiles(
729    final MasterProcedureEnv env, final HRegionFileSystem regionFs) throws IOException {
730    final Configuration conf = env.getMasterConfiguration();
731    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
732    // The following code sets up a thread pool executor with as many slots as
733    // there's files to split. It then fires up everything, waits for
734    // completion and finally checks for any exception
735    //
736    // Note: From HBASE-26187, splitStoreFiles now creates daughter region dirs straight under the
737    // table dir. In case of failure, the proc would go through this again, already existing
738    // region dirs and split files would just be ignored, new split files should get created.
739    int nbFiles = 0;
740    final Map<String, Collection<StoreFileInfo>> files =
741      new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount());
742    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
743      String family = cfd.getNameAsString();
744      StoreFileTracker tracker =
745        StoreFileTrackerFactory.create(env.getMasterConfiguration(), htd, cfd, regionFs);
746      Collection<StoreFileInfo> sfis = tracker.load();
747      if (sfis == null) {
748        continue;
749      }
750      Collection<StoreFileInfo> filteredSfis = null;
751      for (StoreFileInfo sfi : sfis) {
752        // Filter. There is a lag cleaning up compacted reference files. They get cleared
753        // after a delay in case outstanding Scanners still have references. Because of this,
754        // the listing of the Store content may have straggler reference files. Skip these.
755        // It should be safe to skip references at this point because we checked above with
756        // the region if it thinks it is splittable and if we are here, it thinks it is
757        // splitable.
758        if (sfi.isReference()) {
759          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
760          continue;
761        }
762        if (filteredSfis == null) {
763          filteredSfis = new ArrayList<StoreFileInfo>(sfis.size());
764          files.put(family, filteredSfis);
765        }
766        filteredSfis.add(sfi);
767        nbFiles++;
768      }
769    }
770    if (nbFiles == 0) {
771      // no file needs to be splitted.
772      return new Pair<>(Collections.emptyList(), Collections.emptyList());
773    }
774    // Max #threads is the smaller of the number of storefiles or the default max determined above.
775    int maxThreads = Math.min(
776      conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
777        conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
778      nbFiles);
779    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region="
780      + getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
781    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
782      new ThreadFactoryBuilder().setNameFormat("StoreFileSplitter-pool-%d").setDaemon(true)
783        .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
784    final List<Future<Pair<StoreFileInfo, StoreFileInfo>>> futures =
785      new ArrayList<Future<Pair<StoreFileInfo, StoreFileInfo>>>(nbFiles);
786
787    // Split each store file.
788    for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) {
789      byte[] familyName = Bytes.toBytes(e.getKey());
790      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
791      Collection<StoreFileInfo> storeFileInfos = e.getValue();
792      final Collection<StoreFileInfo> storeFiles = storeFileInfos;
793      if (storeFiles != null && storeFiles.size() > 0) {
794        final Configuration storeConfiguration =
795          StoreUtils.createStoreConfiguration(env.getMasterConfiguration(), htd, hcd);
796        for (StoreFileInfo storeFileInfo : storeFiles) {
797          // As this procedure is running on master, use CacheConfig.DISABLED means
798          // don't cache any block.
799          // We also need to pass through a suitable CompoundConfiguration as if this
800          // is running in a regionserver's Store context, or we might not be able
801          // to read the hfiles.
802          storeFileInfo.setConf(storeConfiguration);
803          StoreFileSplitter sfs = new StoreFileSplitter(regionFs, htd, hcd,
804            new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED));
805          futures.add(threadPool.submit(sfs));
806        }
807      }
808    }
809    handleThreadPoolShutdown(threadPool, conf);
810    List<StoreFileInfo> daughterA = new ArrayList<>();
811    List<StoreFileInfo> daughterB = new ArrayList<>();
812    // Look for any exception
813    for (Future<Pair<StoreFileInfo, StoreFileInfo>> future : futures) {
814      try {
815        Pair<StoreFileInfo, StoreFileInfo> p = future.get();
816        if (p.getFirst() != null) {
817          daughterA.add(p.getFirst());
818        }
819        if (p.getSecond() != null) {
820          daughterB.add(p.getSecond());
821        }
822      } catch (InterruptedException e) {
823        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
824      } catch (ExecutionException e) {
825        throw new IOException(e);
826      }
827    }
828
829    if (LOG.isDebugEnabled()) {
830      LOG.debug("pid=" + getProcId() + " split storefiles for region "
831        + getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA
832        + " storefiles, Daughter B: " + daughterB + " storefiles.");
833    }
834    return new Pair<>(daughterA, daughterB);
835  }
836
837  private Pair<StoreFileInfo, StoreFileInfo> splitStoreFile(HRegionFileSystem regionFs,
838    TableDescriptor htd, ColumnFamilyDescriptor hcd, HStoreFile sf) throws IOException {
839    if (LOG.isDebugEnabled()) {
840      LOG.debug("pid=" + getProcId() + " splitting started for store file: " + sf.getPath()
841        + " for region: " + getParentRegion().getShortNameToLog());
842    }
843
844    final byte[] splitRow = getSplitRow();
845    final String familyName = hcd.getNameAsString();
846    StoreFileTracker daughterOneSft =
847      StoreFileTrackerFactory.create(regionFs.getFileSystem().getConf(), htd, hcd,
848        HRegionFileSystem.create(regionFs.getFileSystem().getConf(), regionFs.getFileSystem(),
849          regionFs.getTableDir(), daughterOneRI));
850    StoreFileTracker daughterTwoSft =
851      StoreFileTrackerFactory.create(regionFs.getFileSystem().getConf(), htd, hcd,
852        HRegionFileSystem.create(regionFs.getFileSystem().getConf(), regionFs.getFileSystem(),
853          regionFs.getTableDir(), daughterTwoRI));
854    final StoreFileInfo sfiFirst = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf,
855      splitRow, false, splitPolicy, daughterOneSft);
856    final StoreFileInfo sfiSecond = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf,
857      splitRow, true, splitPolicy, daughterTwoSft);
858    if (LOG.isDebugEnabled()) {
859      LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + sf.getPath()
860        + " for region: " + getParentRegion().getShortNameToLog());
861    }
862    return new Pair<StoreFileInfo, StoreFileInfo>(sfiFirst, sfiSecond);
863  }
864
865  /**
866   * Utility class used to do the file splitting / reference writing in parallel instead of
867   * sequentially.
868   */
869  private class StoreFileSplitter implements Callable<Pair<StoreFileInfo, StoreFileInfo>> {
870    private final HRegionFileSystem regionFs;
871    private final ColumnFamilyDescriptor hcd;
872    private final HStoreFile sf;
873    private final TableDescriptor htd;
874
875    /**
876     * Constructor that takes what it needs to split
877     * @param regionFs the file system
878     * @param hcd      Family that contains the store file
879     * @param sf       which file
880     */
881    public StoreFileSplitter(HRegionFileSystem regionFs, TableDescriptor htd,
882      ColumnFamilyDescriptor hcd, HStoreFile sf) {
883      this.regionFs = regionFs;
884      this.sf = sf;
885      this.hcd = hcd;
886      this.htd = htd;
887    }
888
889    @Override
890    public Pair<StoreFileInfo, StoreFileInfo> call() throws IOException {
891      return splitStoreFile(regionFs, htd, hcd, sf);
892    }
893  }
894
895  /**
896   * Post split region actions before the Point-of-No-Return step
897   * @param env MasterProcedureEnv
898   **/
899  private void preSplitRegionBeforeMETA(final MasterProcedureEnv env)
900    throws IOException, InterruptedException {
901    final List<Mutation> metaEntries = new ArrayList<Mutation>();
902    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
903    if (cpHost != null) {
904      cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser());
905      try {
906        for (Mutation p : metaEntries) {
907          RegionInfo.parseRegionName(p.getRow());
908        }
909      } catch (IOException e) {
910        LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as "
911          + "region name." + "Mutations from coprocessor should only for hbase:meta table.");
912        throw e;
913      }
914    }
915  }
916
917  /**
918   * Add daughter regions to META
919   * @param env MasterProcedureEnv
920   */
921  private void updateMeta(final MasterProcedureEnv env) throws IOException {
922    env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
923      daughterOneRI, daughterTwoRI);
924  }
925
926  /**
927   * Pre split region actions after the Point-of-No-Return step
928   * @param env MasterProcedureEnv
929   **/
930  private void preSplitRegionAfterMETA(final MasterProcedureEnv env)
931    throws IOException, InterruptedException {
932    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
933    if (cpHost != null) {
934      cpHost.preSplitAfterMETAAction(getUser());
935    }
936  }
937
938  /**
939   * Post split region actions
940   * @param env MasterProcedureEnv
941   **/
942  private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
943    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
944    if (cpHost != null) {
945      cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser());
946    }
947  }
948
  /**
   * Returns the server currently hosting the parent region, as recorded in the
   * AssignmentManager's region states.
   */
  private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
    return env.getMasterServices().getAssignmentManager().getRegionStates()
      .getRegionServerOfRegion(getParentRegion());
  }
953
  /**
   * Builds the unassign procedures that take the parent region (and its replicas, per the table's
   * region replication) offline ahead of the split.
   */
  private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env)
    throws IOException {
    return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env,
      Stream.of(getParentRegion()), getRegionReplication(env));
  }
959
960  private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env)
961    throws IOException {
962    List<RegionInfo> hris = new ArrayList<RegionInfo>(2);
963    hris.add(daughterOneRI);
964    hris.add(daughterTwoRI);
965    return AssignmentManagerUtil.createAssignProceduresForSplitDaughters(env, hris,
966      getRegionReplication(env), getParentRegionServerName(env));
967  }
968
  /**
   * Returns the region replication count configured on this procedure's table descriptor.
   */
  private int getRegionReplication(final MasterProcedureEnv env) throws IOException {
    final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    return htd.getRegionReplication();
  }
973
974  private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
975    MasterFileSystem fs = env.getMasterFileSystem();
976    long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(),
977      getParentRegion(), fs::getFileSystem, fs::getWALFileSystem);
978    if (maxSequenceId > 0) {
979      WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
980        getWALRegionDir(env, daughterOneRI), maxSequenceId);
981      WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
982        getWALRegionDir(env, daughterTwoRI), maxSequenceId);
983    }
984  }
985
986  @Override
987  protected boolean abort(MasterProcedureEnv env) {
988    // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all
989    // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this
990    // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022.
991    return isRollbackSupported(getCurrentState()) ? super.abort(env) : false;
992  }
993}