001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import java.io.IOException;
021import java.io.InterruptedIOException;
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.Collection;
025import java.util.Collections;
026import java.util.HashMap;
027import java.util.List;
028import java.util.Map;
029import java.util.concurrent.Callable;
030import java.util.concurrent.ExecutionException;
031import java.util.concurrent.ExecutorService;
032import java.util.concurrent.Executors;
033import java.util.concurrent.Future;
034import java.util.concurrent.TimeUnit;
035import java.util.stream.Stream;
036import org.apache.hadoop.conf.Configuration;
037import org.apache.hadoop.fs.FileSystem;
038import org.apache.hadoop.fs.Path;
039import org.apache.hadoop.hbase.DoNotRetryIOException;
040import org.apache.hadoop.hbase.HConstants;
041import org.apache.hadoop.hbase.ServerName;
042import org.apache.hadoop.hbase.TableName;
043import org.apache.hadoop.hbase.UnknownRegionException;
044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
045import org.apache.hadoop.hbase.client.MasterSwitchType;
046import org.apache.hadoop.hbase.client.Mutation;
047import org.apache.hadoop.hbase.client.RegionInfo;
048import org.apache.hadoop.hbase.client.RegionInfoBuilder;
049import org.apache.hadoop.hbase.client.TableDescriptor;
050import org.apache.hadoop.hbase.io.hfile.CacheConfig;
051import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
052import org.apache.hadoop.hbase.master.MasterFileSystem;
053import org.apache.hadoop.hbase.master.RegionState.State;
054import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
055import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
057import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
058import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
059import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
060import org.apache.hadoop.hbase.quotas.QuotaExceededException;
061import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
062import org.apache.hadoop.hbase.regionserver.HStore;
063import org.apache.hadoop.hbase.regionserver.HStoreFile;
064import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
065import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
066import org.apache.hadoop.hbase.util.Bytes;
067import org.apache.hadoop.hbase.util.CommonFSUtils;
068import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
069import org.apache.hadoop.hbase.util.FSUtils;
070import org.apache.hadoop.hbase.util.Pair;
071import org.apache.hadoop.hbase.util.Threads;
072import org.apache.hadoop.hbase.wal.WALSplitUtil;
073import org.apache.hadoop.util.ReflectionUtils;
074import org.apache.yetus.audience.InterfaceAudience;
075import org.slf4j.Logger;
076import org.slf4j.LoggerFactory;
077
078import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
079
080import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
081import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
082import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
083import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;
084
085/**
086 * The procedure to split a region in a table.
087 * Takes lock on the parent region.
088 * It holds the lock for the life of the procedure.
 * <p>Throws an exception on construction if it determines the context is hostile to a split
 * (cluster going down or master is shutting down or table is disabled).</p>
091 */
092@InterfaceAudience.Private
093public class SplitTableRegionProcedure
094    extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
  /** Logger shared by every instance of this procedure. */
  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
  /** First daughter; the constructor builds it from the parent's start key up to the split row. */
  private RegionInfo daughterOneRI;
  /** Second daughter; the constructor builds it from the split row up to the parent's end key. */
  private RegionInfo daughterTwoRI;
  /**
   * Row to split at. Taken from the constructor argument, or filled in by
   * {@code checkSplittable} when the caller supplied none. Not written by serializeStateData.
   */
  private byte[] bestSplitRow;
  /**
   * Split policy instance, created in the constructor only when the table descriptor names a
   * split policy class. Not written by serializeStateData.
   */
  private RegionSplitPolicy splitPolicy;
100
  /**
   * No-argument constructor. Leaves the daughter regions, split row and split policy unset.
   */
  public SplitTableRegionProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }
104
105  public SplitTableRegionProcedure(final MasterProcedureEnv env,
106      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
107    super(env, regionToSplit);
108    preflightChecks(env, true);
109    // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here
110    // we fail-fast on construction. There it skips the split with just a warning.
111    checkOnline(env, regionToSplit);
112    this.bestSplitRow = splitRow;
113    checkSplittable(env, regionToSplit);
114    final TableName table = regionToSplit.getTable();
115    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
116    this.daughterOneRI = RegionInfoBuilder.newBuilder(table)
117        .setStartKey(regionToSplit.getStartKey())
118        .setEndKey(bestSplitRow)
119        .setSplit(false)
120        .setRegionId(rid)
121        .build();
122    this.daughterTwoRI = RegionInfoBuilder.newBuilder(table)
123        .setStartKey(bestSplitRow)
124        .setEndKey(regionToSplit.getEndKey())
125        .setSplit(false)
126        .setRegionId(rid)
127        .build();
128    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
129    if(htd.getRegionSplitPolicyClassName() != null) {
130      // Since we don't have region reference here, creating the split policy instance without it.
131      // This can be used to invoke methods which don't require Region reference. This instantiation
132      // of a class on Master-side though it only makes sense on the RegionServer-side is
133      // for Phoenix Local Indexing. Refer HBASE-12583 for more information.
134      Class<? extends RegionSplitPolicy> clazz =
135          RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration());
136      this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration());
137    }
138  }
139
140  @Override
141  protected LockState acquireLock(final MasterProcedureEnv env) {
142    if (env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(),
143      daughterOneRI, daughterTwoRI)) {
144      try {
145        LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks());
146      } catch (IOException e) {
147        // Ignore, just for logging
148      }
149      return LockState.LOCK_EVENT_WAIT;
150    }
151    return LockState.LOCK_ACQUIRED;
152  }
153
154  @Override
155  protected void releaseLock(final MasterProcedureEnv env) {
156    env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI,
157      daughterTwoRI);
158  }
159
  /**
   * @return the first daughter region (parent start key up to the split row), or null if it
   *         has not been set yet
   */
  public RegionInfo getDaughterOneRI() {
    return daughterOneRI;
  }
163
  /**
   * @return the second daughter region (split row up to the parent end key), or null if it
   *         has not been set yet
   */
  public RegionInfo getDaughterTwoRI() {
    return daughterTwoRI;
  }
167
168  private boolean hasBestSplitRow() {
169    return bestSplitRow != null && bestSplitRow.length > 0;
170  }
171
172  /**
173   * Check whether the region is splittable
174   * @param env MasterProcedureEnv
175   * @param regionToSplit parent Region to be split
176   */
177  private void checkSplittable(final MasterProcedureEnv env,
178      final RegionInfo regionToSplit) throws IOException {
179    // Ask the remote RS if this region is splittable.
180    // If we get an IOE, report it along w/ the failure so can see why we are not splittable at
181    // this time.
182    if(regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
183      throw new IllegalArgumentException("Can't invoke split on non-default regions directly");
184    }
185    RegionStateNode node =
186        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
187    IOException splittableCheckIOE = null;
188    boolean splittable = false;
189    if (node != null) {
190      try {
191        GetRegionInfoResponse response;
192        if (!hasBestSplitRow()) {
193          LOG.info(
194            "{} splitKey isn't explicitly specified, will try to find a best split key from RS {}",
195            node.getRegionInfo().getRegionNameAsString(), node.getRegionLocation());
196          response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(),
197            node.getRegionInfo(), true);
198          bestSplitRow =
199            response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
200        } else {
201          response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(),
202            node.getRegionInfo(), false);
203        }
204        splittable = response.hasSplittable() && response.getSplittable();
205        if (LOG.isDebugEnabled()) {
206          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
207        }
208      } catch (IOException e) {
209        splittableCheckIOE = e;
210      }
211    }
212
213    if (!splittable) {
214      IOException e =
215        new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
216      if (splittableCheckIOE != null) {
217        e.initCause(splittableCheckIOE);
218      }
219      throw e;
220    }
221
222    if (bestSplitRow == null || bestSplitRow.length == 0) {
223      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " +
224        "maybe table is too small for auto split. For force split, try specifying split row");
225    }
226
227    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
228      throw new DoNotRetryIOException(
229        "Split row is equal to startkey: " + Bytes.toStringBinary(bestSplitRow));
230    }
231
232    if (!regionToSplit.containsRow(bestSplitRow)) {
233      throw new DoNotRetryIOException("Split row is not inside region key range splitKey:" +
234        Bytes.toStringBinary(bestSplitRow) + " region: " + regionToSplit);
235    }
236  }
237
238  /**
239   * Calculate daughter regionid to use.
240   * @param hri Parent {@link RegionInfo}
241   * @return Daughter region id (timestamp) to use.
242   */
243  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
244    long rid = EnvironmentEdgeManager.currentTime();
245    // Regionid is timestamp.  Can't be less than that of parent else will insert
246    // at wrong location in hbase:meta (See HBASE-710).
247    if (rid < hri.getRegionId()) {
248      LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
249        " but current time here is " + rid);
250      rid = hri.getRegionId() + 1;
251    }
252    return rid;
253  }
254
255  private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException {
256    AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()),
257      getRegionReplication(env));
258  }
259
260  private void checkClosedRegions(MasterProcedureEnv env) throws IOException {
261    // theoretically this should not happen any more after we use TRSP, but anyway let's add a check
262    // here
263    AssignmentManagerUtil.checkClosedRegion(env, getParentRegion());
264  }
265
  /**
   * Runs the action that belongs to {@code state} and selects the state to run next.
   * <p>
   * An IOException thrown by a step is handled here: when {@code state} supports rollback the
   * procedure is marked failed, otherwise the exception is only logged. In both cases no next
   * state is set and {@link Flow#HAS_MORE_STATE} is returned.
   * @param env MasterProcedureEnv
   * @param state the state to execute
   * @return {@link Flow#NO_MORE_STATE} after the post-operation step or when the prepare step
   *         declines the split, otherwise {@link Flow#HAS_MORE_STATE}
   */
  @Override
  protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state)
      throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_PREPARE:
          // prepareSplitRegion returns false when the split is skipped or has been failed.
          if (prepareSplitRegion(env)) {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
            break;
          } else {
            return Flow.NO_MORE_STATE;
          }
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          preSplitRegion(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          addChildProcedure(createUnassignProcedures(env));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          checkClosedRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
          removeNonDefaultReplicas(env);
          createDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
          break;
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          writeMaxSequenceIdFile(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          preSplitRegionBeforeMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
          break;
        case SPLIT_TABLE_REGION_UPDATE_META:
          // From this state on isRollbackSupported returns false.
          updateMeta(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
          preSplitRegionAfterMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
          break;
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
          addChildProcedure(createAssignProcedures(env));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
          break;
        case SPLIT_TABLE_REGION_POST_OPERATION:
          postSplitRegion(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
      if (!isRollbackSupported(state)) {
        // We reach a state that cannot be rolled back. We just need to keep retrying.
        LOG.warn(msg, e);
      } else {
        LOG.error(msg, e);
        setFailure("master-split-regions", e);
      }
    }
    // if split fails,  need to call ((HRegion)parent).clearSplit() when it is a force split
    return Flow.HAS_MORE_STATE;
  }
336
337  /**
338   * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously
339   * submitted for parent region to be split (rollback doesn't wait on the completion of the
340   * AssignProcedure) . This can be improved by changing rollback() to support sub-procedures.
341   * See HBASE-19851 for details.
342   */
343  @Override
344  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
345      throws IOException, InterruptedException {
346    LOG.trace("{} rollback state={}", this, state);
347
348    try {
349      switch (state) {
350        case SPLIT_TABLE_REGION_POST_OPERATION:
351        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
352        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
353        case SPLIT_TABLE_REGION_UPDATE_META:
354          // PONR
355          throw new UnsupportedOperationException(this + " unhandled state=" + state);
356        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
357          break;
358        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
359        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
360          // Doing nothing, as re-open parent region would clean up daughter region directories.
361          break;
362        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
363          // Doing nothing, in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION,
364          // we will bring parent region online
365          break;
366        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
367          openParentRegion(env);
368          break;
369        case SPLIT_TABLE_REGION_PRE_OPERATION:
370          postRollBackSplitRegion(env);
371          break;
372        case SPLIT_TABLE_REGION_PREPARE:
373          break; // nothing to do
374        default:
375          throw new UnsupportedOperationException(this + " unhandled state=" + state);
376      }
377    } catch (IOException e) {
378      // This will be retried. Unless there is a bug in the code,
379      // this should be just a "temporary error" (e.g. network down)
380      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state +
381          " for splitting the region "
382        + getParentRegion().getEncodedName() + " in table " + getTableName(), e);
383      throw e;
384    }
385  }
386
387  /*
388   * Check whether we are in the state that can be rollback
389   */
390  @Override
391  protected boolean isRollbackSupported(final SplitTableRegionState state) {
392    switch (state) {
393      case SPLIT_TABLE_REGION_POST_OPERATION:
394      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
395      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
396      case SPLIT_TABLE_REGION_UPDATE_META:
397        // It is not safe to rollback if we reach to these states.
398        return false;
399      default:
400        break;
401    }
402    return true;
403  }
404
  /**
   * @param stateId numeric id of a state
   * @return the {@link SplitTableRegionState} that {@code forNumber} maps {@code stateId} to
   */
  @Override
  protected SplitTableRegionState getState(final int stateId) {
    return SplitTableRegionState.forNumber(stateId);
  }
409
  /**
   * @param state a state of this procedure
   * @return the number of {@code state}; the inverse of {@link #getState(int)}
   */
  @Override
  protected int getStateId(final SplitTableRegionState state) {
    return state.getNumber();
  }
414
  /**
   * @return the first state of the procedure, {@code SPLIT_TABLE_REGION_PREPARE}
   */
  @Override
  protected SplitTableRegionState getInitialState() {
    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
  }
419
420  @Override
421  protected void serializeStateData(ProcedureStateSerializer serializer)
422      throws IOException {
423    super.serializeStateData(serializer);
424
425    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
426        MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
427        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
428        .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
429        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI))
430        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI));
431    serializer.serialize(splitTableRegionMsg.build());
432  }
433
434  @Override
435  protected void deserializeStateData(ProcedureStateSerializer serializer)
436      throws IOException {
437    super.deserializeStateData(serializer);
438
439    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
440        serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
441    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
442    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
443    assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2);
444    daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
445    daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
446  }
447
448  @Override
449  public void toStringClassDetails(StringBuilder sb) {
450    sb.append(getClass().getSimpleName());
451    sb.append(" table=");
452    sb.append(getTableName());
453    sb.append(", parent=");
454    sb.append(getParentRegion().getShortNameToLog());
455    sb.append(", daughterA=");
456    sb.append(daughterOneRI.getShortNameToLog());
457    sb.append(", daughterB=");
458    sb.append(daughterTwoRI.getShortNameToLog());
459  }
460
  /**
   * @return the region being split; an alias for {@code getRegion()}
   */
  private RegionInfo getParentRegion() {
    return getRegion();
  }
464
  /**
   * @return {@link TableOperationType#REGION_SPLIT}
   */
  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SPLIT;
  }
469
  /**
   * @param env MasterProcedureEnv
   * @return the split procedure metrics taken from the assignment manager's metrics
   */
  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
  }
474
  /**
   * @return the split row, read back as the start key of the second daughter region
   */
  private byte[] getSplitRow() {
    return daughterTwoRI.getStartKey();
  }
478
  /** States the parent region must be in for prepareSplitRegion to let the split proceed. */
  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };
480
481  /**
482   * Prepare to Split region.
483   * @param env MasterProcedureEnv
484   */
485  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
486    // Fail if we are taking snapshot for the given table
487    if (env.getMasterServices().getSnapshotManager()
488      .isTakingSnapshot(getParentRegion().getTable())) {
489      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() +
490        ", because we are taking snapshot for the table " + getParentRegion().getTable()));
491      return false;
492    }
493    // Check whether the region is splittable
494    RegionStateNode node =
495        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
496
497    if (node == null) {
498      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
499    }
500
501    RegionInfo parentHRI = node.getRegionInfo();
502    if (parentHRI == null) {
503      LOG.info("Unsplittable; parent region is null; node={}", node);
504      return false;
505    }
506    // Lookup the parent HRI state from the AM, which has the latest updated info.
507    // Protect against the case where concurrent SPLIT requests came in and succeeded
508    // just before us.
509    if (node.isInState(State.SPLIT)) {
510      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
511      return false;
512    }
513    if (parentHRI.isSplit() || parentHRI.isOffline()) {
514      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
515      return false;
516    }
517
518    // expected parent to be online or closed
519    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
520      // We may have SPLIT already?
521      setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() +
522          " FAILED because state=" + node.getState() + "; expected " +
523          Arrays.toString(EXPECTED_SPLIT_STATES)));
524      return false;
525    }
526
527    // Mostly this check is not used because we already check the switch before submit a split
528    // procedure. Just for safe, check the switch again. This procedure can be rollbacked if
529    // the switch was set to false after submit.
530    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
531      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
532      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() +
533          " failed due to split switch off"));
534      return false;
535    }
536
537    if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) {
538      LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(),
539        parentHRI);
540      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString()
541          + " failed as region split is disabled for the table"));
542      return false;
543    }
544
545    // set node state as SPLITTING
546    node.setState(State.SPLITTING);
547
548    // Since we have the lock and the master is coordinating the operation
549    // we are always able to split the region
550    return true;
551  }
552
553  /**
554   * Action before splitting region in a table.
555   * @param env MasterProcedureEnv
556   */
557  private void preSplitRegion(final MasterProcedureEnv env)
558      throws IOException, InterruptedException {
559    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
560    if (cpHost != null) {
561      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
562    }
563
564    // TODO: Clean up split and merge. Currently all over the place.
565    // Notify QuotaManager and RegionNormalizer
566    try {
567      env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion());
568    } catch (QuotaExceededException e) {
569      // TODO: why is this here? split requests can be submitted by actors other than the normalizer
570      env.getMasterServices()
571        .getRegionNormalizerManager()
572        .planSkipped(NormalizationPlan.PlanType.SPLIT);
573      throw e;
574    }
575  }
576
577  /**
578   * Action after rollback a split table region action.
579   * @param env MasterProcedureEnv
580   */
581  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
582    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
583    if (cpHost != null) {
584      cpHost.postRollBackSplitRegionAction(getUser());
585    }
586  }
587
588  /**
589   * Rollback close parent region
590   */
591  private void openParentRegion(MasterProcedureEnv env) throws IOException {
592    AssignmentManagerUtil.reopenRegionsForRollback(env,
593      Collections.singletonList((getParentRegion())), getRegionReplication(env),
594      getParentRegionServerName(env));
595  }
596
597  /**
598   * Create daughter regions
599   */
600  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
601    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
602    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
603    final FileSystem fs = mfs.getFileSystem();
604    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
605      env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
606    regionFs.createSplitsDir(daughterOneRI, daughterTwoRI);
607
608    Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);
609
610    assertReferenceFileCount(fs, expectedReferences.getFirst(),
611      regionFs.getSplitsDir(daughterOneRI));
612    //Move the files from the temporary .splits to the final /table/region directory
613    regionFs.commitDaughterRegion(daughterOneRI);
614    assertReferenceFileCount(fs, expectedReferences.getFirst(),
615      new Path(tabledir, daughterOneRI.getEncodedName()));
616
617    assertReferenceFileCount(fs, expectedReferences.getSecond(),
618      regionFs.getSplitsDir(daughterTwoRI));
619    regionFs.commitDaughterRegion(daughterTwoRI);
620    assertReferenceFileCount(fs, expectedReferences.getSecond(),
621      new Path(tabledir, daughterTwoRI.getEncodedName()));
622  }
623
624  /**
625   * Create Split directory
626   * @param env MasterProcedureEnv
627   */
628  private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
629      final HRegionFileSystem regionFs) throws IOException {
630    final Configuration conf = env.getMasterConfiguration();
631    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
632    // The following code sets up a thread pool executor with as many slots as
633    // there's files to split. It then fires up everything, waits for
634    // completion and finally checks for any exception
635    //
636    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir
637    // Nothing to unroll here if failure -- re-run createSplitsDir will
638    // clean this up.
639    int nbFiles = 0;
640    final Map<String, Collection<StoreFileInfo>> files =
641        new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount());
642    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
643      String family = cfd.getNameAsString();
644      Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family);
645      if (sfis == null) {
646        continue;
647      }
648      Collection<StoreFileInfo> filteredSfis = null;
649      for (StoreFileInfo sfi : sfis) {
650        // Filter. There is a lag cleaning up compacted reference files. They get cleared
651        // after a delay in case outstanding Scanners still have references. Because of this,
652        // the listing of the Store content may have straggler reference files. Skip these.
653        // It should be safe to skip references at this point because we checked above with
654        // the region if it thinks it is splittable and if we are here, it thinks it is
655        // splitable.
656        if (sfi.isReference()) {
657          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
658          continue;
659        }
660        if (filteredSfis == null) {
661          filteredSfis = new ArrayList<StoreFileInfo>(sfis.size());
662          files.put(family, filteredSfis);
663        }
664        filteredSfis.add(sfi);
665        nbFiles++;
666      }
667    }
668    if (nbFiles == 0) {
669      // no file needs to be splitted.
670      return new Pair<Integer, Integer>(0, 0);
671    }
672    // Max #threads is the smaller of the number of storefiles or the default max determined above.
673    int maxThreads = Math.min(
674      conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
675        conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
676      nbFiles);
677    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" +
678        getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
679    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
680      new ThreadFactoryBuilder().setNameFormat("StoreFileSplitter-pool-%d").setDaemon(true)
681        .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
682    final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);
683
684    // Split each store file.
685    for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) {
686      byte[] familyName = Bytes.toBytes(e.getKey());
687      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
688      final Collection<StoreFileInfo> storeFiles = e.getValue();
689      if (storeFiles != null && storeFiles.size() > 0) {
690        for (StoreFileInfo storeFileInfo : storeFiles) {
691          // As this procedure is running on master, use CacheConfig.DISABLED means
692          // don't cache any block.
693          StoreFileSplitter sfs =
694              new StoreFileSplitter(regionFs, familyName, new HStoreFile(
695                  storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED));
696          futures.add(threadPool.submit(sfs));
697        }
698      }
699    }
700    // Shutdown the pool
701    threadPool.shutdown();
702
703    // Wait for all the tasks to finish.
704    // When splits ran on the RegionServer, how-long-to-wait-configuration was named
705    // hbase.regionserver.fileSplitTimeout. If set, use its value.
706    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
707      conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
708    try {
709      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
710      if (stillRunning) {
711        threadPool.shutdownNow();
712        // wait for the thread to shutdown completely.
713        while (!threadPool.isTerminated()) {
714          Thread.sleep(50);
715        }
716        throw new IOException(
717            "Took too long to split the" + " files and create the references, aborting split");
718      }
719    } catch (InterruptedException e) {
720      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
721    }
722
723    int daughterA = 0;
724    int daughterB = 0;
725    // Look for any exception
726    for (Future<Pair<Path, Path>> future : futures) {
727      try {
728        Pair<Path, Path> p = future.get();
729        daughterA += p.getFirst() != null ? 1 : 0;
730        daughterB += p.getSecond() != null ? 1 : 0;
731      } catch (InterruptedException e) {
732        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
733      } catch (ExecutionException e) {
734        throw new IOException(e);
735      }
736    }
737
738    if (LOG.isDebugEnabled()) {
739      LOG.debug("pid=" + getProcId() + " split storefiles for region " +
740          getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA +
741          " storefiles, Daughter B: " + daughterB + " storefiles.");
742    }
743    return new Pair<Integer, Integer>(daughterA, daughterB);
744  }
745
746  private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount,
747      final Path dir) throws IOException {
748    if (expectedReferenceFileCount != 0 &&
749        expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) {
750      throw new IOException("Failing split. Expected reference file count isn't equal.");
751    }
752  }
753
754  private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf)
755    throws IOException {
756    if (LOG.isDebugEnabled()) {
757      LOG.debug("pid=" + getProcId() + " splitting started for store file: " +
758          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
759    }
760
761    final byte[] splitRow = getSplitRow();
762    final String familyName = Bytes.toString(family);
763    final Path path_first = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow,
764        false, splitPolicy);
765    final Path path_second = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow,
766       true, splitPolicy);
767    if (LOG.isDebugEnabled()) {
768      LOG.debug("pid=" + getProcId() + " splitting complete for store file: " +
769          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
770    }
771    return new Pair<Path,Path>(path_first, path_second);
772  }
773
774  /**
775   * Utility class used to do the file splitting / reference writing
776   * in parallel instead of sequentially.
777   */
778  private class StoreFileSplitter implements Callable<Pair<Path,Path>> {
779    private final HRegionFileSystem regionFs;
780    private final byte[] family;
781    private final HStoreFile sf;
782
783    /**
784     * Constructor that takes what it needs to split
785     * @param regionFs the file system
786     * @param family Family that contains the store file
787     * @param sf which file
788     */
789    public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) {
790      this.regionFs = regionFs;
791      this.sf = sf;
792      this.family = family;
793    }
794
795    @Override
796    public Pair<Path,Path> call() throws IOException {
797      return splitStoreFile(regionFs, family, sf);
798    }
799  }
800
801  /**
802   * Post split region actions before the Point-of-No-Return step
803   * @param env MasterProcedureEnv
804   **/
805  private void preSplitRegionBeforeMETA(final MasterProcedureEnv env)
806      throws IOException, InterruptedException {
807    final List<Mutation> metaEntries = new ArrayList<Mutation>();
808    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
809    if (cpHost != null) {
810      cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser());
811      try {
812        for (Mutation p : metaEntries) {
813          RegionInfo.parseRegionName(p.getRow());
814        }
815      } catch (IOException e) {
816        LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as "
817            + "region name."
818            + "Mutations from coprocessor should only for hbase:meta table.");
819        throw e;
820      }
821    }
822  }
823
824  /**
825   * Add daughter regions to META
826   * @param env MasterProcedureEnv
827   */
828  private void updateMeta(final MasterProcedureEnv env) throws IOException {
829    env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
830        daughterOneRI, daughterTwoRI);
831  }
832
833  /**
834   * Pre split region actions after the Point-of-No-Return step
835   * @param env MasterProcedureEnv
836   **/
837  private void preSplitRegionAfterMETA(final MasterProcedureEnv env)
838      throws IOException, InterruptedException {
839    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
840    if (cpHost != null) {
841      cpHost.preSplitAfterMETAAction(getUser());
842    }
843  }
844
845  /**
846   * Post split region actions
847   * @param env MasterProcedureEnv
848   **/
849  private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
850    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
851    if (cpHost != null) {
852      cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser());
853    }
854  }
855
856  private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
857    return env.getMasterServices().getAssignmentManager().getRegionStates()
858      .getRegionServerOfRegion(getParentRegion());
859  }
860
861  private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env)
862      throws IOException {
863    return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env,
864      Stream.of(getParentRegion()), getRegionReplication(env));
865  }
866
867  private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env)
868      throws IOException {
869    List<RegionInfo> hris = new ArrayList<RegionInfo>(2);
870    hris.add(daughterOneRI);
871    hris.add(daughterTwoRI);
872    return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env, hris,
873      getRegionReplication(env), getParentRegionServerName(env));
874  }
875
876  private int getRegionReplication(final MasterProcedureEnv env) throws IOException {
877    final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
878    return htd.getRegionReplication();
879  }
880
881  private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
882    MasterFileSystem fs = env.getMasterFileSystem();
883    long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(),
884      getParentRegion(), fs::getFileSystem, fs::getWALFileSystem);
885    if (maxSequenceId > 0) {
886      WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
887        getWALRegionDir(env, daughterOneRI), maxSequenceId);
888      WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
889        getWALRegionDir(env, daughterTwoRI), maxSequenceId);
890    }
891  }
892
893  @Override
894  protected boolean abort(MasterProcedureEnv env) {
895    // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all
896    // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this
897    // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022.
898    return isRollbackSupported(getCurrentState())? super.abort(env): false;
899  }
900}