/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.assignment;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.quotas.QuotaExceededException;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WALSplitUtil;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;

/**
 * The procedure to split a region in a table.
 * Takes a lock on the parent region.
 * It holds the lock for the life of the procedure.
 * <p>Throws an exception on construction if it determines the context is hostile to the split
 * (the cluster is going down, the master is shutting down, or the table is disabled).</p>
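 * <p>Operators do not normally construct this procedure directly. A split is requested through
 * the client API and the master then creates and runs this procedure. A minimal, purely
 * illustrative client-side sketch (the {@code conf}, {@code tableName} and {@code splitRow}
 * names are placeholders):</p>
 * <pre>{@code
 * // Ask the master to split the region of tableName that contains splitRow.
 * try (Connection conn = ConnectionFactory.createConnection(conf);
 *     Admin admin = conn.getAdmin()) {
 *   admin.split(tableName, splitRow);
 * }
 * }</pre>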
 */
@InterfaceAudience.Private
public class SplitTableRegionProcedure
    extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
  private RegionInfo daughterOneRI;
  private RegionInfo daughterTwoRI;
  private byte[] bestSplitRow;
  private RegionSplitPolicy splitPolicy;

  public SplitTableRegionProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }

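  /**
   * Construct a procedure to split {@code regionToSplit} at {@code splitRow}.
   * @throws IOException if the context is hostile to the split (see the class comment), if the
   *           region is not online, or if the region is not splittable at the given row
   */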
  public SplitTableRegionProcedure(final MasterProcedureEnv env,
      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
    super(env, regionToSplit);
    preflightChecks(env, true);
    // When the procedure runs its prepare step, it repeats these checkOnline checks but only
    // skips the split with a warning. Here we fail fast on construction instead.
    checkOnline(env, regionToSplit);
    this.bestSplitRow = splitRow;
    checkSplittable(env, regionToSplit, bestSplitRow);
    final TableName table = regionToSplit.getTable();
    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
    this.daughterOneRI = RegionInfoBuilder.newBuilder(table)
        .setStartKey(regionToSplit.getStartKey())
        .setEndKey(bestSplitRow)
        .setSplit(false)
        .setRegionId(rid)
        .build();
    this.daughterTwoRI = RegionInfoBuilder.newBuilder(table)
        .setStartKey(bestSplitRow)
        .setEndKey(regionToSplit.getEndKey())
        .setSplit(false)
        .setRegionId(rid)
        .build();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    if (htd.getRegionSplitPolicyClassName() != null) {
      // We don't have a Region reference here, so create the split policy instance without one.
      // It can only be used to invoke methods that don't require a Region reference. Instantiating
      // the policy on the master side, even though it normally only makes sense on the
      // RegionServer side, is done for Phoenix local indexing; see HBASE-12583 for details.
      Class<? extends RegionSplitPolicy> clazz =
          RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration());
      this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration());
    }
  }

  @Override
  protected LockState acquireLock(final MasterProcedureEnv env) {
    if (env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(),
      daughterOneRI, daughterTwoRI)) {
      try {
        LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks());
      } catch (IOException e) {
        // Ignore, just for logging
      }
      return LockState.LOCK_EVENT_WAIT;
    }
    return LockState.LOCK_ACQUIRED;
  }

  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI,
      daughterTwoRI);
  }

  @VisibleForTesting
  public RegionInfo getDaughterOneRI() {
    return daughterOneRI;
  }

  @VisibleForTesting
  public RegionInfo getDaughterTwoRI() {
    return daughterTwoRI;
  }

  /**
   * Check whether the region is splittable.
   * @param env MasterProcedureEnv
   * @param regionToSplit parent Region to be split
   * @param splitRow the split point; if not specified, we first ask the RegionServer for its best
   *          split row
   * @throws IOException if the region is not splittable or the split row is invalid
   */
  private void checkSplittable(final MasterProcedureEnv env,
      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
    // Ask the remote RS if this region is splittable.
    // If we get an IOE, report it along with the failure so we can see why the region is not
    // splittable at this time.
    if (regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
      throw new IllegalArgumentException("Can't invoke split on non-default regions directly");
    }
    RegionStateNode node =
        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
    IOException splittableCheckIOE = null;
    boolean splittable = false;
    if (node != null) {
      try {
        if (bestSplitRow == null || bestSplitRow.length == 0) {
          LOG.info(
            "splitKey isn't explicitly specified, will try to find the best split key from RS");
        }
        // Always set the bestSplitRow request to true here; we need to call Region#checkSplit to
        // determine whether the region is splittable or not.
        GetRegionInfoResponse response = AssignmentManagerUtil.getRegionInfoResponse(env,
          node.getRegionLocation(), node.getRegionInfo(), true);
        if (bestSplitRow == null || bestSplitRow.length == 0) {
          bestSplitRow =
            response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
        }
        splittable = response.hasSplittable() && response.getSplittable();

        if (LOG.isDebugEnabled()) {
          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
        }
      } catch (IOException e) {
        splittableCheckIOE = e;
      }
    }

    if (!splittable) {
      IOException e =
        new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
      if (splittableCheckIOE != null) {
        e.initCause(splittableCheckIOE);
      }
      throw e;
    }

    if (bestSplitRow == null || bestSplitRow.length == 0) {
      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " +
        "maybe the table is too small for auto split. To force a split, try specifying a " +
        "split row");
    }

    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
      throw new DoNotRetryIOException(
        "Split row is equal to startkey: " + Bytes.toStringBinary(splitRow));
    }

    if (!regionToSplit.containsRow(bestSplitRow)) {
      throw new DoNotRetryIOException("Split row is not inside region key range splitKey:" +
        Bytes.toStringBinary(splitRow) + " region: " + regionToSplit);
    }
  }

  /**
   * Calculate the daughter region id to use.
   * @param hri Parent {@link RegionInfo}
   * @return Daughter region id (timestamp) to use.
   */
  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
    long rid = EnvironmentEdgeManager.currentTime();
    // The region id is a timestamp. It can't be less than the parent's, else the daughters would
    // sort into the wrong location in hbase:meta (see HBASE-710).
    if (rid < hri.getRegionId()) {
      LOG.warn("Clock skew; parent region's id is " + hri.getRegionId() +
        " but current time here is " + rid);
      rid = hri.getRegionId() + 1;
    }
    return rid;
  }

  private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException {
    AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()),
      getRegionReplication(env));
  }

  private void checkClosedRegions(MasterProcedureEnv env) throws IOException {
    // Theoretically this should not happen any more now that we use TRSP, but let's keep a check
    // here anyway.
    AssignmentManagerUtil.checkClosedRegion(env, getParentRegion());
  }

  @Override
  protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state)
      throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

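    // On the happy path the procedure walks the states in this order:
    // PREPARE -> PRE_OPERATION -> CLOSE_PARENT_REGION -> CHECK_CLOSED_REGIONS ->
    // CREATE_DAUGHTER_REGIONS -> WRITE_MAX_SEQUENCE_ID_FILE -> PRE_OPERATION_BEFORE_META ->
    // UPDATE_META -> PRE_OPERATION_AFTER_META -> OPEN_CHILD_REGIONS -> POST_OPERATION.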
    try {
      switch (state) {
        case SPLIT_TABLE_REGION_PREPARE:
          if (prepareSplitRegion(env)) {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
            break;
          } else {
            return Flow.NO_MORE_STATE;
          }
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          preSplitRegion(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          addChildProcedure(createUnassignProcedures(env));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          checkClosedRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
          removeNonDefaultReplicas(env);
          createDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
          break;
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          writeMaxSequenceIdFile(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          preSplitRegionBeforeMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
          break;
        case SPLIT_TABLE_REGION_UPDATE_META:
          updateMeta(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
          preSplitRegionAfterMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
          break;
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
          addChildProcedure(createAssignProcedures(env));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
          break;
        case SPLIT_TABLE_REGION_POST_OPERATION:
          postSplitRegion(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
      if (!isRollbackSupported(state)) {
        // We have reached a state that cannot be rolled back; we just need to keep retrying.
        LOG.warn(msg, e);
      } else {
        LOG.error(msg, e);
        setFailure("master-split-regions", e);
      }
    }
    // If the split fails, we need to call ((HRegion)parent).clearSplit() when it was a force split.
    return Flow.HAS_MORE_STATE;
  }

  /**
   * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously
   * submitted for the parent region to be split (rollback doesn't wait on the completion of the
   * AssignProcedure). This can be improved by changing rollback() to support sub-procedures.
   * See HBASE-19851 for details.
   */
  @Override
  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
      throws IOException, InterruptedException {
    LOG.trace("{} rollback state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_POST_OPERATION:
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
        case SPLIT_TABLE_REGION_UPDATE_META:
          // PONR
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          // Nothing to do, as re-opening the parent region will clean up the daughter region
          // directories.
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          // Nothing to do; rolling back SPLIT_TABLE_REGION_CLOSE_PARENT_REGION will bring the
          // parent region back online.
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          openParentRegion(env);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          postRollBackSplitRegion(env);
          break;
        case SPLIT_TABLE_REGION_PREPARE:
          break; // nothing to do
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      // This will be retried. Unless there is a bug in the code,
      // this should be just a "temporary error" (e.g. network down)
      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state +
          " for splitting the region "
        + getParentRegion().getEncodedName() + " in table " + getTableName(), e);
      throw e;
    }
  }

  /*
   * Check whether we are in a state that can be rolled back.
   */
  @Override
  protected boolean isRollbackSupported(final SplitTableRegionState state) {
    switch (state) {
      case SPLIT_TABLE_REGION_POST_OPERATION:
      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
      case SPLIT_TABLE_REGION_UPDATE_META:
        // It is not safe to roll back once we reach these states.
        return false;
      default:
        break;
    }
    return true;
  }

  @Override
  protected SplitTableRegionState getState(final int stateId) {
    return SplitTableRegionState.forNumber(stateId);
  }

  @Override
  protected int getStateId(final SplitTableRegionState state) {
    return state.getNumber();
  }

  @Override
  protected SplitTableRegionState getInitialState() {
    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
  }

  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.serializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
        MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
        .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI));
    serializer.serialize(splitTableRegionMsg.build());
  }

  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.deserializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
        serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
    assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2);
    daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
    daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getClass().getSimpleName());
    sb.append(" table=");
    sb.append(getTableName());
    sb.append(", parent=");
    sb.append(getParentRegion().getShortNameToLog());
    sb.append(", daughterA=");
    sb.append(daughterOneRI.getShortNameToLog());
    sb.append(", daughterB=");
    sb.append(daughterTwoRI.getShortNameToLog());
  }

  private RegionInfo getParentRegion() {
    return getRegion();
  }

  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SPLIT;
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
  }

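  /**
   * @return the split row, which is by construction the start key of the second daughter region
   */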
  private byte[] getSplitRow() {
    return daughterTwoRI.getStartKey();
  }

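  /** The parent region must be in one of these states for the split to proceed. */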
  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };

  /**
   * Prepare to split the region.
   * @param env MasterProcedureEnv
   */
  @VisibleForTesting
  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
    // Fail if we are taking a snapshot of the given table
    if (env.getMasterServices().getSnapshotManager()
      .isTakingSnapshot(getParentRegion().getTable())) {
      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() +
        ", because we are taking a snapshot of the table " + getParentRegion().getTable()));
      return false;
    }
    // Check whether the region is splittable
    RegionStateNode node =
        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());

    if (node == null) {
      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
    }

    RegionInfo parentHRI = node.getRegionInfo();
    if (parentHRI == null) {
      LOG.info("Unsplittable; parent region is null; node={}", node);
      return false;
    }
    // Lookup the parent HRI state from the AM, which has the latest updated info.
    // Protect against the case where concurrent SPLIT requests came in and succeeded
    // just before us.
    if (node.isInState(State.SPLIT)) {
      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
      return false;
    }
    if (parentHRI.isSplit() || parentHRI.isOffline()) {
      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
      return false;
    }

    // We expect the parent region to be open or closed.
    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
      // We may have SPLIT already?
      setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() +
          " FAILED because state=" + node.getState() + "; expected " +
          Arrays.toString(EXPECTED_SPLIT_STATES)));
      return false;
    }

    // Usually this check is redundant because we already check the split switch before submitting
    // a split procedure. Just to be safe, check the switch again; this procedure can be rolled
    // back if the switch was turned off after the submission.
    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() +
          " failed due to split switch off"));
      return false;
    }

    if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) {
      LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(),
        parentHRI);
      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString()
          + " failed as region split is disabled for the table"));
      return false;
    }

    // Set the node state to SPLITTING.
    node.setState(State.SPLITTING);

    // Since we have the lock and the master is coordinating the operation,
    // we are always able to split the region.
    return true;
  }

  /**
   * Action before splitting a region of the table.
   * @param env MasterProcedureEnv
   */
  private void preSplitRegion(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
    }

    // TODO: Clean up split and merge. Currently all over the place.
    // Notify QuotaManager and RegionNormalizer
    try {
      env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion());
    } catch (QuotaExceededException e) {
      env.getMasterServices().getRegionNormalizer().planSkipped(this.getParentRegion(),
          NormalizationPlan.PlanType.SPLIT);
      throw e;
    }
  }

  /**
   * Action after rolling back the split-table-region action.
   * @param env MasterProcedureEnv
   */
  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postRollBackSplitRegionAction(getUser());
    }
  }

  /**
   * Rollback of the close-parent-region step: re-open the parent region.
   */
  private void openParentRegion(MasterProcedureEnv env) throws IOException {
    AssignmentManagerUtil.reopenRegionsForRollback(env,
      Collections.singletonList(getParentRegion()), getRegionReplication(env),
      getParentRegionServerName(env));
  }

  /**
   * Create daughter regions
   */
  @VisibleForTesting
  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
    final FileSystem fs = mfs.getFileSystem();
    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
      env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
    regionFs.createSplitsDir(daughterOneRI, daughterTwoRI);

    Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);

    assertReferenceFileCount(fs, expectedReferences.getFirst(),
      regionFs.getSplitsDir(daughterOneRI));
    // Move the files from the temporary .splits directory to the final /table/region directory.
    regionFs.commitDaughterRegion(daughterOneRI);
    assertReferenceFileCount(fs, expectedReferences.getFirst(),
      new Path(tabledir, daughterOneRI.getEncodedName()));

    assertReferenceFileCount(fs, expectedReferences.getSecond(),
      regionFs.getSplitsDir(daughterTwoRI));
    regionFs.commitDaughterRegion(daughterTwoRI);
    assertReferenceFileCount(fs, expectedReferences.getSecond(),
      new Path(tabledir, daughterTwoRI.getEncodedName()));
  }

  /**
   * Split each store file of the parent region into the daughter region directories by writing
   * reference files, and return how many references were created for each daughter.
   * @param env MasterProcedureEnv
   * @param regionFs the parent region's file system
   */
  private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
      final HRegionFileSystem regionFs) throws IOException {
    final Configuration conf = env.getMasterConfiguration();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    // The following code sets up a thread pool executor with as many slots as
    // there are files to split. It then fires up everything, waits for
    // completion and finally checks for any exception.
    //
    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir.
    // There is nothing to undo on failure; re-running createSplitsDir will clean this up.
    int nbFiles = 0;
    final Map<String, Collection<StoreFileInfo>> files =
        new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount());
    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
      String family = cfd.getNameAsString();
      Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family);
      if (sfis == null) {
        continue;
      }
      Collection<StoreFileInfo> filteredSfis = null;
      for (StoreFileInfo sfi : sfis) {
        // Filter. There is a lag cleaning up compacted reference files. They get cleared
        // after a delay in case outstanding Scanners still have references. Because of this,
        // the listing of the Store content may have straggler reference files. Skip these.
        // It should be safe to skip references at this point because we checked above with
        // the region whether it thinks it is splittable and, if we are here, it thinks it is
        // splittable.
        if (sfi.isReference()) {
          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
          continue;
        }
        if (filteredSfis == null) {
          filteredSfis = new ArrayList<StoreFileInfo>(sfis.size());
          files.put(family, filteredSfis);
        }
        filteredSfis.add(sfi);
        nbFiles++;
      }
    }
    if (nbFiles == 0) {
      // No files need to be split.
      return new Pair<Integer, Integer>(0, 0);
    }
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(
      conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
        conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
      nbFiles);
    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" +
        getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
      Threads.newDaemonThreadFactory("StoreFileSplitter-%1$d"));
    final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);

    // Split each store file.
    for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) {
      byte[] familyName = Bytes.toBytes(e.getKey());
      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
      final Collection<StoreFileInfo> storeFiles = e.getValue();
      if (storeFiles != null && storeFiles.size() > 0) {
        for (StoreFileInfo storeFileInfo : storeFiles) {
          // As this procedure is running on the master, use CacheConfig.DISABLED so that
          // no blocks are cached.
          StoreFileSplitter sfs =
              new StoreFileSplitter(regionFs, familyName, new HStoreFile(
                  storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED));
          futures.add(threadPool.submit(sfs));
        }
      }
    }
    // Shutdown the pool
    threadPool.shutdown();

    // Wait for all the tasks to finish.
    // When splits ran on the RegionServer, the how-long-to-wait configuration was named
    // hbase.regionserver.fileSplitTimeout. If set, use its value.
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
      conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
    try {
      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
      if (stillRunning) {
        threadPool.shutdownNow();
        // Wait for the thread pool to shut down completely.
        while (!threadPool.isTerminated()) {
          Thread.sleep(50);
        }
        throw new IOException(
            "Took too long to split the files and create the references, aborting split");
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    int daughterA = 0;
    int daughterB = 0;
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
      try {
        Pair<Path, Path> p = future.get();
        daughterA += p.getFirst() != null ? 1 : 0;
        daughterB += p.getSecond() != null ? 1 : 0;
      } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
      } catch (ExecutionException e) {
        throw new IOException(e);
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " split storefiles for region " +
          getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA +
          " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<Integer, Integer>(daughterA, daughterB);
  }

  private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount,
      final Path dir) throws IOException {
    if (expectedReferenceFileCount != 0 &&
        expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) {
      throw new IOException("Failing split. Expected reference file count isn't equal.");
    }
  }

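  /**
   * Create the reference files for the given parent store file, one per daughter region.
   * @return the paths of the reference files written for daughter A and daughter B; an entry is
   *         null if no reference file was written for that daughter
   */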
  private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf)
    throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting started for store file: " +
          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
    }

    final byte[] splitRow = getSplitRow();
    final String familyName = Bytes.toString(family);
    final Path pathFirst = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow,
        false, splitPolicy);
    final Path pathSecond = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow,
       true, splitPolicy);
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting complete for store file: " +
          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
    }
    return new Pair<Path, Path>(pathFirst, pathSecond);
  }

  /**
   * Utility class used to do the file splitting / reference writing
   * in parallel instead of sequentially.
   */
  private class StoreFileSplitter implements Callable<Pair<Path, Path>> {
    private final HRegionFileSystem regionFs;
    private final byte[] family;
    private final HStoreFile sf;

    /**
     * Constructor that takes what it needs to split
     * @param regionFs the file system
     * @param family Family that contains the store file
     * @param sf which file
     */
    public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) {
      this.regionFs = regionFs;
      this.sf = sf;
      this.family = family;
    }

    @Override
    public Pair<Path, Path> call() throws IOException {
      return splitStoreFile(regionFs, family, sf);
    }
  }

  /**
   * Pre-split region actions before the Point-of-No-Return step.
   * @param env MasterProcedureEnv
   **/
  private void preSplitRegionBeforeMETA(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final List<Mutation> metaEntries = new ArrayList<Mutation>();
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser());
      try {
        for (Mutation p : metaEntries) {
          RegionInfo.parseRegionName(p.getRow());
        }
      } catch (IOException e) {
        LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor is not parsable "
            + "as a region name. Mutations from coprocessors should only be for the hbase:meta "
            + "table.");
        throw e;
      }
    }
  }

  /**
   * Add the daughter regions to META.
   * @param env MasterProcedureEnv
   */
  private void updateMeta(final MasterProcedureEnv env) throws IOException {
    env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
        daughterOneRI, daughterTwoRI);
  }

  /**
   * Pre-split region actions after the Point-of-No-Return step.
   * @param env MasterProcedureEnv
   **/
  private void preSplitRegionAfterMETA(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitAfterMETAAction(getUser());
    }
  }

  /**
   * Post-split region actions.
   * @param env MasterProcedureEnv
   **/
  private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser());
    }
  }

  private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
    return env.getMasterServices().getAssignmentManager().getRegionStates()
      .getRegionServerOfRegion(getParentRegion());
  }

  private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env)
      throws IOException {
    return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env,
      Stream.of(getParentRegion()), getRegionReplication(env));
  }

  private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env)
      throws IOException {
    List<RegionInfo> hris = new ArrayList<RegionInfo>(2);
    hris.add(daughterOneRI);
    hris.add(daughterTwoRI);
    return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env, hris,
      getRegionReplication(env), getParentRegionServerName(env));
  }

  private int getRegionReplication(final MasterProcedureEnv env) throws IOException {
    final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    return htd.getRegionReplication();
  }

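  /**
   * Copy the parent region's max sequence id into each daughter's WAL directory so that the
   * daughters start with sequence ids above anything already persisted for the parent.
   */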
  private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
    MasterFileSystem fs = env.getMasterFileSystem();
    long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(),
      getParentRegion(), fs::getFileSystem, fs::getWALFileSystem);
    if (maxSequenceId > 0) {
      WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
        getWALRegionDir(env, daughterOneRI), maxSequenceId);
      WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
        getWALRegionDir(env, daughterTwoRI), maxSequenceId);
    }
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all
    // Procedures. Here is a Procedure that has a PONR and cannot be aborted once it enters this
    // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022.
    return isRollbackSupported(getCurrentState()) ? super.abort(env) : false;
  }
}