001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_ENABLE_SEPARATE_CHILD_REGIONS;
021
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.Collections;
025import java.util.List;
026import java.util.ListIterator;
027import java.util.stream.Collectors;
028import java.util.stream.IntStream;
029import java.util.stream.Stream;
030import org.apache.commons.lang3.ArrayUtils;
031import org.apache.hadoop.hbase.HBaseIOException;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.ServerName;
034import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.client.RegionReplicaUtil;
037import org.apache.hadoop.hbase.favored.FavoredNodesManager;
038import org.apache.hadoop.hbase.master.RegionState;
039import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
040import org.apache.hadoop.hbase.util.FutureUtils;
041import org.apache.hadoop.hbase.wal.WALSplitUtil;
042import org.apache.yetus.audience.InterfaceAudience;
043
044import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
045import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoRequest;
046import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
047
048/**
049 * Utility for this assignment package only.
050 */
051@InterfaceAudience.Private
052final class AssignmentManagerUtil {
053  private static final int DEFAULT_REGION_REPLICA = 1;
054
055  private AssignmentManagerUtil() {
056  }
057
058  /**
059   * Raw call to remote regionserver to get info on a particular region.
060   * @throws IOException Let it out so can report this IOE as reason for failure
061   */
062  static GetRegionInfoResponse getRegionInfoResponse(final MasterProcedureEnv env,
063    final ServerName regionLocation, final RegionInfo hri) throws IOException {
064    return getRegionInfoResponse(env, regionLocation, hri, false);
065  }
066
067  static GetRegionInfoResponse getRegionInfoResponse(final MasterProcedureEnv env,
068    final ServerName regionLocation, final RegionInfo hri, boolean includeBestSplitRow)
069    throws IOException {
070    AsyncRegionServerAdmin admin =
071      env.getMasterServices().getAsyncClusterConnection().getRegionServerAdmin(regionLocation);
072    GetRegionInfoRequest request = null;
073    if (includeBestSplitRow) {
074      request = RequestConverter.buildGetRegionInfoRequest(hri.getRegionName(), false, true);
075    } else {
076      request = RequestConverter.buildGetRegionInfoRequest(hri.getRegionName());
077    }
078    return FutureUtils.get(admin.getRegionInfo(request));
079  }
080
081  private static void lock(List<RegionStateNode> regionNodes) {
082    regionNodes.iterator().forEachRemaining(RegionStateNode::lock);
083  }
084
085  private static void unlock(List<RegionStateNode> regionNodes) {
086    for (ListIterator<RegionStateNode> iter = regionNodes.listIterator(regionNodes.size()); iter
087      .hasPrevious();) {
088      iter.previous().unlock();
089    }
090  }
091
092  static TransitRegionStateProcedure[] createUnassignProceduresForSplitOrMerge(
093    MasterProcedureEnv env, Stream<RegionInfo> regions, int regionReplication) throws IOException {
094    List<RegionStateNode> regionNodes = regions
095      .flatMap(hri -> IntStream.range(0, regionReplication)
096        .mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(hri, i)))
097      .map(env.getAssignmentManager().getRegionStates()::getOrCreateRegionStateNode)
098      .collect(Collectors.toList());
099    TransitRegionStateProcedure[] procs = new TransitRegionStateProcedure[regionNodes.size()];
100    boolean rollback = true;
101    int i = 0;
102    // hold the lock at once, and then release it in finally. This is important as SCP may jump in
103    // if we release the lock in the middle when we want to do rollback, and cause problems.
104    lock(regionNodes);
105    try {
106      for (; i < procs.length; i++) {
107        RegionStateNode regionNode = regionNodes.get(i);
108        TransitRegionStateProcedure proc =
109          TransitRegionStateProcedure.unassignSplitMerge(env, regionNode.getRegionInfo());
110        if (regionNode.getProcedure() != null) {
111          throw new HBaseIOException(
112            "The parent region " + regionNode + " is currently in transition, give up");
113        }
114        regionNode.setProcedure(proc);
115        procs[i] = proc;
116      }
117      // all succeeded, set rollback to false
118      rollback = false;
119    } finally {
120      if (rollback) {
121        for (;;) {
122          i--;
123          if (i < 0) {
124            break;
125          }
126          RegionStateNode regionNode = regionNodes.get(i);
127          regionNode.unsetProcedure(procs[i]);
128        }
129      }
130      unlock(regionNodes);
131    }
132    return procs;
133  }
134
135  /**
136   * Create assign procedures for the give regions, according to the {@code regionReplication}.
137   * <p/>
138   * For rolling back, we will submit procedures directly to the {@code ProcedureExecutor}, so it is
139   * possible that we persist the newly scheduled procedures, and then crash before persisting the
140   * rollback state, so when we arrive here the second time, it is possible that some regions have
141   * already been associated with a TRSP.
142   * @param ignoreIfInTransition if true, will skip creating TRSP for the given region if it is
143   *                             already in transition, otherwise we will add an assert that it
144   *                             should not in transition.
145   */
146  private static TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env,
147    List<RegionInfo> regions, int regionReplication, ServerName targetServer,
148    boolean ignoreIfInTransition) {
149    // create the assign procs only for the primary region using the targetServer
150    TransitRegionStateProcedure[] primaryRegionProcs =
151      regions.stream().map(env.getAssignmentManager().getRegionStates()::getOrCreateRegionStateNode)
152        .map(regionNode -> {
153          TransitRegionStateProcedure proc =
154            TransitRegionStateProcedure.assign(env, regionNode.getRegionInfo(), targetServer);
155          regionNode.lock();
156          try {
157            if (ignoreIfInTransition) {
158              if (regionNode.isTransitionScheduled()) {
159                return null;
160              }
161            } else {
162              // should never fail, as we have the exclusive region lock, and the region is newly
163              // created, or has been successfully closed so should not be on any servers, so SCP
164              // will
165              // not process it either.
166              assert !regionNode.isTransitionScheduled();
167            }
168            regionNode.setProcedure(proc);
169          } finally {
170            regionNode.unlock();
171          }
172          return proc;
173        }).filter(p -> p != null).toArray(TransitRegionStateProcedure[]::new);
174    if (regionReplication == DEFAULT_REGION_REPLICA) {
175      // this is the default case
176      return primaryRegionProcs;
177    }
178    // collect the replica region infos
179    List<RegionInfo> replicaRegionInfos =
180      new ArrayList<RegionInfo>(regions.size() * (regionReplication - 1));
181    for (RegionInfo hri : regions) {
182      // start the index from 1
183      for (int i = 1; i < regionReplication; i++) {
184        RegionInfo ri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
185        // apply ignoreRITs to replica regions as well.
186        if (
187          !ignoreIfInTransition || !env.getAssignmentManager().getRegionStates()
188            .getOrCreateRegionStateNode(ri).isTransitionScheduled()
189        ) {
190          replicaRegionInfos.add(ri);
191        }
192      }
193    }
194
195    // create round robin procs. Note that we exclude the primary region's target server
196    TransitRegionStateProcedure[] replicaRegionAssignProcs =
197      env.getAssignmentManager().createRoundRobinAssignProcedures(replicaRegionInfos,
198        Collections.singletonList(targetServer));
199    // combine both the procs and return the result
200    return ArrayUtils.addAll(primaryRegionProcs, replicaRegionAssignProcs);
201  }
202
203  /**
204   * Create round robin assign procedures for the given regions, according to the
205   * {@code regionReplication}.
206   * <p/>
207   * For rolling back, we will submit procedures directly to the {@code ProcedureExecutor}, so it is
208   * possible that we persist the newly scheduled procedures, and then crash before persisting the
209   * rollback state, so when we arrive here the second time, it is possible that some regions have
210   * already been associated with a TRSP.
211   * @param ignoreIfInTransition if true, will skip creating TRSP for the given region if it is
212   *                             already in transition, otherwise we will add an assert that it
213   *                             should not in transition.
214   */
215  private static TransitRegionStateProcedure[] createRoundRobinAssignProcedures(
216    MasterProcedureEnv env, List<RegionInfo> regions, int regionReplication,
217    List<ServerName> serversToExclude, boolean ignoreIfInTransition) {
218    List<RegionInfo> regionsAndReplicas = new ArrayList<>(regions);
219    if (regionReplication != DEFAULT_REGION_REPLICA) {
220
221      // collect the replica region infos
222      List<RegionInfo> replicaRegionInfos =
223        new ArrayList<RegionInfo>(regions.size() * (regionReplication - 1));
224      for (RegionInfo hri : regions) {
225        // start the index from 1
226        for (int i = 1; i < regionReplication; i++) {
227          replicaRegionInfos.add(RegionReplicaUtil.getRegionInfoForReplica(hri, i));
228        }
229      }
230      regionsAndReplicas.addAll(replicaRegionInfos);
231    }
232    if (ignoreIfInTransition) {
233      for (RegionInfo region : regionsAndReplicas) {
234        if (
235          env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(region)
236            .isTransitionScheduled()
237        ) {
238          return null;
239        }
240      }
241    }
242    // create round robin procs. Note that we exclude the primary region's target server
243    return env.getAssignmentManager().createRoundRobinAssignProcedures(regionsAndReplicas,
244      serversToExclude);
245  }
246
247  static TransitRegionStateProcedure[] createAssignProceduresForSplitDaughters(
248    MasterProcedureEnv env, List<RegionInfo> daughters, int regionReplication,
249    ServerName parentServer) {
250    if (
251      env.getMasterConfiguration().getBoolean(HConstants.HBASE_ENABLE_SEPARATE_CHILD_REGIONS,
252        DEFAULT_HBASE_ENABLE_SEPARATE_CHILD_REGIONS)
253    ) {
254      // keep one daughter on the parent region server
255      TransitRegionStateProcedure[] daughterOne = createAssignProcedures(env,
256        Collections.singletonList(daughters.get(0)), regionReplication, parentServer, false);
257      // round robin assign the other daughter
258      TransitRegionStateProcedure[] daughterTwo =
259        createRoundRobinAssignProcedures(env, Collections.singletonList(daughters.get(1)),
260          regionReplication, Collections.singletonList(parentServer), false);
261      return ArrayUtils.addAll(daughterOne, daughterTwo);
262    }
263    return createAssignProceduresForOpeningNewRegions(env, daughters, regionReplication,
264      parentServer);
265  }
266
267  static TransitRegionStateProcedure[] createAssignProceduresForOpeningNewRegions(
268    MasterProcedureEnv env, List<RegionInfo> regions, int regionReplication,
269    ServerName targetServer) {
270    return createAssignProcedures(env, regions, regionReplication, targetServer, false);
271  }
272
273  static void reopenRegionsForRollback(MasterProcedureEnv env, List<RegionInfo> regions,
274    int regionReplication, ServerName targetServer) {
275    TransitRegionStateProcedure[] procs =
276      createAssignProcedures(env, regions, regionReplication, targetServer, true);
277    if (procs.length > 0) {
278      env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs);
279    }
280  }
281
282  static void removeNonDefaultReplicas(MasterProcedureEnv env, Stream<RegionInfo> regions,
283    int regionReplication) {
284    // Remove from in-memory states
285    regions.flatMap(hri -> IntStream.range(1, regionReplication)
286      .mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(hri, i))).forEach(hri -> {
287        env.getAssignmentManager().getRegionStates().deleteRegion(hri);
288        env.getMasterServices().getServerManager().removeRegion(hri);
289        FavoredNodesManager fnm = env.getMasterServices().getFavoredNodesManager();
290        if (fnm != null) {
291          fnm.deleteFavoredNodesForRegions(Collections.singletonList(hri));
292        }
293      });
294  }
295
296  static void checkClosedRegion(MasterProcedureEnv env, RegionInfo regionInfo) throws IOException {
297    if (WALSplitUtil.hasRecoveredEdits(env.getMasterConfiguration(), regionInfo)) {
298      throw new IOException("Recovered.edits are found in Region: " + regionInfo
299        + ", abort split/merge to prevent data loss");
300    }
301  }
302
303  /**
304   * For splitting, need to test both region info and state, and will return true if either of the
305   * test returns true. Please see the comments in
306   * {@link AssignmentManager#markRegionAsSplit(RegionInfo, ServerName, RegionInfo, RegionInfo)} for
307   * more details on why we need to test two conditions.
308   */
309  static boolean isSplitOrMerged(RegionStateNode regionStateNode) {
310    return regionStateNode.getState() == RegionState.State.SPLIT
311      || regionStateNode.getRegionInfo().isSplit()
312      || regionStateNode.getState() == RegionState.State.MERGED;
313  }
314}