001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_ENABLE_SEPARATE_CHILD_REGIONS;
021
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.Collections;
025import java.util.List;
026import java.util.ListIterator;
027import java.util.stream.Collectors;
028import java.util.stream.IntStream;
029import java.util.stream.Stream;
030import org.apache.commons.lang3.ArrayUtils;
031import org.apache.hadoop.hbase.HBaseIOException;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.ServerName;
034import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.client.RegionReplicaUtil;
037import org.apache.hadoop.hbase.favored.FavoredNodesManager;
038import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
039import org.apache.hadoop.hbase.util.FutureUtils;
040import org.apache.hadoop.hbase.wal.WALSplitUtil;
041import org.apache.yetus.audience.InterfaceAudience;
042
043import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
044import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoRequest;
045import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
046
047/**
048 * Utility for this assignment package only.
049 */
050@InterfaceAudience.Private
051final class AssignmentManagerUtil {
052  private static final int DEFAULT_REGION_REPLICA = 1;
053
054  private AssignmentManagerUtil() {
055  }
056
057  /**
058   * Raw call to remote regionserver to get info on a particular region.
059   * @throws IOException Let it out so can report this IOE as reason for failure
060   */
061  static GetRegionInfoResponse getRegionInfoResponse(final MasterProcedureEnv env,
062    final ServerName regionLocation, final RegionInfo hri) throws IOException {
063    return getRegionInfoResponse(env, regionLocation, hri, false);
064  }
065
066  static GetRegionInfoResponse getRegionInfoResponse(final MasterProcedureEnv env,
067    final ServerName regionLocation, final RegionInfo hri, boolean includeBestSplitRow)
068    throws IOException {
069    AsyncRegionServerAdmin admin =
070      env.getMasterServices().getAsyncClusterConnection().getRegionServerAdmin(regionLocation);
071    GetRegionInfoRequest request = null;
072    if (includeBestSplitRow) {
073      request = RequestConverter.buildGetRegionInfoRequest(hri.getRegionName(), false, true);
074    } else {
075      request = RequestConverter.buildGetRegionInfoRequest(hri.getRegionName());
076    }
077    return FutureUtils.get(admin.getRegionInfo(request));
078  }
079
080  private static void lock(List<RegionStateNode> regionNodes) {
081    regionNodes.iterator().forEachRemaining(RegionStateNode::lock);
082  }
083
084  private static void unlock(List<RegionStateNode> regionNodes) {
085    for (ListIterator<RegionStateNode> iter = regionNodes.listIterator(regionNodes.size()); iter
086      .hasPrevious();) {
087      iter.previous().unlock();
088    }
089  }
090
091  static TransitRegionStateProcedure[] createUnassignProceduresForSplitOrMerge(
092    MasterProcedureEnv env, Stream<RegionInfo> regions, int regionReplication) throws IOException {
093    List<RegionStateNode> regionNodes = regions
094      .flatMap(hri -> IntStream.range(0, regionReplication)
095        .mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(hri, i)))
096      .map(env.getAssignmentManager().getRegionStates()::getOrCreateRegionStateNode)
097      .collect(Collectors.toList());
098    TransitRegionStateProcedure[] procs = new TransitRegionStateProcedure[regionNodes.size()];
099    boolean rollback = true;
100    int i = 0;
101    // hold the lock at once, and then release it in finally. This is important as SCP may jump in
102    // if we release the lock in the middle when we want to do rollback, and cause problems.
103    lock(regionNodes);
104    try {
105      for (; i < procs.length; i++) {
106        RegionStateNode regionNode = regionNodes.get(i);
107        TransitRegionStateProcedure proc =
108          TransitRegionStateProcedure.unassignSplitMerge(env, regionNode.getRegionInfo());
109        if (regionNode.getProcedure() != null) {
110          throw new HBaseIOException(
111            "The parent region " + regionNode + " is currently in transition, give up");
112        }
113        regionNode.setProcedure(proc);
114        procs[i] = proc;
115      }
116      // all succeeded, set rollback to false
117      rollback = false;
118    } finally {
119      if (rollback) {
120        for (;;) {
121          i--;
122          if (i < 0) {
123            break;
124          }
125          RegionStateNode regionNode = regionNodes.get(i);
126          regionNode.unsetProcedure(procs[i]);
127        }
128      }
129      unlock(regionNodes);
130    }
131    return procs;
132  }
133
134  /**
135   * Create assign procedures for the give regions, according to the {@code regionReplication}.
136   * <p/>
137   * For rolling back, we will submit procedures directly to the {@code ProcedureExecutor}, so it is
138   * possible that we persist the newly scheduled procedures, and then crash before persisting the
139   * rollback state, so when we arrive here the second time, it is possible that some regions have
140   * already been associated with a TRSP.
141   * @param ignoreIfInTransition if true, will skip creating TRSP for the given region if it is
142   *                             already in transition, otherwise we will add an assert that it
143   *                             should not in transition.
144   */
145  private static TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env,
146    List<RegionInfo> regions, int regionReplication, ServerName targetServer,
147    boolean ignoreIfInTransition) {
148    // create the assign procs only for the primary region using the targetServer
149    TransitRegionStateProcedure[] primaryRegionProcs =
150      regions.stream().map(env.getAssignmentManager().getRegionStates()::getOrCreateRegionStateNode)
151        .map(regionNode -> {
152          TransitRegionStateProcedure proc =
153            TransitRegionStateProcedure.assign(env, regionNode.getRegionInfo(), targetServer);
154          regionNode.lock();
155          try {
156            if (ignoreIfInTransition) {
157              if (regionNode.isInTransition()) {
158                return null;
159              }
160            } else {
161              // should never fail, as we have the exclusive region lock, and the region is newly
162              // created, or has been successfully closed so should not be on any servers, so SCP
163              // will
164              // not process it either.
165              assert !regionNode.isInTransition();
166            }
167            regionNode.setProcedure(proc);
168          } finally {
169            regionNode.unlock();
170          }
171          return proc;
172        }).filter(p -> p != null).toArray(TransitRegionStateProcedure[]::new);
173    if (regionReplication == DEFAULT_REGION_REPLICA) {
174      // this is the default case
175      return primaryRegionProcs;
176    }
177    // collect the replica region infos
178    List<RegionInfo> replicaRegionInfos =
179      new ArrayList<RegionInfo>(regions.size() * (regionReplication - 1));
180    for (RegionInfo hri : regions) {
181      // start the index from 1
182      for (int i = 1; i < regionReplication; i++) {
183        RegionInfo ri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
184        // apply ignoreRITs to replica regions as well.
185        if (
186          !ignoreIfInTransition || !env.getAssignmentManager().getRegionStates()
187            .getOrCreateRegionStateNode(ri).isInTransition()
188        ) {
189          replicaRegionInfos.add(ri);
190        }
191      }
192    }
193
194    // create round robin procs. Note that we exclude the primary region's target server
195    TransitRegionStateProcedure[] replicaRegionAssignProcs =
196      env.getAssignmentManager().createRoundRobinAssignProcedures(replicaRegionInfos,
197        Collections.singletonList(targetServer));
198    // combine both the procs and return the result
199    return ArrayUtils.addAll(primaryRegionProcs, replicaRegionAssignProcs);
200  }
201
202  /**
203   * Create round robin assign procedures for the given regions, according to the
204   * {@code regionReplication}.
205   * <p/>
206   * For rolling back, we will submit procedures directly to the {@code ProcedureExecutor}, so it is
207   * possible that we persist the newly scheduled procedures, and then crash before persisting the
208   * rollback state, so when we arrive here the second time, it is possible that some regions have
209   * already been associated with a TRSP.
210   * @param ignoreIfInTransition if true, will skip creating TRSP for the given region if it is
211   *                             already in transition, otherwise we will add an assert that it
212   *                             should not in transition.
213   */
214  private static TransitRegionStateProcedure[] createRoundRobinAssignProcedures(
215    MasterProcedureEnv env, List<RegionInfo> regions, int regionReplication,
216    List<ServerName> serversToExclude, boolean ignoreIfInTransition) {
217    List<RegionInfo> regionsAndReplicas = new ArrayList<>(regions);
218    if (regionReplication != DEFAULT_REGION_REPLICA) {
219
220      // collect the replica region infos
221      List<RegionInfo> replicaRegionInfos =
222        new ArrayList<RegionInfo>(regions.size() * (regionReplication - 1));
223      for (RegionInfo hri : regions) {
224        // start the index from 1
225        for (int i = 1; i < regionReplication; i++) {
226          replicaRegionInfos.add(RegionReplicaUtil.getRegionInfoForReplica(hri, i));
227        }
228      }
229      regionsAndReplicas.addAll(replicaRegionInfos);
230    }
231    if (ignoreIfInTransition) {
232      for (RegionInfo region : regionsAndReplicas) {
233        if (
234          env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(region)
235            .isInTransition()
236        ) {
237          return null;
238        }
239      }
240    }
241    // create round robin procs. Note that we exclude the primary region's target server
242    return env.getAssignmentManager().createRoundRobinAssignProcedures(regionsAndReplicas,
243      serversToExclude);
244  }
245
246  static TransitRegionStateProcedure[] createAssignProceduresForSplitDaughters(
247    MasterProcedureEnv env, List<RegionInfo> daughters, int regionReplication,
248    ServerName parentServer) {
249    if (
250      env.getMasterConfiguration().getBoolean(HConstants.HBASE_ENABLE_SEPARATE_CHILD_REGIONS,
251        DEFAULT_HBASE_ENABLE_SEPARATE_CHILD_REGIONS)
252    ) {
253      // keep one daughter on the parent region server
254      TransitRegionStateProcedure[] daughterOne = createAssignProcedures(env,
255        Collections.singletonList(daughters.get(0)), regionReplication, parentServer, false);
256      // round robin assign the other daughter
257      TransitRegionStateProcedure[] daughterTwo =
258        createRoundRobinAssignProcedures(env, Collections.singletonList(daughters.get(1)),
259          regionReplication, Collections.singletonList(parentServer), false);
260      return ArrayUtils.addAll(daughterOne, daughterTwo);
261    }
262    return createAssignProceduresForOpeningNewRegions(env, daughters, regionReplication,
263      parentServer);
264  }
265
266  static TransitRegionStateProcedure[] createAssignProceduresForOpeningNewRegions(
267    MasterProcedureEnv env, List<RegionInfo> regions, int regionReplication,
268    ServerName targetServer) {
269    return createAssignProcedures(env, regions, regionReplication, targetServer, false);
270  }
271
272  static void reopenRegionsForRollback(MasterProcedureEnv env, List<RegionInfo> regions,
273    int regionReplication, ServerName targetServer) {
274    TransitRegionStateProcedure[] procs =
275      createAssignProcedures(env, regions, regionReplication, targetServer, true);
276    if (procs.length > 0) {
277      env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs);
278    }
279  }
280
281  static void removeNonDefaultReplicas(MasterProcedureEnv env, Stream<RegionInfo> regions,
282    int regionReplication) {
283    // Remove from in-memory states
284    regions.flatMap(hri -> IntStream.range(1, regionReplication)
285      .mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(hri, i))).forEach(hri -> {
286        env.getAssignmentManager().getRegionStates().deleteRegion(hri);
287        env.getMasterServices().getServerManager().removeRegion(hri);
288        FavoredNodesManager fnm = env.getMasterServices().getFavoredNodesManager();
289        if (fnm != null) {
290          fnm.deleteFavoredNodesForRegions(Collections.singletonList(hri));
291        }
292      });
293  }
294
295  static void checkClosedRegion(MasterProcedureEnv env, RegionInfo regionInfo) throws IOException {
296    if (WALSplitUtil.hasRecoveredEdits(env.getMasterConfiguration(), regionInfo)) {
297      throw new IOException("Recovered.edits are found in Region: " + regionInfo
298        + ", abort split/merge to prevent data loss");
299    }
300  }
301}