001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_ENABLE_SEPARATE_CHILD_REGIONS; 021 022import java.io.IOException; 023import java.util.ArrayList; 024import java.util.Collections; 025import java.util.List; 026import java.util.ListIterator; 027import java.util.stream.Collectors; 028import java.util.stream.IntStream; 029import java.util.stream.Stream; 030import org.apache.commons.lang3.ArrayUtils; 031import org.apache.hadoop.hbase.HBaseIOException; 032import org.apache.hadoop.hbase.HConstants; 033import org.apache.hadoop.hbase.ServerName; 034import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin; 035import org.apache.hadoop.hbase.client.RegionInfo; 036import org.apache.hadoop.hbase.client.RegionReplicaUtil; 037import org.apache.hadoop.hbase.favored.FavoredNodesManager; 038import org.apache.hadoop.hbase.master.RegionState; 039import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 040import org.apache.hadoop.hbase.util.FutureUtils; 041import org.apache.hadoop.hbase.wal.WALSplitUtil; 042import org.apache.yetus.audience.InterfaceAudience; 043 044import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter; 045import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoRequest; 046import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; 047 048/** 049 * Utility for this assignment package only. 050 */ 051@InterfaceAudience.Private 052final class AssignmentManagerUtil { 053 private static final int DEFAULT_REGION_REPLICA = 1; 054 055 private AssignmentManagerUtil() { 056 } 057 058 /** 059 * Raw call to remote regionserver to get info on a particular region. 060 * @throws IOException Let it out so can report this IOE as reason for failure 061 */ 062 static GetRegionInfoResponse getRegionInfoResponse(final MasterProcedureEnv env, 063 final ServerName regionLocation, final RegionInfo hri) throws IOException { 064 return getRegionInfoResponse(env, regionLocation, hri, false); 065 } 066 067 static GetRegionInfoResponse getRegionInfoResponse(final MasterProcedureEnv env, 068 final ServerName regionLocation, final RegionInfo hri, boolean includeBestSplitRow) 069 throws IOException { 070 AsyncRegionServerAdmin admin = 071 env.getMasterServices().getAsyncClusterConnection().getRegionServerAdmin(regionLocation); 072 GetRegionInfoRequest request = null; 073 if (includeBestSplitRow) { 074 request = RequestConverter.buildGetRegionInfoRequest(hri.getRegionName(), false, true); 075 } else { 076 request = RequestConverter.buildGetRegionInfoRequest(hri.getRegionName()); 077 } 078 return FutureUtils.get(admin.getRegionInfo(request)); 079 } 080 081 private static void lock(List<RegionStateNode> regionNodes) { 082 regionNodes.iterator().forEachRemaining(RegionStateNode::lock); 083 } 084 085 private static void unlock(List<RegionStateNode> regionNodes) { 086 for (ListIterator<RegionStateNode> iter = regionNodes.listIterator(regionNodes.size()); iter 087 .hasPrevious();) { 088 iter.previous().unlock(); 089 } 090 } 091 092 static TransitRegionStateProcedure[] createUnassignProceduresForSplitOrMerge( 093 MasterProcedureEnv env, Stream<RegionInfo> regions, int regionReplication) throws IOException { 094 List<RegionStateNode> regionNodes = regions 095 .flatMap(hri -> IntStream.range(0, regionReplication) 096 .mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(hri, i))) 097 .map(env.getAssignmentManager().getRegionStates()::getOrCreateRegionStateNode) 098 .collect(Collectors.toList()); 099 TransitRegionStateProcedure[] procs = new TransitRegionStateProcedure[regionNodes.size()]; 100 boolean rollback = true; 101 int i = 0; 102 // hold the lock at once, and then release it in finally. This is important as SCP may jump in 103 // if we release the lock in the middle when we want to do rollback, and cause problems. 104 lock(regionNodes); 105 try { 106 for (; i < procs.length; i++) { 107 RegionStateNode regionNode = regionNodes.get(i); 108 TransitRegionStateProcedure proc = 109 TransitRegionStateProcedure.unassignSplitMerge(env, regionNode.getRegionInfo()); 110 if (regionNode.getProcedure() != null) { 111 throw new HBaseIOException( 112 "The parent region " + regionNode + " is currently in transition, give up"); 113 } 114 regionNode.setProcedure(proc); 115 procs[i] = proc; 116 } 117 // all succeeded, set rollback to false 118 rollback = false; 119 } finally { 120 if (rollback) { 121 for (;;) { 122 i--; 123 if (i < 0) { 124 break; 125 } 126 RegionStateNode regionNode = regionNodes.get(i); 127 regionNode.unsetProcedure(procs[i]); 128 } 129 } 130 unlock(regionNodes); 131 } 132 return procs; 133 } 134 135 /** 136 * Create assign procedures for the give regions, according to the {@code regionReplication}. 137 * <p/> 138 * For rolling back, we will submit procedures directly to the {@code ProcedureExecutor}, so it is 139 * possible that we persist the newly scheduled procedures, and then crash before persisting the 140 * rollback state, so when we arrive here the second time, it is possible that some regions have 141 * already been associated with a TRSP. 142 * @param ignoreIfInTransition if true, will skip creating TRSP for the given region if it is 143 * already in transition, otherwise we will add an assert that it 144 * should not in transition. 145 */ 146 private static TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env, 147 List<RegionInfo> regions, int regionReplication, ServerName targetServer, 148 boolean ignoreIfInTransition) { 149 // create the assign procs only for the primary region using the targetServer 150 TransitRegionStateProcedure[] primaryRegionProcs = 151 regions.stream().map(env.getAssignmentManager().getRegionStates()::getOrCreateRegionStateNode) 152 .map(regionNode -> { 153 TransitRegionStateProcedure proc = 154 TransitRegionStateProcedure.assign(env, regionNode.getRegionInfo(), targetServer); 155 regionNode.lock(); 156 try { 157 if (ignoreIfInTransition) { 158 if (regionNode.isTransitionScheduled()) { 159 return null; 160 } 161 } else { 162 // should never fail, as we have the exclusive region lock, and the region is newly 163 // created, or has been successfully closed so should not be on any servers, so SCP 164 // will 165 // not process it either. 166 assert !regionNode.isTransitionScheduled(); 167 } 168 regionNode.setProcedure(proc); 169 } finally { 170 regionNode.unlock(); 171 } 172 return proc; 173 }).filter(p -> p != null).toArray(TransitRegionStateProcedure[]::new); 174 if (regionReplication == DEFAULT_REGION_REPLICA) { 175 // this is the default case 176 return primaryRegionProcs; 177 } 178 // collect the replica region infos 179 List<RegionInfo> replicaRegionInfos = 180 new ArrayList<RegionInfo>(regions.size() * (regionReplication - 1)); 181 for (RegionInfo hri : regions) { 182 // start the index from 1 183 for (int i = 1; i < regionReplication; i++) { 184 RegionInfo ri = RegionReplicaUtil.getRegionInfoForReplica(hri, i); 185 // apply ignoreRITs to replica regions as well. 186 if ( 187 !ignoreIfInTransition || !env.getAssignmentManager().getRegionStates() 188 .getOrCreateRegionStateNode(ri).isTransitionScheduled() 189 ) { 190 replicaRegionInfos.add(ri); 191 } 192 } 193 } 194 195 // create round robin procs. Note that we exclude the primary region's target server 196 TransitRegionStateProcedure[] replicaRegionAssignProcs = 197 env.getAssignmentManager().createRoundRobinAssignProcedures(replicaRegionInfos, 198 Collections.singletonList(targetServer)); 199 // combine both the procs and return the result 200 return ArrayUtils.addAll(primaryRegionProcs, replicaRegionAssignProcs); 201 } 202 203 /** 204 * Create round robin assign procedures for the given regions, according to the 205 * {@code regionReplication}. 206 * <p/> 207 * For rolling back, we will submit procedures directly to the {@code ProcedureExecutor}, so it is 208 * possible that we persist the newly scheduled procedures, and then crash before persisting the 209 * rollback state, so when we arrive here the second time, it is possible that some regions have 210 * already been associated with a TRSP. 211 * @param ignoreIfInTransition if true, will skip creating TRSP for the given region if it is 212 * already in transition, otherwise we will add an assert that it 213 * should not in transition. 214 */ 215 private static TransitRegionStateProcedure[] createRoundRobinAssignProcedures( 216 MasterProcedureEnv env, List<RegionInfo> regions, int regionReplication, 217 List<ServerName> serversToExclude, boolean ignoreIfInTransition) { 218 List<RegionInfo> regionsAndReplicas = new ArrayList<>(regions); 219 if (regionReplication != DEFAULT_REGION_REPLICA) { 220 221 // collect the replica region infos 222 List<RegionInfo> replicaRegionInfos = 223 new ArrayList<RegionInfo>(regions.size() * (regionReplication - 1)); 224 for (RegionInfo hri : regions) { 225 // start the index from 1 226 for (int i = 1; i < regionReplication; i++) { 227 replicaRegionInfos.add(RegionReplicaUtil.getRegionInfoForReplica(hri, i)); 228 } 229 } 230 regionsAndReplicas.addAll(replicaRegionInfos); 231 } 232 if (ignoreIfInTransition) { 233 for (RegionInfo region : regionsAndReplicas) { 234 if ( 235 env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(region) 236 .isTransitionScheduled() 237 ) { 238 return null; 239 } 240 } 241 } 242 // create round robin procs. Note that we exclude the primary region's target server 243 return env.getAssignmentManager().createRoundRobinAssignProcedures(regionsAndReplicas, 244 serversToExclude); 245 } 246 247 static TransitRegionStateProcedure[] createAssignProceduresForSplitDaughters( 248 MasterProcedureEnv env, List<RegionInfo> daughters, int regionReplication, 249 ServerName parentServer) { 250 if ( 251 env.getMasterConfiguration().getBoolean(HConstants.HBASE_ENABLE_SEPARATE_CHILD_REGIONS, 252 DEFAULT_HBASE_ENABLE_SEPARATE_CHILD_REGIONS) 253 ) { 254 // keep one daughter on the parent region server 255 TransitRegionStateProcedure[] daughterOne = createAssignProcedures(env, 256 Collections.singletonList(daughters.get(0)), regionReplication, parentServer, false); 257 // round robin assign the other daughter 258 TransitRegionStateProcedure[] daughterTwo = 259 createRoundRobinAssignProcedures(env, Collections.singletonList(daughters.get(1)), 260 regionReplication, Collections.singletonList(parentServer), false); 261 return ArrayUtils.addAll(daughterOne, daughterTwo); 262 } 263 return createAssignProceduresForOpeningNewRegions(env, daughters, regionReplication, 264 parentServer); 265 } 266 267 static TransitRegionStateProcedure[] createAssignProceduresForOpeningNewRegions( 268 MasterProcedureEnv env, List<RegionInfo> regions, int regionReplication, 269 ServerName targetServer) { 270 return createAssignProcedures(env, regions, regionReplication, targetServer, false); 271 } 272 273 static void reopenRegionsForRollback(MasterProcedureEnv env, List<RegionInfo> regions, 274 int regionReplication, ServerName targetServer) { 275 TransitRegionStateProcedure[] procs = 276 createAssignProcedures(env, regions, regionReplication, targetServer, true); 277 if (procs.length > 0) { 278 env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs); 279 } 280 } 281 282 static void removeNonDefaultReplicas(MasterProcedureEnv env, Stream<RegionInfo> regions, 283 int regionReplication) { 284 // Remove from in-memory states 285 regions.flatMap(hri -> IntStream.range(1, regionReplication) 286 .mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(hri, i))).forEach(hri -> { 287 env.getAssignmentManager().getRegionStates().deleteRegion(hri); 288 env.getMasterServices().getServerManager().removeRegion(hri); 289 FavoredNodesManager fnm = env.getMasterServices().getFavoredNodesManager(); 290 if (fnm != null) { 291 fnm.deleteFavoredNodesForRegions(Collections.singletonList(hri)); 292 } 293 }); 294 } 295 296 static void checkClosedRegion(MasterProcedureEnv env, RegionInfo regionInfo) throws IOException { 297 if (WALSplitUtil.hasRecoveredEdits(env.getMasterConfiguration(), regionInfo)) { 298 throw new IOException("Recovered.edits are found in Region: " + regionInfo 299 + ", abort split/merge to prevent data loss"); 300 } 301 } 302 303 /** 304 * For splitting, need to test both region info and state, and will return true if either of the 305 * test returns true. Please see the comments in 306 * {@link AssignmentManager#markRegionAsSplit(RegionInfo, ServerName, RegionInfo, RegionInfo)} for 307 * more details on why we need to test two conditions. 308 */ 309 static boolean isSplitOrMerged(RegionStateNode regionStateNode) { 310 return regionStateNode.getState() == RegionState.State.SPLIT 311 || regionStateNode.getRegionInfo().isSplit() 312 || regionStateNode.getState() == RegionState.State.MERGED; 313 } 314}