/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.janitor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap;
import org.apache.hbase.thirdparty.com.google.common.collect.ListMultimap;

/**
 * Server-side fixing of bad or inconsistent state in hbase:meta.
 * Distinct from MetaTableAccessor because {@link MetaTableAccessor} is about low-level
 * manipulations driven by the Master. This class MetaFixer is
 * employed by the Master and it 'knows' about holes and orphans
 * and encapsulates their fixing on behalf of the Master.
 */
@InterfaceAudience.Private
public class MetaFixer {
  private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
  private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
  private static final int MAX_MERGE_COUNT_DEFAULT = 64;

  private final MasterServices masterServices;
  /**
   * Maximum of how many regions to merge at a time.
   */
  private final int maxMergeCount;

  public MetaFixer(MasterServices masterServices) {
    this.masterServices = masterServices;
    this.maxMergeCount = this.masterServices.getConfiguration().
      getInt(MAX_MERGE_COUNT_KEY, MAX_MERGE_COUNT_DEFAULT);
  }

  /**
   * Repair hbase:meta based on the last CatalogJanitor consistency report: plug holes,
   * merge overlaps, then run the ReplicationBarrierCleaner. No-op (with an INFO log)
   * if the CatalogJanitor has not produced a report yet.
   */
  public void fix() throws IOException {
    Report report = this.masterServices.getCatalogJanitor().getLastReport();
    if (report == null) {
      LOG.info("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in " +
        "shell or wait until CatalogJanitor chore runs.");
      return;
    }
    fixHoles(report);
    fixOverlaps(report);
    // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows which
    // can help cleaning up damaged hbase:meta.
    this.masterServices.runReplicationBarrierCleaner();
  }

  /**
   * If hole, it papers it over by adding a region in the filesystem and to hbase:meta.
   * Does not assign.
   */
  void fixHoles(Report report) {
    final List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles();
    if (holes.isEmpty()) {
      LOG.info("CatalogJanitor Report contains no holes to fix. Skipping.");
      return;
    }

    LOG.info("Identified {} region holes to fix. Detailed fixup progress logged at DEBUG.",
      holes.size());

    // Create covering RegionInfos, persist them to hbase:meta (best effort), then schedule
    // assignment only for those successfully written to meta.
    final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes);
    final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos);
    final TransitRegionStateProcedure[] assignProcedures = masterServices
      .getAssignmentManager()
      .createRoundRobinAssignProcedures(newMetaEntries);

    masterServices.getMasterProcedureExecutor().submitProcedures(assignProcedures);
    LOG.info(
      "Scheduled {}/{} new regions for assignment.", assignProcedures.length, holes.size());
  }

  /**
   * Create a new {@link RegionInfo} corresponding to each provided "hole" pair.
   * Pairs for which no sensible cover can be computed are dropped (logged at WARN by
   * {@link #getHoleCover(Pair)}).
   */
  private static List<RegionInfo> createRegionInfosForHoles(
      final List<Pair<RegionInfo, RegionInfo>> holes) {
    final List<RegionInfo> newRegionInfos = holes.stream()
      .map(MetaFixer::getHoleCover)
      .filter(Optional::isPresent)
      .map(Optional::get)
      .collect(Collectors.toList());
    LOG.debug("Constructed {}/{} RegionInfo descriptors corresponding to identified holes.",
      newRegionInfos.size(), holes.size());
    return newRegionInfos;
  }

  /**
   * @return Attempts to calculate a new {@link RegionInfo} that covers the region range described
   *   in {@code hole}; empty when the pair is inconsistent (e.g. left endKey not less than right
   *   startKey, or both sides UNDEFINED).
   */
  private static Optional<RegionInfo> getHoleCover(Pair<RegionInfo, RegionInfo> hole) {
    final RegionInfo left = hole.getFirst();
    final RegionInfo right = hole.getSecond();

    if (left.getTable().equals(right.getTable())) {
      // Simple case: both sides in the same table; cover the gap between them.
      if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) {
        LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey;"
          + " left=<{}>, right=<{}>", left, right);
        return Optional.empty();
      }
      return Optional.of(buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey()));
    }

    // Sides belong to different tables (or are UNDEFINED sentinels): the hole is at a
    // table boundary, so the cover extends to the table's empty start or end row.
    final boolean leftUndefined = left.equals(RegionInfoBuilder.UNDEFINED);
    final boolean rightUndefined = right.equals(RegionInfoBuilder.UNDEFINED);
    final boolean last = left.isLast();
    final boolean first = right.isFirst();
    if (leftUndefined && rightUndefined) {
      LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " +
        "UNDEFINED; left=<{}>, right=<{}>", left, right);
      return Optional.empty();
    }
    if (leftUndefined || last) {
      // Missing first region of the right-side table.
      return Optional.of(
        buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW, right.getStartKey()));
    }
    if (rightUndefined || first) {
      // Missing last region of the left-side table.
      return Optional.of(
        buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW));
    }
    LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", left, right);
    return Optional.empty();
  }

  private static RegionInfo buildRegionInfo(TableName tn, byte [] start, byte [] end) {
    return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build();
  }

  /**
   * Create entries in the {@code hbase:meta} for each provided {@link RegionInfo}. Best effort.
   * @param masterServices used to connect to {@code hbase:meta}
   * @param newRegionInfos the new {@link RegionInfo} entries to add to the filesystem
   * @return a list of {@link RegionInfo} entries for which {@code hbase:meta} entries were
   *   successfully created
   */
  private static List<RegionInfo> createMetaEntries(final MasterServices masterServices,
      final List<RegionInfo> newRegionInfos) {

    // Each attempt yields either the created regions (primary + replicas) or the failure;
    // Either lets us collect successes and failures in a single pass.
    final List<Either<List<RegionInfo>, IOException>> addMetaEntriesResults = newRegionInfos.
      stream().map(regionInfo -> {
        try {
          TableDescriptor td = masterServices.getTableDescriptors().get(regionInfo.getTable());

          // Add replicas if needed
          // we need to create regions with replicaIds starting from 1
          List<RegionInfo> newRegions = RegionReplicaUtil
            .addReplicas(Collections.singletonList(regionInfo), 1, td.getRegionReplication());

          // Add regions to META
          MetaTableAccessor.addRegionsToMeta(masterServices.getConnection(), newRegions,
            td.getRegionReplication());

          // Setup replication for region replicas if needed
          if (td.getRegionReplication() > 1) {
            ServerRegionReplicaUtil.setupRegionReplicaReplication(masterServices);
          }
          return Either.<List<RegionInfo>, IOException> ofLeft(newRegions);
        } catch (IOException e) {
          return Either.<List<RegionInfo>, IOException> ofRight(e);
        } catch (ReplicationException e) {
          // Wrap so the failure channel carries a single exception type.
          return Either.<List<RegionInfo>, IOException> ofRight(new HBaseIOException(e));
        }
      })
      .collect(Collectors.toList());
    final List<RegionInfo> createMetaEntriesSuccesses = addMetaEntriesResults.stream()
      .filter(Either::hasLeft)
      .map(Either::getLeft)
      .flatMap(List::stream)
      .collect(Collectors.toList());
    final List<IOException> createMetaEntriesFailures = addMetaEntriesResults.stream()
      .filter(Either::hasRight)
      .map(Either::getRight)
      .collect(Collectors.toList());
    LOG.debug("Added {}/{} entries to hbase:meta",
      createMetaEntriesSuccesses.size(), newRegionInfos.size());

    if (!createMetaEntriesFailures.isEmpty()) {
      LOG.warn("Failed to create entries in hbase:meta for {}/{} RegionInfo descriptors. First"
          + " failure message included; full list of failures with accompanying stack traces is"
          + " available at log level DEBUG. message={}", createMetaEntriesFailures.size(),
        addMetaEntriesResults.size(), createMetaEntriesFailures.get(0).getMessage());
      if (LOG.isDebugEnabled()) {
        createMetaEntriesFailures.forEach(
          ioe -> LOG.debug("Attempt to fix region hole in hbase:meta failed.", ioe));
      }
    }

    return createMetaEntriesSuccesses;
  }

  /**
   * Fix overlaps noted in CJ consistency report.
   * @return pids of the scheduled merge procedures; a failed merge request is logged at WARN
   *   and skipped rather than aborting the remaining merges.
   */
  List<Long> fixOverlaps(Report report) throws IOException {
    List<Long> pidList = new ArrayList<>();
    for (Set<RegionInfo> regions: calculateMerges(maxMergeCount, report.getOverlaps())) {
      RegionInfo [] regionsArray = regions.toArray(new RegionInfo [] {});
      try {
        pidList.add(this.masterServices
          .mergeRegions(regionsArray, true, HConstants.NO_NONCE, HConstants.NO_NONCE));
      } catch (MergeRegionException mre) {
        LOG.warn("Failed overlap fix of {}", regionsArray, mre);
      }
    }
    return pidList;
  }

  /**
   * Run through <code>overlaps</code> and return a list of merges to run.
   * Presumes overlaps are ordered (which they are coming out of the CatalogJanitor
   * consistency report).
   * @param maxMergeCount Maximum regions to merge at a time (avoid merging
   *   100k regions in one go!)
   */
  static List<SortedSet<RegionInfo>> calculateMerges(int maxMergeCount,
      List<Pair<RegionInfo, RegionInfo>> overlaps) {
    if (overlaps.isEmpty()) {
      LOG.debug("No overlaps.");
      return Collections.emptyList();
    }
    List<SortedSet<RegionInfo>> merges = new ArrayList<>();
    // First group overlaps by table then calculate merge table by table.
    ListMultimap<TableName, Pair<RegionInfo, RegionInfo>> overlapGroups =
      ArrayListMultimap.create();
    for (Pair<RegionInfo, RegionInfo> pair : overlaps) {
      overlapGroups.put(pair.getFirst().getTable(), pair);
    }
    for (Map.Entry<TableName, Collection<Pair<RegionInfo, RegionInfo>>> entry : overlapGroups
      .asMap().entrySet()) {
      calculateTableMerges(maxMergeCount, merges, entry.getValue());
    }
    return merges;
  }

  /**
   * Compute the merge sets for a single table's overlaps and append them to {@code merges}.
   * A new merge set is started whenever the next pair no longer overlaps the running range or
   * the current set has hit {@code maxMergeCount}. Sets of fewer than two regions are never
   * emitted: merging needs at least two regions, so submitting such a set could only fail.
   */
  private static void calculateTableMerges(int maxMergeCount, List<SortedSet<RegionInfo>> merges,
      Collection<Pair<RegionInfo, RegionInfo>> overlaps) {
    SortedSet<RegionInfo> currentMergeSet = new TreeSet<>();
    HashSet<RegionInfo> regionsInMergeSet = new HashSet<>();
    RegionInfo regionInfoWithlargestEndKey =  null;
    for (Pair<RegionInfo, RegionInfo> pair: overlaps) {
      if (regionInfoWithlargestEndKey != null) {
        if (!isOverlap(regionInfoWithlargestEndKey, pair) ||
            currentMergeSet.size() >= maxMergeCount) {
          // Log when we cut-off-merge because we hit the configured maximum merge limit.
          if (currentMergeSet.size() >= maxMergeCount) {
            LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount);
          }

          // In the case of the merge set contains only 1 region or empty, it does not need to
          // submit this merge request as no merge is going to happen. currentMergeSet can be
          // reused in this case.
          if (currentMergeSet.size() <= 1) {
            for (RegionInfo ri : currentMergeSet) {
              regionsInMergeSet.remove(ri);
            }
            currentMergeSet.clear();
          } else {
            merges.add(currentMergeSet);
            currentMergeSet = new TreeSet<>();
          }
        }
      }

      // Do not add the same region into multiple merge set, this will fail
      // the second merge request.
      if (!regionsInMergeSet.contains(pair.getFirst())) {
        currentMergeSet.add(pair.getFirst());
        regionsInMergeSet.add(pair.getFirst());
      }
      if (!regionsInMergeSet.contains(pair.getSecond())) {
        currentMergeSet.add(pair.getSecond());
        regionsInMergeSet.add(pair.getSecond());
      }

      regionInfoWithlargestEndKey = getRegionInfoWithLargestEndKey(
        getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()),
        regionInfoWithlargestEndKey);
    }
    // Only emit the trailing set when it holds at least two regions; a smaller set would
    // produce a merge request that can only fail with MergeRegionException (mirrors the
    // size-<=-1 handling inside the loop above).
    if (currentMergeSet.size() > 1) {
      merges.add(currentMergeSet);
    }
  }

  /**
   * @return Either <code>a</code> or <code>b</code>, whichever has the
   *   endkey that is furthest along in the Table.
   */
  static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) {
    if (a == null) {
      // b may be null.
      return b;
    }
    if (b == null) {
      // Both are null. The return is not-defined.
      return a;
    }
    if (!a.getTable().equals(b.getTable())) {
      // This is an odd one. This should be the right answer.
      return b;
    }
    // The last region of a table (empty endKey) sorts after any explicit endKey.
    if (a.isLast()) {
      return a;
    }
    if (b.isLast()) {
      return b;
    }
    int compare = Bytes.compareTo(a.getEndKey(), b.getEndKey());
    return compare >= 0 ? a : b;
  }

  /**
   * @return True if an overlap found between passed in <code>ri</code> and
   *   the <code>pair</code>. Does NOT check the pairs themselves overlap.
   */
  static boolean isOverlap(RegionInfo ri, Pair<RegionInfo, RegionInfo> pair) {
    if (ri == null || pair == null) {
      // Can't be an overlap in either of these cases.
      return false;
    }
    return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond());
  }

  /**
   * A union over {@link L} and {@link R}. Exactly one of the two sides is present
   * (construction via {@link #ofLeft} / {@link #ofRight} guarantees the other is null).
   */
  private static class Either<L, R> {
    private final L left;
    private final R right;

    public static <L, R> Either<L, R> ofLeft(L left) {
      return new Either<>(left, null);
    }

    public static <L, R> Either<L, R> ofRight(R right) {
      return new Either<>(null, right);
    }

    Either(L left, R right) {
      this.left = left;
      this.right = right;
    }

    public boolean hasLeft() {
      return left != null;
    }

    public L getLeft() {
      if (!hasLeft()) {
        throw new IllegalStateException("Either contains no left.");
      }
      return left;
    }

    public boolean hasRight() {
      return right != null;
    }

    public R getRight() {
      if (!hasRight()) {
        throw new IllegalStateException("Either contains no right.");
      }
      return right;
    }
  }
}