001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.janitor; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collection; 023import java.util.Collections; 024import java.util.HashSet; 025import java.util.List; 026import java.util.Map; 027import java.util.Optional; 028import java.util.Set; 029import java.util.SortedSet; 030import java.util.TreeSet; 031import java.util.stream.Collectors; 032import org.apache.hadoop.hbase.HConstants; 033import org.apache.hadoop.hbase.MetaTableAccessor; 034import org.apache.hadoop.hbase.TableName; 035import org.apache.hadoop.hbase.client.RegionInfo; 036import org.apache.hadoop.hbase.client.RegionInfoBuilder; 037import org.apache.hadoop.hbase.client.RegionReplicaUtil; 038import org.apache.hadoop.hbase.client.TableDescriptor; 039import org.apache.hadoop.hbase.exceptions.MergeRegionException; 040import org.apache.hadoop.hbase.master.MasterServices; 041import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure; 042import org.apache.hadoop.hbase.util.Bytes; 043import org.apache.hadoop.hbase.util.Pair; 044import org.apache.yetus.audience.InterfaceAudience; 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047 048import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap; 049import org.apache.hbase.thirdparty.com.google.common.collect.ListMultimap; 050 051/** 052 * Server-side fixing of bad or inconsistent state in hbase:meta. Distinct from MetaTableAccessor 053 * because {@link MetaTableAccessor} is about low-level manipulations driven by the Master. This 054 * class MetaFixer is employed by the Master and it 'knows' about holes and orphans and encapsulates 055 * their fixing on behalf of the Master. 056 */ 057@InterfaceAudience.Private 058public class MetaFixer { 059 private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class); 060 private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count"; 061 private static final int MAX_MERGE_COUNT_DEFAULT = 64; 062 063 private final MasterServices masterServices; 064 /** 065 * Maximum for many regions to merge at a time. 066 */ 067 private final int maxMergeCount; 068 069 public MetaFixer(MasterServices masterServices) { 070 this.masterServices = masterServices; 071 this.maxMergeCount = 072 this.masterServices.getConfiguration().getInt(MAX_MERGE_COUNT_KEY, MAX_MERGE_COUNT_DEFAULT); 073 } 074 075 public void fix() throws IOException { 076 CatalogJanitorReport report = this.masterServices.getCatalogJanitor().getLastReport(); 077 if (report == null) { 078 LOG.info("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in " 079 + "shell or wait until CatalogJanitor chore runs."); 080 return; 081 } 082 fixHoles(report); 083 fixOverlaps(report); 084 // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows which 085 // can help cleaning up damaged hbase:meta. 086 this.masterServices.runReplicationBarrierCleaner(); 087 } 088 089 /** 090 * If hole, it papers it over by adding a region in the filesystem and to hbase:meta. Does not 091 * assign. 092 */ 093 void fixHoles(CatalogJanitorReport report) { 094 final List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles(); 095 if (holes.isEmpty()) { 096 LOG.info("CatalogJanitor Report contains no holes to fix. Skipping."); 097 return; 098 } 099 100 LOG.info("Identified {} region holes to fix. Detailed fixup progress logged at DEBUG.", 101 holes.size()); 102 103 final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes); 104 final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos); 105 final TransitRegionStateProcedure[] assignProcedures = 106 masterServices.getAssignmentManager().createRoundRobinAssignProcedures(newMetaEntries); 107 108 masterServices.getMasterProcedureExecutor().submitProcedures(assignProcedures); 109 LOG.info("Scheduled {}/{} new regions for assignment.", assignProcedures.length, holes.size()); 110 } 111 112 /** 113 * Create a new {@link RegionInfo} corresponding to each provided "hole" pair. 114 */ 115 private static List<RegionInfo> 116 createRegionInfosForHoles(final List<Pair<RegionInfo, RegionInfo>> holes) { 117 final List<RegionInfo> newRegionInfos = holes.stream().map(MetaFixer::getHoleCover) 118 .filter(Optional::isPresent).map(Optional::get).collect(Collectors.toList()); 119 LOG.debug("Constructed {}/{} RegionInfo descriptors corresponding to identified holes.", 120 newRegionInfos.size(), holes.size()); 121 return newRegionInfos; 122 } 123 124 /** 125 * @return Attempts to calculate a new {@link RegionInfo} that covers the region range described 126 * in {@code hole}. 127 */ 128 private static Optional<RegionInfo> getHoleCover(Pair<RegionInfo, RegionInfo> hole) { 129 final RegionInfo left = hole.getFirst(); 130 final RegionInfo right = hole.getSecond(); 131 132 if (left.getTable().equals(right.getTable())) { 133 // Simple case. 134 if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) { 135 LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey;" 136 + " left=<{}>, right=<{}>", left, right); 137 return Optional.empty(); 138 } 139 return Optional.of(buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey())); 140 } 141 142 final boolean leftUndefined = left.equals(RegionInfoBuilder.UNDEFINED); 143 final boolean rightUndefined = right.equals(RegionInfoBuilder.UNDEFINED); 144 final boolean last = left.isLast(); 145 final boolean first = right.isFirst(); 146 if (leftUndefined && rightUndefined) { 147 LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " 148 + "UNDEFINED; left=<{}>, right=<{}>", left, right); 149 return Optional.empty(); 150 } 151 if (leftUndefined || last) { 152 return Optional 153 .of(buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW, right.getStartKey())); 154 } 155 if (rightUndefined || first) { 156 return Optional 157 .of(buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW)); 158 } 159 LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", left, right); 160 return Optional.empty(); 161 } 162 163 private static RegionInfo buildRegionInfo(TableName tn, byte[] start, byte[] end) { 164 return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build(); 165 } 166 167 /** 168 * Create entries in the {@code hbase:meta} for each provided {@link RegionInfo}. Best effort. 169 * @param masterServices used to connect to {@code hbase:meta} 170 * @param newRegionInfos the new {@link RegionInfo} entries to add to the filesystem 171 * @return a list of {@link RegionInfo} entries for which {@code hbase:meta} entries were 172 * successfully created 173 */ 174 private static List<RegionInfo> createMetaEntries(final MasterServices masterServices, 175 final List<RegionInfo> newRegionInfos) { 176 177 final List<Either<List<RegionInfo>, IOException>> addMetaEntriesResults = 178 newRegionInfos.stream().map(regionInfo -> { 179 try { 180 TableDescriptor td = masterServices.getTableDescriptors().get(regionInfo.getTable()); 181 182 // Add replicas if needed 183 // we need to create regions with replicaIds starting from 1 184 List<RegionInfo> newRegions = RegionReplicaUtil 185 .addReplicas(Collections.singletonList(regionInfo), 1, td.getRegionReplication()); 186 187 // Add regions to META 188 MetaTableAccessor.addRegionsToMeta(masterServices.getConnection(), newRegions, 189 td.getRegionReplication()); 190 191 return Either.<List<RegionInfo>, IOException> ofLeft(newRegions); 192 } catch (IOException e) { 193 return Either.<List<RegionInfo>, IOException> ofRight(e); 194 } 195 }).collect(Collectors.toList()); 196 final List<RegionInfo> createMetaEntriesSuccesses = 197 addMetaEntriesResults.stream().filter(Either::hasLeft).map(Either::getLeft) 198 .flatMap(List::stream).collect(Collectors.toList()); 199 final List<IOException> createMetaEntriesFailures = addMetaEntriesResults.stream() 200 .filter(Either::hasRight).map(Either::getRight).collect(Collectors.toList()); 201 LOG.debug("Added {}/{} entries to hbase:meta", createMetaEntriesSuccesses.size(), 202 newRegionInfos.size()); 203 204 if (!createMetaEntriesFailures.isEmpty()) { 205 LOG.warn( 206 "Failed to create entries in hbase:meta for {}/{} RegionInfo descriptors. First" 207 + " failure message included; full list of failures with accompanying stack traces is" 208 + " available at log level DEBUG. message={}", 209 createMetaEntriesFailures.size(), addMetaEntriesResults.size(), 210 createMetaEntriesFailures.get(0).getMessage()); 211 if (LOG.isDebugEnabled()) { 212 createMetaEntriesFailures 213 .forEach(ioe -> LOG.debug("Attempt to fix region hole in hbase:meta failed.", ioe)); 214 } 215 } 216 217 return createMetaEntriesSuccesses; 218 } 219 220 /** 221 * Fix overlaps noted in CJ consistency report. 222 */ 223 List<Long> fixOverlaps(CatalogJanitorReport report) throws IOException { 224 List<Long> pidList = new ArrayList<>(); 225 for (Set<RegionInfo> regions : calculateMerges(maxMergeCount, report.getOverlaps())) { 226 RegionInfo[] regionsArray = regions.toArray(new RegionInfo[] {}); 227 try { 228 pidList.add(this.masterServices.mergeRegions(regionsArray, true, HConstants.NO_NONCE, 229 HConstants.NO_NONCE)); 230 } catch (MergeRegionException mre) { 231 LOG.warn("Failed overlap fix of {}", regionsArray, mre); 232 } 233 } 234 return pidList; 235 } 236 237 /** 238 * Run through <code>overlaps</code> and return a list of merges to run. Presumes overlaps are 239 * ordered (which they are coming out of the CatalogJanitor consistency report). 240 * @param maxMergeCount Maximum regions to merge at a time (avoid merging 100k regions in one go!) 241 */ 242 static List<SortedSet<RegionInfo>> calculateMerges(int maxMergeCount, 243 List<Pair<RegionInfo, RegionInfo>> overlaps) { 244 if (overlaps.isEmpty()) { 245 LOG.debug("No overlaps."); 246 return Collections.emptyList(); 247 } 248 List<SortedSet<RegionInfo>> merges = new ArrayList<>(); 249 // First group overlaps by table then calculate merge table by table. 250 ListMultimap<TableName, Pair<RegionInfo, RegionInfo>> overlapGroups = 251 ArrayListMultimap.create(); 252 for (Pair<RegionInfo, RegionInfo> pair : overlaps) { 253 overlapGroups.put(pair.getFirst().getTable(), pair); 254 } 255 for (Map.Entry<TableName, Collection<Pair<RegionInfo, RegionInfo>>> entry : overlapGroups 256 .asMap().entrySet()) { 257 calculateTableMerges(maxMergeCount, merges, entry.getValue()); 258 } 259 return merges; 260 } 261 262 private static void calculateTableMerges(int maxMergeCount, List<SortedSet<RegionInfo>> merges, 263 Collection<Pair<RegionInfo, RegionInfo>> overlaps) { 264 SortedSet<RegionInfo> currentMergeSet = new TreeSet<>(); 265 HashSet<RegionInfo> regionsInMergeSet = new HashSet<>(); 266 RegionInfo regionInfoWithlargestEndKey = null; 267 for (Pair<RegionInfo, RegionInfo> pair : overlaps) { 268 if (regionInfoWithlargestEndKey != null) { 269 if ( 270 !isOverlap(regionInfoWithlargestEndKey, pair) || currentMergeSet.size() >= maxMergeCount 271 ) { 272 // Log when we cut-off-merge because we hit the configured maximum merge limit. 273 if (currentMergeSet.size() >= maxMergeCount) { 274 LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount); 275 } 276 277 // In the case of the merge set contains only 1 region or empty, it does not need to 278 // submit this merge request as no merge is going to happen. currentMergeSet can be 279 // reused in this case. 280 if (currentMergeSet.size() <= 1) { 281 for (RegionInfo ri : currentMergeSet) { 282 regionsInMergeSet.remove(ri); 283 } 284 currentMergeSet.clear(); 285 } else { 286 merges.add(currentMergeSet); 287 currentMergeSet = new TreeSet<>(); 288 } 289 } 290 } 291 292 // Do not add the same region into multiple merge set, this will fail 293 // the second merge request. 294 if (!regionsInMergeSet.contains(pair.getFirst())) { 295 currentMergeSet.add(pair.getFirst()); 296 regionsInMergeSet.add(pair.getFirst()); 297 } 298 if (!regionsInMergeSet.contains(pair.getSecond())) { 299 currentMergeSet.add(pair.getSecond()); 300 regionsInMergeSet.add(pair.getSecond()); 301 } 302 303 regionInfoWithlargestEndKey = getRegionInfoWithLargestEndKey( 304 getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()), 305 regionInfoWithlargestEndKey); 306 } 307 merges.add(currentMergeSet); 308 } 309 310 /** 311 * @return Either <code>a</code> or <code>b</code>, whichever has the endkey that is furthest 312 * along in the Table. 313 */ 314 static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) { 315 if (a == null) { 316 // b may be null. 317 return b; 318 } 319 if (b == null) { 320 // Both are null. The return is not-defined. 321 return a; 322 } 323 if (!a.getTable().equals(b.getTable())) { 324 // This is an odd one. This should be the right answer. 325 return b; 326 } 327 if (a.isLast()) { 328 return a; 329 } 330 if (b.isLast()) { 331 return b; 332 } 333 int compare = Bytes.compareTo(a.getEndKey(), b.getEndKey()); 334 return compare == 0 || compare > 0 ? a : b; 335 } 336 337 /** 338 * @return True if an overlap found between passed in <code>ri</code> and the <code>pair</code>. 339 * Does NOT check the pairs themselves overlap. 340 */ 341 static boolean isOverlap(RegionInfo ri, Pair<RegionInfo, RegionInfo> pair) { 342 if (ri == null || pair == null) { 343 // Can't be an overlap in either of these cases. 344 return false; 345 } 346 return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond()); 347 } 348 349 /** 350 * A union over {@link L} and {@link R}. 351 */ 352 private static class Either<L, R> { 353 private final L left; 354 private final R right; 355 356 public static <L, R> Either<L, R> ofLeft(L left) { 357 return new Either<>(left, null); 358 } 359 360 public static <L, R> Either<L, R> ofRight(R right) { 361 return new Either<>(null, right); 362 } 363 364 Either(L left, R right) { 365 this.left = left; 366 this.right = right; 367 } 368 369 public boolean hasLeft() { 370 return left != null; 371 } 372 373 public L getLeft() { 374 if (!hasLeft()) { 375 throw new IllegalStateException("Either contains no left."); 376 } 377 return left; 378 } 379 380 public boolean hasRight() { 381 return right != null; 382 } 383 384 public R getRight() { 385 if (!hasRight()) { 386 throw new IllegalStateException("Either contains no right."); 387 } 388 return right; 389 } 390 } 391}