001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collections; 023import java.util.HashSet; 024import java.util.List; 025import java.util.Optional; 026import java.util.Set; 027import java.util.SortedSet; 028import java.util.TreeSet; 029import java.util.stream.Collectors; 030import org.apache.hadoop.hbase.HConstants; 031import org.apache.hadoop.hbase.MetaTableAccessor; 032import org.apache.hadoop.hbase.TableName; 033import org.apache.hadoop.hbase.client.RegionInfo; 034import org.apache.hadoop.hbase.client.RegionInfoBuilder; 035import org.apache.hadoop.hbase.exceptions.MergeRegionException; 036import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure; 037import org.apache.hadoop.hbase.util.Bytes; 038import org.apache.hadoop.hbase.util.Pair; 039import org.apache.yetus.audience.InterfaceAudience; 040import org.slf4j.Logger; 041import org.slf4j.LoggerFactory; 042import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 043 044 045/** 046 * Server-side fixing of bad or inconsistent state in hbase:meta. 047 * Distinct from MetaTableAccessor because {@link MetaTableAccessor} is about low-level 048 * manipulations driven by the Master. This class MetaFixer is 049 * employed by the Master and it 'knows' about holes and orphans 050 * and encapsulates their fixing on behalf of the Master. 051 */ 052@InterfaceAudience.Private 053class MetaFixer { 054 private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class); 055 private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count"; 056 private static final int MAX_MERGE_COUNT_DEFAULT = 64; 057 058 private final MasterServices masterServices; 059 /** 060 * Maximum for many regions to merge at a time. 061 */ 062 private final int maxMergeCount; 063 064 MetaFixer(MasterServices masterServices) { 065 this.masterServices = masterServices; 066 this.maxMergeCount = this.masterServices.getConfiguration(). 067 getInt(MAX_MERGE_COUNT_KEY, MAX_MERGE_COUNT_DEFAULT); 068 } 069 070 void fix() throws IOException { 071 CatalogJanitor.Report report = this.masterServices.getCatalogJanitor().getLastReport(); 072 if (report == null) { 073 LOG.info("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in " + 074 "shell or wait until CatalogJanitor chore runs."); 075 return; 076 } 077 fixHoles(report); 078 fixOverlaps(report); 079 // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows which 080 // can help cleaning up damaged hbase:meta. 081 this.masterServices.runReplicationBarrierCleaner(); 082 } 083 084 /** 085 * If hole, it papers it over by adding a region in the filesystem and to hbase:meta. 086 * Does not assign. 087 */ 088 void fixHoles(CatalogJanitor.Report report) { 089 final List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles(); 090 if (holes.isEmpty()) { 091 LOG.info("CatalogJanitor Report contains no holes to fix. Skipping."); 092 return; 093 } 094 095 LOG.info("Identified {} region holes to fix. Detailed fixup progress logged at DEBUG.", 096 holes.size()); 097 098 final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes); 099 final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos); 100 final TransitRegionStateProcedure[] assignProcedures = masterServices 101 .getAssignmentManager() 102 .createRoundRobinAssignProcedures(newMetaEntries); 103 104 masterServices.getMasterProcedureExecutor().submitProcedures(assignProcedures); 105 LOG.info( 106 "Scheduled {}/{} new regions for assignment.", assignProcedures.length, holes.size()); 107 } 108 109 /** 110 * Create a new {@link RegionInfo} corresponding to each provided "hole" pair. 111 */ 112 private static List<RegionInfo> createRegionInfosForHoles( 113 final List<Pair<RegionInfo, RegionInfo>> holes) { 114 final List<RegionInfo> newRegionInfos = holes.stream() 115 .map(MetaFixer::getHoleCover) 116 .filter(Optional::isPresent) 117 .map(Optional::get) 118 .collect(Collectors.toList()); 119 LOG.debug("Constructed {}/{} RegionInfo descriptors corresponding to identified holes.", 120 newRegionInfos.size(), holes.size()); 121 return newRegionInfos; 122 } 123 124 /** 125 * @return Attempts to calculate a new {@link RegionInfo} that covers the region range described 126 * in {@code hole}. 127 */ 128 private static Optional<RegionInfo> getHoleCover(Pair<RegionInfo, RegionInfo> hole) { 129 final RegionInfo left = hole.getFirst(); 130 final RegionInfo right = hole.getSecond(); 131 132 if (left.getTable().equals(right.getTable())) { 133 // Simple case. 134 if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) { 135 LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey;" 136 + " left=<{}>, right=<{}>", left, right); 137 return Optional.empty(); 138 } 139 return Optional.of(buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey())); 140 } 141 142 final boolean leftUndefined = left.equals(RegionInfo.UNDEFINED); 143 final boolean rightUndefined = right.equals(RegionInfo.UNDEFINED); 144 final boolean last = left.isLast(); 145 final boolean first = right.isFirst(); 146 if (leftUndefined && rightUndefined) { 147 LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " + 148 "UNDEFINED; left=<{}>, right=<{}>", left, right); 149 return Optional.empty(); 150 } 151 if (leftUndefined || last) { 152 return Optional.of( 153 buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW, right.getStartKey())); 154 } 155 if (rightUndefined || first) { 156 return Optional.of( 157 buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW)); 158 } 159 LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", left, right); 160 return Optional.empty(); 161 } 162 163 private static RegionInfo buildRegionInfo(TableName tn, byte [] start, byte [] end) { 164 return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build(); 165 } 166 167 /** 168 * Create entries in the {@code hbase:meta} for each provided {@link RegionInfo}. Best effort. 169 * @param masterServices used to connect to {@code hbase:meta} 170 * @param newRegionInfos the new {@link RegionInfo} entries to add to the filesystem 171 * @return a list of {@link RegionInfo} entries for which {@code hbase:meta} entries were 172 * successfully created 173 */ 174 private static List<RegionInfo> createMetaEntries(final MasterServices masterServices, 175 final List<RegionInfo> newRegionInfos) { 176 177 final List<Either<RegionInfo, IOException>> addMetaEntriesResults = newRegionInfos.stream() 178 .map(regionInfo -> { 179 try { 180 MetaTableAccessor.addRegionToMeta(masterServices.getConnection(), regionInfo); 181 masterServices.getAssignmentManager() 182 .getRegionStates() 183 .updateRegionState(regionInfo, RegionState.State.CLOSED); 184 return Either.<RegionInfo, IOException>ofLeft(regionInfo); 185 } catch (IOException e) { 186 return Either.<RegionInfo, IOException>ofRight(e); 187 } 188 }) 189 .collect(Collectors.toList()); 190 final List<RegionInfo> createMetaEntriesSuccesses = addMetaEntriesResults.stream() 191 .filter(Either::hasLeft) 192 .map(Either::getLeft) 193 .collect(Collectors.toList()); 194 final List<IOException> createMetaEntriesFailures = addMetaEntriesResults.stream() 195 .filter(Either::hasRight) 196 .map(Either::getRight) 197 .collect(Collectors.toList()); 198 LOG.debug("Added {}/{} entries to hbase:meta", 199 createMetaEntriesSuccesses.size(), newRegionInfos.size()); 200 201 if (!createMetaEntriesFailures.isEmpty()) { 202 LOG.warn("Failed to create entries in hbase:meta for {}/{} RegionInfo descriptors. First" 203 + " failure message included; full list of failures with accompanying stack traces is" 204 + " available at log level DEBUG. message={}", createMetaEntriesFailures.size(), 205 addMetaEntriesResults.size(), createMetaEntriesFailures.get(0).getMessage()); 206 if (LOG.isDebugEnabled()) { 207 createMetaEntriesFailures.forEach( 208 ioe -> LOG.debug("Attempt to fix region hole in hbase:meta failed.", ioe)); 209 } 210 } 211 212 return createMetaEntriesSuccesses; 213 } 214 215 /** 216 * Fix overlaps noted in CJ consistency report. 217 */ 218 void fixOverlaps(CatalogJanitor.Report report) throws IOException { 219 for (Set<RegionInfo> regions: calculateMerges(maxMergeCount, report.getOverlaps())) { 220 RegionInfo [] regionsArray = regions.toArray(new RegionInfo [] {}); 221 try { 222 this.masterServices.mergeRegions(regionsArray, 223 true, HConstants.NO_NONCE, HConstants.NO_NONCE); 224 } catch (MergeRegionException mre) { 225 LOG.warn("Failed overlap fix of {}", regionsArray, mre); 226 } 227 } 228 } 229 230 /** 231 * Run through <code>overlaps</code> and return a list of merges to run. 232 * Presumes overlaps are ordered (which they are coming out of the CatalogJanitor 233 * consistency report). 234 * @param maxMergeCount Maximum regions to merge at a time (avoid merging 235 * 100k regions in one go!) 236 */ 237 @VisibleForTesting 238 static List<SortedSet<RegionInfo>> calculateMerges(int maxMergeCount, 239 List<Pair<RegionInfo, RegionInfo>> overlaps) { 240 if (overlaps.isEmpty()) { 241 LOG.debug("No overlaps."); 242 return Collections.emptyList(); 243 } 244 List<SortedSet<RegionInfo>> merges = new ArrayList<>(); 245 SortedSet<RegionInfo> currentMergeSet = new TreeSet<>(); 246 HashSet<RegionInfo> regionsInMergeSet = new HashSet<>(); 247 RegionInfo regionInfoWithlargestEndKey = null; 248 for (Pair<RegionInfo, RegionInfo> pair: overlaps) { 249 if (regionInfoWithlargestEndKey != null) { 250 if (!isOverlap(regionInfoWithlargestEndKey, pair) || 251 currentMergeSet.size() >= maxMergeCount) { 252 // Log when we cut-off-merge because we hit the configured maximum merge limit. 253 if (currentMergeSet.size() >= maxMergeCount) { 254 LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount); 255 } 256 257 // In the case of the merge set contains only 1 region or empty, it does not need to 258 // submit this merge request as no merge is going to happen. currentMergeSet can be 259 // reused in this case. 260 if (currentMergeSet.size() <= 1) { 261 for (RegionInfo ri : currentMergeSet) { 262 regionsInMergeSet.remove(ri); 263 } 264 currentMergeSet.clear(); 265 } else { 266 merges.add(currentMergeSet); 267 currentMergeSet = new TreeSet<>(); 268 } 269 } 270 } 271 272 // Do not add the same region into multiple merge set, this will fail 273 // the second merge request. 274 if (!regionsInMergeSet.contains(pair.getFirst())) { 275 currentMergeSet.add(pair.getFirst()); 276 regionsInMergeSet.add(pair.getFirst()); 277 } 278 if (!regionsInMergeSet.contains(pair.getSecond())) { 279 currentMergeSet.add(pair.getSecond()); 280 regionsInMergeSet.add(pair.getSecond()); 281 } 282 283 regionInfoWithlargestEndKey = getRegionInfoWithLargestEndKey( 284 getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()), 285 regionInfoWithlargestEndKey); 286 } 287 merges.add(currentMergeSet); 288 return merges; 289 } 290 291 /** 292 * @return Either <code>a</code> or <code>b</code>, whichever has the 293 * endkey that is furthest along in the Table. 294 */ 295 @VisibleForTesting 296 static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) { 297 if (a == null) { 298 // b may be null. 299 return b; 300 } 301 if (b == null) { 302 // Both are null. The return is not-defined. 303 return a; 304 } 305 if (!a.getTable().equals(b.getTable())) { 306 // This is an odd one. This should be the right answer. 307 return b; 308 } 309 if (a.isLast()) { 310 return a; 311 } 312 if (b.isLast()) { 313 return b; 314 } 315 int compare = Bytes.compareTo(a.getEndKey(), b.getEndKey()); 316 return compare == 0 || compare > 0? a: b; 317 } 318 319 /** 320 * @return True if an overlap found between passed in <code>ri</code> and 321 * the <code>pair</code>. Does NOT check the pairs themselves overlap. 322 */ 323 @VisibleForTesting 324 static boolean isOverlap(RegionInfo ri, Pair<RegionInfo, RegionInfo> pair) { 325 if (ri == null || pair == null) { 326 // Can't be an overlap in either of these cases. 327 return false; 328 } 329 return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond()); 330 } 331 332 /** 333 * A union over {@link L} and {@link R}. 334 */ 335 private static class Either<L, R> { 336 private final L left; 337 private final R right; 338 339 public static <L, R> Either<L, R> ofLeft(L left) { 340 return new Either<>(left, null); 341 } 342 343 public static <L, R> Either<L, R> ofRight(R right) { 344 return new Either<>(null, right); 345 } 346 347 Either(L left, R right) { 348 this.left = left; 349 this.right = right; 350 } 351 352 public boolean hasLeft() { 353 return left != null; 354 } 355 356 public L getLeft() { 357 if (!hasLeft()) { 358 throw new IllegalStateException("Either contains no left."); 359 } 360 return left; 361 } 362 363 public boolean hasRight() { 364 return right != null; 365 } 366 367 public R getRight() { 368 if (!hasRight()) { 369 throw new IllegalStateException("Either contains no right."); 370 } 371 return right; 372 } 373 } 374}