001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collections; 023import java.util.HashSet; 024import java.util.List; 025import java.util.Optional; 026import java.util.Set; 027import java.util.SortedSet; 028import java.util.TreeSet; 029import java.util.stream.Collectors; 030import org.apache.hadoop.hbase.HConstants; 031import org.apache.hadoop.hbase.MetaTableAccessor; 032import org.apache.hadoop.hbase.TableName; 033import org.apache.hadoop.hbase.client.RegionInfo; 034import org.apache.hadoop.hbase.client.RegionInfoBuilder; 035import org.apache.hadoop.hbase.client.RegionReplicaUtil; 036import org.apache.hadoop.hbase.client.TableDescriptor; 037import org.apache.hadoop.hbase.exceptions.MergeRegionException; 038import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure; 039import org.apache.hadoop.hbase.util.Bytes; 040import org.apache.hadoop.hbase.util.Pair; 041import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; 042import org.apache.yetus.audience.InterfaceAudience; 043import org.slf4j.Logger; 044import org.slf4j.LoggerFactory; 045import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 046 047 048/** 049 * Server-side fixing of bad or inconsistent state in hbase:meta. 050 * Distinct from MetaTableAccessor because {@link MetaTableAccessor} is about low-level 051 * manipulations driven by the Master. This class MetaFixer is 052 * employed by the Master and it 'knows' about holes and orphans 053 * and encapsulates their fixing on behalf of the Master. 054 */ 055@InterfaceAudience.Private 056class MetaFixer { 057 private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class); 058 private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count"; 059 private static final int MAX_MERGE_COUNT_DEFAULT = 64; 060 061 private final MasterServices masterServices; 062 /** 063 * Maximum for many regions to merge at a time. 064 */ 065 private final int maxMergeCount; 066 067 MetaFixer(MasterServices masterServices) { 068 this.masterServices = masterServices; 069 this.maxMergeCount = this.masterServices.getConfiguration(). 070 getInt(MAX_MERGE_COUNT_KEY, MAX_MERGE_COUNT_DEFAULT); 071 } 072 073 void fix() throws IOException { 074 CatalogJanitor.Report report = this.masterServices.getCatalogJanitor().getLastReport(); 075 if (report == null) { 076 LOG.info("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in " + 077 "shell or wait until CatalogJanitor chore runs."); 078 return; 079 } 080 fixHoles(report); 081 fixOverlaps(report); 082 // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows which 083 // can help cleaning up damaged hbase:meta. 084 this.masterServices.runReplicationBarrierCleaner(); 085 } 086 087 /** 088 * If hole, it papers it over by adding a region in the filesystem and to hbase:meta. 089 * Does not assign. 090 */ 091 void fixHoles(CatalogJanitor.Report report) { 092 final List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles(); 093 if (holes.isEmpty()) { 094 LOG.info("CatalogJanitor Report contains no holes to fix. Skipping."); 095 return; 096 } 097 098 LOG.info("Identified {} region holes to fix. Detailed fixup progress logged at DEBUG.", 099 holes.size()); 100 101 final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes); 102 final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos); 103 final TransitRegionStateProcedure[] assignProcedures = masterServices 104 .getAssignmentManager() 105 .createRoundRobinAssignProcedures(newMetaEntries); 106 107 masterServices.getMasterProcedureExecutor().submitProcedures(assignProcedures); 108 LOG.info( 109 "Scheduled {}/{} new regions for assignment.", assignProcedures.length, holes.size()); 110 } 111 112 /** 113 * Create a new {@link RegionInfo} corresponding to each provided "hole" pair. 114 */ 115 private static List<RegionInfo> createRegionInfosForHoles( 116 final List<Pair<RegionInfo, RegionInfo>> holes) { 117 final List<RegionInfo> newRegionInfos = holes.stream() 118 .map(MetaFixer::getHoleCover) 119 .filter(Optional::isPresent) 120 .map(Optional::get) 121 .collect(Collectors.toList()); 122 LOG.debug("Constructed {}/{} RegionInfo descriptors corresponding to identified holes.", 123 newRegionInfos.size(), holes.size()); 124 return newRegionInfos; 125 } 126 127 /** 128 * @return Attempts to calculate a new {@link RegionInfo} that covers the region range described 129 * in {@code hole}. 130 */ 131 private static Optional<RegionInfo> getHoleCover(Pair<RegionInfo, RegionInfo> hole) { 132 final RegionInfo left = hole.getFirst(); 133 final RegionInfo right = hole.getSecond(); 134 135 if (left.getTable().equals(right.getTable())) { 136 // Simple case. 137 if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) { 138 LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey;" 139 + " left=<{}>, right=<{}>", left, right); 140 return Optional.empty(); 141 } 142 return Optional.of(buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey())); 143 } 144 145 final boolean leftUndefined = left.equals(RegionInfo.UNDEFINED); 146 final boolean rightUndefined = right.equals(RegionInfo.UNDEFINED); 147 final boolean last = left.isLast(); 148 final boolean first = right.isFirst(); 149 if (leftUndefined && rightUndefined) { 150 LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " + 151 "UNDEFINED; left=<{}>, right=<{}>", left, right); 152 return Optional.empty(); 153 } 154 if (leftUndefined || last) { 155 return Optional.of( 156 buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW, right.getStartKey())); 157 } 158 if (rightUndefined || first) { 159 return Optional.of( 160 buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW)); 161 } 162 LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", left, right); 163 return Optional.empty(); 164 } 165 166 private static RegionInfo buildRegionInfo(TableName tn, byte [] start, byte [] end) { 167 return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build(); 168 } 169 170 /** 171 * Create entries in the {@code hbase:meta} for each provided {@link RegionInfo}. Best effort. 172 * @param masterServices used to connect to {@code hbase:meta} 173 * @param newRegionInfos the new {@link RegionInfo} entries to add to the filesystem 174 * @return a list of {@link RegionInfo} entries for which {@code hbase:meta} entries were 175 * successfully created 176 */ 177 private static List<RegionInfo> createMetaEntries(final MasterServices masterServices, 178 final List<RegionInfo> newRegionInfos) { 179 180 final List<Either<List<RegionInfo>, IOException>> addMetaEntriesResults = newRegionInfos. 181 stream().map(regionInfo -> { 182 try { 183 TableDescriptor td = masterServices.getTableDescriptors().get(regionInfo.getTable()); 184 185 // Add replicas if needed 186 // we need to create regions with replicaIds starting from 1 187 List<RegionInfo> newRegions = RegionReplicaUtil.addReplicas( 188 Collections.singletonList(regionInfo), 1, td.getRegionReplication()); 189 190 // Add regions to META 191 MetaTableAccessor.addRegionsToMeta(masterServices.getConnection(), newRegions, 192 td.getRegionReplication()); 193 194 // Setup replication for region replicas if needed 195 if (td.getRegionReplication() > 1) { 196 ServerRegionReplicaUtil.setupRegionReplicaReplication( 197 masterServices.getConfiguration()); 198 } 199 return Either.<List<RegionInfo>, IOException>ofLeft(newRegions); 200 } catch (IOException e) { 201 return Either.<List<RegionInfo>, IOException>ofRight(e); 202 } 203 }) 204 .collect(Collectors.toList()); 205 final List<RegionInfo> createMetaEntriesSuccesses = addMetaEntriesResults.stream() 206 .filter(Either::hasLeft) 207 .map(Either::getLeft) 208 .flatMap(List::stream) 209 .collect(Collectors.toList()); 210 final List<IOException> createMetaEntriesFailures = addMetaEntriesResults.stream() 211 .filter(Either::hasRight) 212 .map(Either::getRight) 213 .collect(Collectors.toList()); 214 LOG.debug("Added {}/{} entries to hbase:meta", 215 createMetaEntriesSuccesses.size(), newRegionInfos.size()); 216 217 if (!createMetaEntriesFailures.isEmpty()) { 218 LOG.warn("Failed to create entries in hbase:meta for {}/{} RegionInfo descriptors. First" 219 + " failure message included; full list of failures with accompanying stack traces is" 220 + " available at log level DEBUG. message={}", createMetaEntriesFailures.size(), 221 addMetaEntriesResults.size(), createMetaEntriesFailures.get(0).getMessage()); 222 if (LOG.isDebugEnabled()) { 223 createMetaEntriesFailures.forEach( 224 ioe -> LOG.debug("Attempt to fix region hole in hbase:meta failed.", ioe)); 225 } 226 } 227 228 return createMetaEntriesSuccesses; 229 } 230 231 /** 232 * Fix overlaps noted in CJ consistency report. 233 */ 234 void fixOverlaps(CatalogJanitor.Report report) throws IOException { 235 for (Set<RegionInfo> regions: calculateMerges(maxMergeCount, report.getOverlaps())) { 236 RegionInfo [] regionsArray = regions.toArray(new RegionInfo [] {}); 237 try { 238 this.masterServices.mergeRegions(regionsArray, 239 true, HConstants.NO_NONCE, HConstants.NO_NONCE); 240 } catch (MergeRegionException mre) { 241 LOG.warn("Failed overlap fix of {}", regionsArray, mre); 242 } 243 } 244 } 245 246 /** 247 * Run through <code>overlaps</code> and return a list of merges to run. 248 * Presumes overlaps are ordered (which they are coming out of the CatalogJanitor 249 * consistency report). 250 * @param maxMergeCount Maximum regions to merge at a time (avoid merging 251 * 100k regions in one go!) 252 */ 253 @VisibleForTesting 254 static List<SortedSet<RegionInfo>> calculateMerges(int maxMergeCount, 255 List<Pair<RegionInfo, RegionInfo>> overlaps) { 256 if (overlaps.isEmpty()) { 257 LOG.debug("No overlaps."); 258 return Collections.emptyList(); 259 } 260 List<SortedSet<RegionInfo>> merges = new ArrayList<>(); 261 SortedSet<RegionInfo> currentMergeSet = new TreeSet<>(); 262 HashSet<RegionInfo> regionsInMergeSet = new HashSet<>(); 263 RegionInfo regionInfoWithlargestEndKey = null; 264 for (Pair<RegionInfo, RegionInfo> pair: overlaps) { 265 if (regionInfoWithlargestEndKey != null) { 266 if (!isOverlap(regionInfoWithlargestEndKey, pair) || 267 currentMergeSet.size() >= maxMergeCount) { 268 // Log when we cut-off-merge because we hit the configured maximum merge limit. 269 if (currentMergeSet.size() >= maxMergeCount) { 270 LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount); 271 } 272 273 // In the case of the merge set contains only 1 region or empty, it does not need to 274 // submit this merge request as no merge is going to happen. currentMergeSet can be 275 // reused in this case. 276 if (currentMergeSet.size() <= 1) { 277 for (RegionInfo ri : currentMergeSet) { 278 regionsInMergeSet.remove(ri); 279 } 280 currentMergeSet.clear(); 281 } else { 282 merges.add(currentMergeSet); 283 currentMergeSet = new TreeSet<>(); 284 } 285 } 286 } 287 288 // Do not add the same region into multiple merge set, this will fail 289 // the second merge request. 290 if (!regionsInMergeSet.contains(pair.getFirst())) { 291 currentMergeSet.add(pair.getFirst()); 292 regionsInMergeSet.add(pair.getFirst()); 293 } 294 if (!regionsInMergeSet.contains(pair.getSecond())) { 295 currentMergeSet.add(pair.getSecond()); 296 regionsInMergeSet.add(pair.getSecond()); 297 } 298 299 regionInfoWithlargestEndKey = getRegionInfoWithLargestEndKey( 300 getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()), 301 regionInfoWithlargestEndKey); 302 } 303 merges.add(currentMergeSet); 304 return merges; 305 } 306 307 /** 308 * @return Either <code>a</code> or <code>b</code>, whichever has the 309 * endkey that is furthest along in the Table. 310 */ 311 @VisibleForTesting 312 static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) { 313 if (a == null) { 314 // b may be null. 315 return b; 316 } 317 if (b == null) { 318 // Both are null. The return is not-defined. 319 return a; 320 } 321 if (!a.getTable().equals(b.getTable())) { 322 // This is an odd one. This should be the right answer. 323 return b; 324 } 325 if (a.isLast()) { 326 return a; 327 } 328 if (b.isLast()) { 329 return b; 330 } 331 int compare = Bytes.compareTo(a.getEndKey(), b.getEndKey()); 332 return compare == 0 || compare > 0? a: b; 333 } 334 335 /** 336 * @return True if an overlap found between passed in <code>ri</code> and 337 * the <code>pair</code>. Does NOT check the pairs themselves overlap. 338 */ 339 @VisibleForTesting 340 static boolean isOverlap(RegionInfo ri, Pair<RegionInfo, RegionInfo> pair) { 341 if (ri == null || pair == null) { 342 // Can't be an overlap in either of these cases. 343 return false; 344 } 345 return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond()); 346 } 347 348 /** 349 * A union over {@link L} and {@link R}. 350 */ 351 private static class Either<L, R> { 352 private final L left; 353 private final R right; 354 355 public static <L, R> Either<L, R> ofLeft(L left) { 356 return new Either<>(left, null); 357 } 358 359 public static <L, R> Either<L, R> ofRight(R right) { 360 return new Either<>(null, right); 361 } 362 363 Either(L left, R right) { 364 this.left = left; 365 this.right = right; 366 } 367 368 public boolean hasLeft() { 369 return left != null; 370 } 371 372 public L getLeft() { 373 if (!hasLeft()) { 374 throw new IllegalStateException("Either contains no left."); 375 } 376 return left; 377 } 378 379 public boolean hasRight() { 380 return right != null; 381 } 382 383 public R getRight() { 384 if (!hasRight()) { 385 throw new IllegalStateException("Either contains no right."); 386 } 387 return right; 388 } 389 } 390}