001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collections;
023import java.util.HashSet;
024import java.util.List;
025import java.util.Optional;
026import java.util.Set;
027import java.util.SortedSet;
028import java.util.TreeSet;
029import java.util.stream.Collectors;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.MetaTableAccessor;
032import org.apache.hadoop.hbase.TableName;
033import org.apache.hadoop.hbase.client.RegionInfo;
034import org.apache.hadoop.hbase.client.RegionInfoBuilder;
035import org.apache.hadoop.hbase.exceptions.MergeRegionException;
036import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
037import org.apache.hadoop.hbase.util.Bytes;
038import org.apache.hadoop.hbase.util.Pair;
039import org.apache.yetus.audience.InterfaceAudience;
040import org.slf4j.Logger;
041import org.slf4j.LoggerFactory;
042import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
043
044
045/**
046 * Server-side fixing of bad or inconsistent state in hbase:meta.
047 * Distinct from MetaTableAccessor because {@link MetaTableAccessor} is about low-level
048 * manipulations driven by the Master. This class MetaFixer is
049 * employed by the Master and it 'knows' about holes and orphans
050 * and encapsulates their fixing on behalf of the Master.
051 */
052@InterfaceAudience.Private
053class MetaFixer {
054  private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
055  private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
056  private static final int MAX_MERGE_COUNT_DEFAULT = 64;
057
058  private final MasterServices masterServices;
059  /**
060   * Maximum for many regions to merge at a time.
061   */
062  private final int maxMergeCount;
063
064  MetaFixer(MasterServices masterServices) {
065    this.masterServices = masterServices;
066    this.maxMergeCount = this.masterServices.getConfiguration().
067        getInt(MAX_MERGE_COUNT_KEY, MAX_MERGE_COUNT_DEFAULT);
068  }
069
070  void fix() throws IOException {
071    CatalogJanitor.Report report = this.masterServices.getCatalogJanitor().getLastReport();
072    if (report == null) {
073      LOG.info("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in " +
074          "shell or wait until CatalogJanitor chore runs.");
075      return;
076    }
077    fixHoles(report);
078    fixOverlaps(report);
079    // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows which
080    // can help cleaning up damaged hbase:meta.
081    this.masterServices.runReplicationBarrierCleaner();
082  }
083
084  /**
085   * If hole, it papers it over by adding a region in the filesystem and to hbase:meta.
086   * Does not assign.
087   */
088  void fixHoles(CatalogJanitor.Report report) {
089    final List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles();
090    if (holes.isEmpty()) {
091      LOG.info("CatalogJanitor Report contains no holes to fix. Skipping.");
092      return;
093    }
094
095    LOG.info("Identified {} region holes to fix. Detailed fixup progress logged at DEBUG.",
096      holes.size());
097
098    final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes);
099    final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos);
100    final TransitRegionStateProcedure[] assignProcedures = masterServices
101      .getAssignmentManager()
102      .createRoundRobinAssignProcedures(newMetaEntries);
103
104    masterServices.getMasterProcedureExecutor().submitProcedures(assignProcedures);
105    LOG.info(
106      "Scheduled {}/{} new regions for assignment.", assignProcedures.length, holes.size());
107  }
108
109  /**
110   * Create a new {@link RegionInfo} corresponding to each provided "hole" pair.
111   */
112  private static List<RegionInfo> createRegionInfosForHoles(
113    final List<Pair<RegionInfo, RegionInfo>> holes) {
114    final List<RegionInfo> newRegionInfos = holes.stream()
115      .map(MetaFixer::getHoleCover)
116      .filter(Optional::isPresent)
117      .map(Optional::get)
118      .collect(Collectors.toList());
119    LOG.debug("Constructed {}/{} RegionInfo descriptors corresponding to identified holes.",
120      newRegionInfos.size(), holes.size());
121    return newRegionInfos;
122  }
123
124  /**
125   * @return Attempts to calculate a new {@link RegionInfo} that covers the region range described
126   *   in {@code hole}.
127   */
128  private static Optional<RegionInfo> getHoleCover(Pair<RegionInfo, RegionInfo> hole) {
129    final RegionInfo left = hole.getFirst();
130    final RegionInfo right = hole.getSecond();
131
132    if (left.getTable().equals(right.getTable())) {
133      // Simple case.
134      if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) {
135        LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey;"
136          + " left=<{}>, right=<{}>", left, right);
137        return Optional.empty();
138      }
139      return Optional.of(buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey()));
140    }
141
142    final boolean leftUndefined = left.equals(RegionInfo.UNDEFINED);
143    final boolean rightUndefined = right.equals(RegionInfo.UNDEFINED);
144    final boolean last = left.isLast();
145    final boolean first = right.isFirst();
146    if (leftUndefined && rightUndefined) {
147      LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " +
148        "UNDEFINED; left=<{}>, right=<{}>", left, right);
149      return Optional.empty();
150    }
151    if (leftUndefined || last) {
152      return Optional.of(
153        buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW, right.getStartKey()));
154    }
155    if (rightUndefined || first) {
156      return Optional.of(
157        buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW));
158    }
159    LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", left, right);
160    return Optional.empty();
161  }
162
163  private static RegionInfo buildRegionInfo(TableName tn, byte [] start, byte [] end) {
164    return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build();
165  }
166
167  /**
168   * Create entries in the {@code hbase:meta} for each provided {@link RegionInfo}. Best effort.
169   * @param masterServices used to connect to {@code hbase:meta}
170   * @param newRegionInfos the new {@link RegionInfo} entries to add to the filesystem
171   * @return a list of {@link RegionInfo} entries for which {@code hbase:meta} entries were
172   *   successfully created
173   */
174  private static List<RegionInfo> createMetaEntries(final MasterServices masterServices,
175    final List<RegionInfo> newRegionInfos) {
176
177    final List<Either<RegionInfo, IOException>> addMetaEntriesResults = newRegionInfos.stream()
178      .map(regionInfo -> {
179        try {
180          MetaTableAccessor.addRegionToMeta(masterServices.getConnection(), regionInfo);
181          masterServices.getAssignmentManager()
182            .getRegionStates()
183            .updateRegionState(regionInfo, RegionState.State.CLOSED);
184          return Either.<RegionInfo, IOException>ofLeft(regionInfo);
185        } catch (IOException e) {
186          return Either.<RegionInfo, IOException>ofRight(e);
187        }
188      })
189      .collect(Collectors.toList());
190    final List<RegionInfo> createMetaEntriesSuccesses = addMetaEntriesResults.stream()
191      .filter(Either::hasLeft)
192      .map(Either::getLeft)
193      .collect(Collectors.toList());
194    final List<IOException> createMetaEntriesFailures = addMetaEntriesResults.stream()
195      .filter(Either::hasRight)
196      .map(Either::getRight)
197      .collect(Collectors.toList());
198    LOG.debug("Added {}/{} entries to hbase:meta",
199      createMetaEntriesSuccesses.size(), newRegionInfos.size());
200
201    if (!createMetaEntriesFailures.isEmpty()) {
202      LOG.warn("Failed to create entries in hbase:meta for {}/{} RegionInfo descriptors. First"
203          + " failure message included; full list of failures with accompanying stack traces is"
204          + " available at log level DEBUG. message={}", createMetaEntriesFailures.size(),
205        addMetaEntriesResults.size(), createMetaEntriesFailures.get(0).getMessage());
206      if (LOG.isDebugEnabled()) {
207        createMetaEntriesFailures.forEach(
208          ioe -> LOG.debug("Attempt to fix region hole in hbase:meta failed.", ioe));
209      }
210    }
211
212    return createMetaEntriesSuccesses;
213  }
214
215  /**
216   * Fix overlaps noted in CJ consistency report.
217   */
218  void fixOverlaps(CatalogJanitor.Report report) throws IOException {
219    for (Set<RegionInfo> regions: calculateMerges(maxMergeCount, report.getOverlaps())) {
220      RegionInfo [] regionsArray = regions.toArray(new RegionInfo [] {});
221      try {
222        this.masterServices.mergeRegions(regionsArray,
223            true, HConstants.NO_NONCE, HConstants.NO_NONCE);
224      } catch (MergeRegionException mre) {
225        LOG.warn("Failed overlap fix of {}", regionsArray, mre);
226      }
227    }
228  }
229
230  /**
231   * Run through <code>overlaps</code> and return a list of merges to run.
232   * Presumes overlaps are ordered (which they are coming out of the CatalogJanitor
233   * consistency report).
234   * @param maxMergeCount Maximum regions to merge at a time (avoid merging
235   *   100k regions in one go!)
236   */
237  @VisibleForTesting
238  static List<SortedSet<RegionInfo>> calculateMerges(int maxMergeCount,
239      List<Pair<RegionInfo, RegionInfo>> overlaps) {
240    if (overlaps.isEmpty()) {
241      LOG.debug("No overlaps.");
242      return Collections.emptyList();
243    }
244    List<SortedSet<RegionInfo>> merges = new ArrayList<>();
245    SortedSet<RegionInfo> currentMergeSet = new TreeSet<>();
246    HashSet<RegionInfo> regionsInMergeSet = new HashSet<>();
247    RegionInfo regionInfoWithlargestEndKey =  null;
248    for (Pair<RegionInfo, RegionInfo> pair: overlaps) {
249      if (regionInfoWithlargestEndKey != null) {
250        if (!isOverlap(regionInfoWithlargestEndKey, pair) ||
251            currentMergeSet.size() >= maxMergeCount) {
252          // Log when we cut-off-merge because we hit the configured maximum merge limit.
253          if (currentMergeSet.size() >= maxMergeCount) {
254            LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount);
255          }
256
257          // In the case of the merge set contains only 1 region or empty, it does not need to
258          // submit this merge request as no merge is going to happen. currentMergeSet can be
259          // reused in this case.
260          if (currentMergeSet.size() <= 1) {
261            for (RegionInfo ri : currentMergeSet) {
262              regionsInMergeSet.remove(ri);
263            }
264            currentMergeSet.clear();
265          } else {
266            merges.add(currentMergeSet);
267            currentMergeSet = new TreeSet<>();
268          }
269        }
270      }
271
272      // Do not add the same region into multiple merge set, this will fail
273      // the second merge request.
274      if (!regionsInMergeSet.contains(pair.getFirst())) {
275        currentMergeSet.add(pair.getFirst());
276        regionsInMergeSet.add(pair.getFirst());
277      }
278      if (!regionsInMergeSet.contains(pair.getSecond())) {
279        currentMergeSet.add(pair.getSecond());
280        regionsInMergeSet.add(pair.getSecond());
281      }
282
283      regionInfoWithlargestEndKey = getRegionInfoWithLargestEndKey(
284        getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()),
285          regionInfoWithlargestEndKey);
286    }
287    merges.add(currentMergeSet);
288    return merges;
289  }
290
291  /**
292   * @return Either <code>a</code> or <code>b</code>, whichever has the
293   *   endkey that is furthest along in the Table.
294   */
295  @VisibleForTesting
296  static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) {
297    if (a == null) {
298      // b may be null.
299      return b;
300    }
301    if (b == null) {
302      // Both are null. The return is not-defined.
303      return a;
304    }
305    if (!a.getTable().equals(b.getTable())) {
306      // This is an odd one. This should be the right answer.
307      return b;
308    }
309    if (a.isLast()) {
310      return a;
311    }
312    if (b.isLast()) {
313      return b;
314    }
315    int compare = Bytes.compareTo(a.getEndKey(), b.getEndKey());
316    return compare == 0 || compare > 0? a: b;
317  }
318
319  /**
320   * @return True if an overlap found between passed in <code>ri</code> and
321   *   the <code>pair</code>. Does NOT check the pairs themselves overlap.
322   */
323  @VisibleForTesting
324  static boolean isOverlap(RegionInfo ri, Pair<RegionInfo, RegionInfo> pair) {
325    if (ri == null || pair == null) {
326      // Can't be an overlap in either of these cases.
327      return false;
328    }
329    return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond());
330  }
331
332  /**
333   * A union over {@link L} and {@link R}.
334   */
335  private static class Either<L, R> {
336    private final L left;
337    private final R right;
338
339    public static <L, R> Either<L, R> ofLeft(L left) {
340      return new Either<>(left, null);
341    }
342
343    public static <L, R> Either<L, R> ofRight(R right) {
344      return new Either<>(null, right);
345    }
346
347    Either(L left, R right) {
348      this.left = left;
349      this.right = right;
350    }
351
352    public boolean hasLeft() {
353      return left != null;
354    }
355
356    public L getLeft() {
357      if (!hasLeft()) {
358        throw new IllegalStateException("Either contains no left.");
359      }
360      return left;
361    }
362
363    public boolean hasRight() {
364      return right != null;
365    }
366
367    public R getRight() {
368      if (!hasRight()) {
369        throw new IllegalStateException("Either contains no right.");
370      }
371      return right;
372    }
373  }
374}