/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;

/**
 * Server-side fixing of bad or inconsistent state in hbase:meta.
 * Distinct from {@link MetaTableAccessor}, which provides the low-level manipulations of
 * hbase:meta content. MetaFixer is employed by the Master; it 'knows' about holes and
 * orphans and encapsulates their fixing on behalf of the Master.
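 * <p>Illustrative usage only (the Master wires this up itself); a minimal sketch, assuming a
 * {@code MasterServices} handle is at hand:
 * <pre>{@code
 * // Repair holes and overlaps noted in the most recent CatalogJanitor report.
 * MetaFixer fixer = new MetaFixer(masterServices);
 * fixer.fix();
 * }</pre>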
 */
@InterfaceAudience.Private
class MetaFixer {
  private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
  private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
  private static final int MAX_MERGE_COUNT_DEFAULT = 64;

  private final MasterServices masterServices;
  /**
   * Maximum number of regions to merge at a time.
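   * <p>Read from {@code hbase.master.metafixer.max.merge.count} (default 64). For illustration
   * only:
   * <pre>{@code
   * // e.g. in a test or tool that builds the Master's Configuration:
   * conf.setInt("hbase.master.metafixer.max.merge.count", 32);
   * }</pre>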
   */
  private final int maxMergeCount;

  MetaFixer(MasterServices masterServices) {
    this.masterServices = masterServices;
    this.maxMergeCount = this.masterServices.getConfiguration().
      getInt(MAX_MERGE_COUNT_KEY, MAX_MERGE_COUNT_DEFAULT);
  }

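  /**
   * Fix holes, then overlaps, noted in the most recent CatalogJanitor report, and finally run
   * the ReplicationBarrierCleaner. Does nothing beyond logging a hint if the CatalogJanitor
   * has not yet produced a report.
   */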
  void fix() throws IOException {
    CatalogJanitor.Report report = this.masterServices.getCatalogJanitor().getLastReport();
    if (report == null) {
      LOG.info("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in " +
          "shell or wait until CatalogJanitor chore runs.");
      return;
    }
    fixHoles(report);
    fixOverlaps(report);
    // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows which
    // can help clean up a damaged hbase:meta.
    this.masterServices.runReplicationBarrierCleaner();
  }

  /**
   * For each hole noted in the report, papers it over by adding a region to the filesystem and
   * to hbase:meta, then schedules the new regions for round-robin assignment.
   */
  void fixHoles(CatalogJanitor.Report report) {
    final List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles();
    if (holes.isEmpty()) {
      LOG.info("CatalogJanitor Report contains no holes to fix. Skipping.");
      return;
    }

    LOG.info("Identified {} region holes to fix. Detailed fixup progress logged at DEBUG.",
      holes.size());

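    // Build a covering RegionInfo for each hole, persist whatever we can to hbase:meta, and
    // then schedule assignment of the regions that were successfully written.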
    final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes);
    final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos);
    final TransitRegionStateProcedure[] assignProcedures = masterServices
      .getAssignmentManager()
      .createRoundRobinAssignProcedures(newMetaEntries);

    masterServices.getMasterProcedureExecutor().submitProcedures(assignProcedures);
    LOG.info(
      "Scheduled {}/{} new regions for assignment.", assignProcedures.length, holes.size());
  }

  /**
   * Create a new {@link RegionInfo} corresponding to each provided "hole" pair.
   */
  private static List<RegionInfo> createRegionInfosForHoles(
    final List<Pair<RegionInfo, RegionInfo>> holes) {
    final List<RegionInfo> newRegionInfos = holes.stream()
      .map(MetaFixer::getHoleCover)
      .filter(Optional::isPresent)
      .map(Optional::get)
      .collect(Collectors.toList());
    LOG.debug("Constructed {}/{} RegionInfo descriptors corresponding to identified holes.",
      newRegionInfos.size(), holes.size());
    return newRegionInfos;
  }

  /**
   * Attempts to calculate a new {@link RegionInfo} that covers the region range described in
   * {@code hole}.
   * @return the hole-covering {@link RegionInfo}, or {@link Optional#empty()} if no sensible
   *   cover can be determined
   */
  private static Optional<RegionInfo> getHoleCover(Pair<RegionInfo, RegionInfo> hole) {
    final RegionInfo left = hole.getFirst();
    final RegionInfo right = hole.getSecond();

    if (left.getTable().equals(right.getTable())) {
      // Simple case.
      if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) {
        LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey;"
          + " left=<{}>, right=<{}>", left, right);
        return Optional.empty();
      }
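      // Plug the hole with a region spanning from the left region's end key to the right
      // region's start key; e.g. left=[a,b) and right=[d,e) yield a cover of [b,d).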
      return Optional.of(buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey()));
    }

    final boolean leftUndefined = left.equals(RegionInfo.UNDEFINED);
    final boolean rightUndefined = right.equals(RegionInfo.UNDEFINED);
    final boolean last = left.isLast();
    final boolean first = right.isFirst();
    if (leftUndefined && rightUndefined) {
      LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " +
        "UNDEFINED; left=<{}>, right=<{}>", left, right);
      return Optional.empty();
    }
    if (leftUndefined || last) {
      return Optional.of(
        buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW, right.getStartKey()));
    }
    if (rightUndefined || first) {
      return Optional.of(
        buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW));
    }
    LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", left, right);
    return Optional.empty();
  }

  private static RegionInfo buildRegionInfo(TableName tn, byte [] start, byte [] end) {
    return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build();
  }

  /**
   * Create entries in {@code hbase:meta} for each provided {@link RegionInfo}. Best effort.
   * @param masterServices used to connect to {@code hbase:meta}
   * @param newRegionInfos the new {@link RegionInfo} entries to add to {@code hbase:meta}
   * @return a list of {@link RegionInfo} entries for which {@code hbase:meta} entries were
   *   successfully created
   */
  private static List<RegionInfo> createMetaEntries(final MasterServices masterServices,
    final List<RegionInfo> newRegionInfos) {

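    // For each new RegionInfo, attempt to write it (plus any replicas implied by the table's
    // region replication) to hbase:meta, collecting successes and failures so both can be
    // summarized below.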
    final List<Either<List<RegionInfo>, IOException>> addMetaEntriesResults = newRegionInfos.
      stream().map(regionInfo -> {
        try {
          TableDescriptor td = masterServices.getTableDescriptors().get(regionInfo.getTable());

          // Add replicas if needed; replica regions are created with replicaIds starting
          // from 1.
          List<RegionInfo> newRegions = RegionReplicaUtil.addReplicas(
            Collections.singletonList(regionInfo), 1, td.getRegionReplication());

          // Add regions to META
          MetaTableAccessor.addRegionsToMeta(masterServices.getConnection(), newRegions,
            td.getRegionReplication());

          // Setup replication for region replicas if needed
          if (td.getRegionReplication() > 1) {
            ServerRegionReplicaUtil.setupRegionReplicaReplication(
              masterServices.getConfiguration());
          }
          return Either.<List<RegionInfo>, IOException>ofLeft(newRegions);
        } catch (IOException e) {
          return Either.<List<RegionInfo>, IOException>ofRight(e);
        }
      })
      .collect(Collectors.toList());
    final List<RegionInfo> createMetaEntriesSuccesses = addMetaEntriesResults.stream()
      .filter(Either::hasLeft)
      .map(Either::getLeft)
      .flatMap(List::stream)
      .collect(Collectors.toList());
    final List<IOException> createMetaEntriesFailures = addMetaEntriesResults.stream()
      .filter(Either::hasRight)
      .map(Either::getRight)
      .collect(Collectors.toList());
    LOG.debug("Added {}/{} entries to hbase:meta",
      createMetaEntriesSuccesses.size(), newRegionInfos.size());

    if (!createMetaEntriesFailures.isEmpty()) {
      LOG.warn("Failed to create entries in hbase:meta for {}/{} RegionInfo descriptors. First"
          + " failure message included; full list of failures with accompanying stack traces is"
          + " available at log level DEBUG. message={}", createMetaEntriesFailures.size(),
        addMetaEntriesResults.size(), createMetaEntriesFailures.get(0).getMessage());
      if (LOG.isDebugEnabled()) {
        createMetaEntriesFailures.forEach(
          ioe -> LOG.debug("Attempt to fix region hole in hbase:meta failed.", ioe));
      }
    }

    return createMetaEntriesSuccesses;
  }

  /**
   * Fix overlaps noted in the CatalogJanitor consistency report.
   */
  void fixOverlaps(CatalogJanitor.Report report) throws IOException {
    for (Set<RegionInfo> regions: calculateMerges(maxMergeCount, report.getOverlaps())) {
      RegionInfo [] regionsArray = regions.toArray(new RegionInfo [] {});
      try {
        this.masterServices.mergeRegions(regionsArray,
            true, HConstants.NO_NONCE, HConstants.NO_NONCE);
      } catch (MergeRegionException mre) {
        LOG.warn("Failed overlap fix of {}", regionsArray, mre);
      }
    }
  }

  /**
   * Run through <code>overlaps</code> and return a list of merges to run.
   * Presumes overlaps are ordered (which they are coming out of the CatalogJanitor
   * consistency report).
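   * <p>For illustration only (hypothetical regions, all in one table and ordered by start key):
   * <pre>{@code
   * // r1, r2 and r3 overlap one another; r8 and r9 overlap each other only.
   * List<SortedSet<RegionInfo>> merges = calculateMerges(64,
   *   Arrays.asList(Pair.newPair(r1, r2), Pair.newPair(r2, r3), Pair.newPair(r8, r9)));
   * // merges => [ [r1, r2, r3], [r8, r9] ]
   * }</pre>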
   * @param maxMergeCount Maximum regions to merge at a time (avoid merging
   *   100k regions in one go!)
   */
  @VisibleForTesting
  static List<SortedSet<RegionInfo>> calculateMerges(int maxMergeCount,
      List<Pair<RegionInfo, RegionInfo>> overlaps) {
    if (overlaps.isEmpty()) {
      LOG.debug("No overlaps.");
      return Collections.emptyList();
    }
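    // Walk the ordered overlaps, growing the current merge set while each new pair still
    // overlaps the widest (largest end key) region seen so far; start a new merge set when it
    // does not, or when the current set reaches maxMergeCount.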
    List<SortedSet<RegionInfo>> merges = new ArrayList<>();
    SortedSet<RegionInfo> currentMergeSet = new TreeSet<>();
    HashSet<RegionInfo> regionsInMergeSet = new HashSet<>();
    RegionInfo regionInfoWithLargestEndKey = null;
    for (Pair<RegionInfo, RegionInfo> pair: overlaps) {
      if (regionInfoWithLargestEndKey != null) {
        if (!isOverlap(regionInfoWithLargestEndKey, pair) ||
            currentMergeSet.size() >= maxMergeCount) {
          // Log when we cut off a merge because we hit the configured maximum merge limit.
          if (currentMergeSet.size() >= maxMergeCount) {
            LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount);
          }

          // If the merge set is empty or contains only one region, there is no merge to
          // submit; currentMergeSet can be reused in this case.
          if (currentMergeSet.size() <= 1) {
            for (RegionInfo ri : currentMergeSet) {
              regionsInMergeSet.remove(ri);
            }
            currentMergeSet.clear();
          } else {
            merges.add(currentMergeSet);
            currentMergeSet = new TreeSet<>();
          }
        }
      }

      // Do not add the same region to multiple merge sets; doing so would fail
      // the second merge request.
      if (!regionsInMergeSet.contains(pair.getFirst())) {
        currentMergeSet.add(pair.getFirst());
        regionsInMergeSet.add(pair.getFirst());
      }
      if (!regionsInMergeSet.contains(pair.getSecond())) {
        currentMergeSet.add(pair.getSecond());
        regionsInMergeSet.add(pair.getSecond());
      }

      regionInfoWithLargestEndKey = getRegionInfoWithLargestEndKey(
        getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()),
          regionInfoWithLargestEndKey);
    }
    merges.add(currentMergeSet);
    return merges;
  }

  /**
   * @return Either <code>a</code> or <code>b</code>, whichever has the
   *   end key that is furthest along in the table.
   */
  @VisibleForTesting
  static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) {
    if (a == null) {
      // b may be null.
      return b;
    }
    if (b == null) {
      // a is non-null and b is null, so return a.
      return a;
    }
    if (!a.getTable().equals(b.getTable())) {
      // Odd case: the regions belong to different tables. Returning b should be the right answer.
      return b;
    }
    if (a.isLast()) {
      return a;
    }
    if (b.isLast()) {
      return b;
    }
    int compare = Bytes.compareTo(a.getEndKey(), b.getEndKey());
    return compare >= 0 ? a : b;
  }

  /**
   * @return True if an overlap is found between the passed-in <code>ri</code> and either
   *   member of <code>pair</code>. Does NOT check whether the members of the pair overlap
   *   each other.
   */
  @VisibleForTesting
  static boolean isOverlap(RegionInfo ri, Pair<RegionInfo, RegionInfo> pair) {
    if (ri == null || pair == null) {
      // Can't be an overlap in either of these cases.
      return false;
    }
    return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond());
  }

  /**
   * A tagged union over {@code L} and {@code R}: holds either a left value or a right value,
   * never both.
   */
  private static class Either<L, R> {
    private final L left;
    private final R right;

    public static <L, R> Either<L, R> ofLeft(L left) {
      return new Either<>(left, null);
    }

    public static <L, R> Either<L, R> ofRight(R right) {
      return new Either<>(null, right);
    }

    Either(L left, R right) {
      this.left = left;
      this.right = right;
    }

    public boolean hasLeft() {
      return left != null;
    }

    public L getLeft() {
      if (!hasLeft()) {
        throw new IllegalStateException("Either contains no left.");
      }
      return left;
    }

    public boolean hasRight() {
      return right != null;
    }

    public R getRight() {
      if (!hasRight()) {
        throw new IllegalStateException("Either contains no right.");
      }
      return right;
    }
  }
}