001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collection;
023import java.util.Collections;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Map;
027import java.util.Optional;
028import java.util.Set;
029import java.util.SortedSet;
030import java.util.TreeSet;
031import java.util.stream.Collectors;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.MetaTableAccessor;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.client.RegionInfoBuilder;
037import org.apache.hadoop.hbase.client.RegionReplicaUtil;
038import org.apache.hadoop.hbase.client.TableDescriptor;
039import org.apache.hadoop.hbase.exceptions.MergeRegionException;
040import org.apache.hadoop.hbase.master.MasterServices;
041import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
042import org.apache.hadoop.hbase.util.Bytes;
043import org.apache.hadoop.hbase.util.Pair;
044import org.apache.yetus.audience.InterfaceAudience;
045import org.slf4j.Logger;
046import org.slf4j.LoggerFactory;
047
048import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap;
049import org.apache.hbase.thirdparty.com.google.common.collect.ListMultimap;
050
051/**
052 * Server-side fixing of bad or inconsistent state in hbase:meta. Distinct from MetaTableAccessor
053 * because {@link MetaTableAccessor} is about low-level manipulations driven by the Master. This
054 * class MetaFixer is employed by the Master and it 'knows' about holes and orphans and encapsulates
055 * their fixing on behalf of the Master.
056 */
057@InterfaceAudience.Private
058public class MetaFixer {
059  private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
060  private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
061  private static final int MAX_MERGE_COUNT_DEFAULT = 64;
062
063  private final MasterServices masterServices;
064  /**
065   * Maximum for many regions to merge at a time.
066   */
067  private final int maxMergeCount;
068
069  public MetaFixer(MasterServices masterServices) {
070    this.masterServices = masterServices;
071    this.maxMergeCount =
072      this.masterServices.getConfiguration().getInt(MAX_MERGE_COUNT_KEY, MAX_MERGE_COUNT_DEFAULT);
073  }
074
075  public void fix() throws IOException {
076    CatalogJanitorReport report = this.masterServices.getCatalogJanitor().getLastReport();
077    if (report == null) {
078      LOG.info("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in "
079        + "shell or wait until CatalogJanitor chore runs.");
080      return;
081    }
082    fixHoles(report);
083    fixOverlaps(report);
084    // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows which
085    // can help cleaning up damaged hbase:meta.
086    this.masterServices.runReplicationBarrierCleaner();
087  }
088
089  /**
090   * If hole, it papers it over by adding a region in the filesystem and to hbase:meta. Does not
091   * assign.
092   */
093  void fixHoles(CatalogJanitorReport report) {
094    final List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles();
095    if (holes.isEmpty()) {
096      LOG.info("CatalogJanitor Report contains no holes to fix. Skipping.");
097      return;
098    }
099
100    LOG.info("Identified {} region holes to fix. Detailed fixup progress logged at DEBUG.",
101      holes.size());
102
103    final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes);
104    final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos);
105    final TransitRegionStateProcedure[] assignProcedures =
106      masterServices.getAssignmentManager().createRoundRobinAssignProcedures(newMetaEntries);
107
108    masterServices.getMasterProcedureExecutor().submitProcedures(assignProcedures);
109    LOG.info("Scheduled {}/{} new regions for assignment.", assignProcedures.length, holes.size());
110  }
111
112  /**
113   * Create a new {@link RegionInfo} corresponding to each provided "hole" pair.
114   */
115  private static List<RegionInfo>
116    createRegionInfosForHoles(final List<Pair<RegionInfo, RegionInfo>> holes) {
117    final List<RegionInfo> newRegionInfos = holes.stream().map(MetaFixer::getHoleCover)
118      .filter(Optional::isPresent).map(Optional::get).collect(Collectors.toList());
119    LOG.debug("Constructed {}/{} RegionInfo descriptors corresponding to identified holes.",
120      newRegionInfos.size(), holes.size());
121    return newRegionInfos;
122  }
123
124  /**
125   * @return Attempts to calculate a new {@link RegionInfo} that covers the region range described
126   *         in {@code hole}.
127   */
128  private static Optional<RegionInfo> getHoleCover(Pair<RegionInfo, RegionInfo> hole) {
129    final RegionInfo left = hole.getFirst();
130    final RegionInfo right = hole.getSecond();
131
132    if (left.getTable().equals(right.getTable())) {
133      // Simple case.
134      if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) {
135        LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey;"
136          + " left=<{}>, right=<{}>", left, right);
137        return Optional.empty();
138      }
139      return Optional.of(buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey()));
140    }
141
142    final boolean leftUndefined = left.equals(RegionInfoBuilder.UNDEFINED);
143    final boolean rightUndefined = right.equals(RegionInfoBuilder.UNDEFINED);
144    final boolean last = left.isLast();
145    final boolean first = right.isFirst();
146    if (leftUndefined && rightUndefined) {
147      LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are "
148        + "UNDEFINED; left=<{}>, right=<{}>", left, right);
149      return Optional.empty();
150    }
151    if (leftUndefined || last) {
152      return Optional
153        .of(buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW, right.getStartKey()));
154    }
155    if (rightUndefined || first) {
156      return Optional
157        .of(buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW));
158    }
159    LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", left, right);
160    return Optional.empty();
161  }
162
163  private static RegionInfo buildRegionInfo(TableName tn, byte[] start, byte[] end) {
164    return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build();
165  }
166
167  /**
168   * Create entries in the {@code hbase:meta} for each provided {@link RegionInfo}. Best effort.
169   * @param masterServices used to connect to {@code hbase:meta}
170   * @param newRegionInfos the new {@link RegionInfo} entries to add to the filesystem
171   * @return a list of {@link RegionInfo} entries for which {@code hbase:meta} entries were
172   *         successfully created
173   */
174  private static List<RegionInfo> createMetaEntries(final MasterServices masterServices,
175    final List<RegionInfo> newRegionInfos) {
176
177    final List<Either<List<RegionInfo>, IOException>> addMetaEntriesResults =
178      newRegionInfos.stream().map(regionInfo -> {
179        try {
180          TableDescriptor td = masterServices.getTableDescriptors().get(regionInfo.getTable());
181
182          // Add replicas if needed
183          // we need to create regions with replicaIds starting from 1
184          List<RegionInfo> newRegions = RegionReplicaUtil
185            .addReplicas(Collections.singletonList(regionInfo), 1, td.getRegionReplication());
186
187          // Add regions to META
188          MetaTableAccessor.addRegionsToMeta(masterServices.getConnection(), newRegions,
189            td.getRegionReplication());
190
191          return Either.<List<RegionInfo>, IOException> ofLeft(newRegions);
192        } catch (IOException e) {
193          return Either.<List<RegionInfo>, IOException> ofRight(e);
194        }
195      }).collect(Collectors.toList());
196    final List<RegionInfo> createMetaEntriesSuccesses =
197      addMetaEntriesResults.stream().filter(Either::hasLeft).map(Either::getLeft)
198        .flatMap(List::stream).collect(Collectors.toList());
199    final List<IOException> createMetaEntriesFailures = addMetaEntriesResults.stream()
200      .filter(Either::hasRight).map(Either::getRight).collect(Collectors.toList());
201    LOG.debug("Added {}/{} entries to hbase:meta", createMetaEntriesSuccesses.size(),
202      newRegionInfos.size());
203
204    if (!createMetaEntriesFailures.isEmpty()) {
205      LOG.warn(
206        "Failed to create entries in hbase:meta for {}/{} RegionInfo descriptors. First"
207          + " failure message included; full list of failures with accompanying stack traces is"
208          + " available at log level DEBUG. message={}",
209        createMetaEntriesFailures.size(), addMetaEntriesResults.size(),
210        createMetaEntriesFailures.get(0).getMessage());
211      if (LOG.isDebugEnabled()) {
212        createMetaEntriesFailures
213          .forEach(ioe -> LOG.debug("Attempt to fix region hole in hbase:meta failed.", ioe));
214      }
215    }
216
217    return createMetaEntriesSuccesses;
218  }
219
220  /**
221   * Fix overlaps noted in CJ consistency report.
222   */
223  List<Long> fixOverlaps(CatalogJanitorReport report) throws IOException {
224    List<Long> pidList = new ArrayList<>();
225    for (Set<RegionInfo> regions : calculateMerges(maxMergeCount, report.getOverlaps())) {
226      RegionInfo[] regionsArray = regions.toArray(new RegionInfo[] {});
227      try {
228        pidList.add(this.masterServices.mergeRegions(regionsArray, true, HConstants.NO_NONCE,
229          HConstants.NO_NONCE));
230      } catch (MergeRegionException mre) {
231        LOG.warn("Failed overlap fix of {}", regionsArray, mre);
232      }
233    }
234    return pidList;
235  }
236
237  /**
238   * Run through <code>overlaps</code> and return a list of merges to run. Presumes overlaps are
239   * ordered (which they are coming out of the CatalogJanitor consistency report).
240   * @param maxMergeCount Maximum regions to merge at a time (avoid merging 100k regions in one go!)
241   */
242  static List<SortedSet<RegionInfo>> calculateMerges(int maxMergeCount,
243    List<Pair<RegionInfo, RegionInfo>> overlaps) {
244    if (overlaps.isEmpty()) {
245      LOG.debug("No overlaps.");
246      return Collections.emptyList();
247    }
248    List<SortedSet<RegionInfo>> merges = new ArrayList<>();
249    // First group overlaps by table then calculate merge table by table.
250    ListMultimap<TableName, Pair<RegionInfo, RegionInfo>> overlapGroups =
251      ArrayListMultimap.create();
252    for (Pair<RegionInfo, RegionInfo> pair : overlaps) {
253      overlapGroups.put(pair.getFirst().getTable(), pair);
254    }
255    for (Map.Entry<TableName, Collection<Pair<RegionInfo, RegionInfo>>> entry : overlapGroups
256      .asMap().entrySet()) {
257      calculateTableMerges(maxMergeCount, merges, entry.getValue());
258    }
259    return merges;
260  }
261
262  private static void calculateTableMerges(int maxMergeCount, List<SortedSet<RegionInfo>> merges,
263    Collection<Pair<RegionInfo, RegionInfo>> overlaps) {
264    SortedSet<RegionInfo> currentMergeSet = new TreeSet<>();
265    HashSet<RegionInfo> regionsInMergeSet = new HashSet<>();
266    RegionInfo regionInfoWithlargestEndKey = null;
267    for (Pair<RegionInfo, RegionInfo> pair : overlaps) {
268      if (regionInfoWithlargestEndKey != null) {
269        if (
270          !isOverlap(regionInfoWithlargestEndKey, pair) || currentMergeSet.size() >= maxMergeCount
271        ) {
272          // Log when we cut-off-merge because we hit the configured maximum merge limit.
273          if (currentMergeSet.size() >= maxMergeCount) {
274            LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount);
275          }
276
277          // In the case of the merge set contains only 1 region or empty, it does not need to
278          // submit this merge request as no merge is going to happen. currentMergeSet can be
279          // reused in this case.
280          if (currentMergeSet.size() <= 1) {
281            for (RegionInfo ri : currentMergeSet) {
282              regionsInMergeSet.remove(ri);
283            }
284            currentMergeSet.clear();
285          } else {
286            merges.add(currentMergeSet);
287            currentMergeSet = new TreeSet<>();
288          }
289        }
290      }
291
292      // Do not add the same region into multiple merge set, this will fail
293      // the second merge request.
294      if (!regionsInMergeSet.contains(pair.getFirst())) {
295        currentMergeSet.add(pair.getFirst());
296        regionsInMergeSet.add(pair.getFirst());
297      }
298      if (!regionsInMergeSet.contains(pair.getSecond())) {
299        currentMergeSet.add(pair.getSecond());
300        regionsInMergeSet.add(pair.getSecond());
301      }
302
303      regionInfoWithlargestEndKey = getRegionInfoWithLargestEndKey(
304        getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()),
305        regionInfoWithlargestEndKey);
306    }
307    merges.add(currentMergeSet);
308  }
309
310  /**
311   * @return Either <code>a</code> or <code>b</code>, whichever has the endkey that is furthest
312   *         along in the Table.
313   */
314  static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) {
315    if (a == null) {
316      // b may be null.
317      return b;
318    }
319    if (b == null) {
320      // Both are null. The return is not-defined.
321      return a;
322    }
323    if (!a.getTable().equals(b.getTable())) {
324      // This is an odd one. This should be the right answer.
325      return b;
326    }
327    if (a.isLast()) {
328      return a;
329    }
330    if (b.isLast()) {
331      return b;
332    }
333    int compare = Bytes.compareTo(a.getEndKey(), b.getEndKey());
334    return compare == 0 || compare > 0 ? a : b;
335  }
336
337  /**
338   * @return True if an overlap found between passed in <code>ri</code> and the <code>pair</code>.
339   *         Does NOT check the pairs themselves overlap.
340   */
341  static boolean isOverlap(RegionInfo ri, Pair<RegionInfo, RegionInfo> pair) {
342    if (ri == null || pair == null) {
343      // Can't be an overlap in either of these cases.
344      return false;
345    }
346    return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond());
347  }
348
349  /**
350   * A union over {@link L} and {@link R}.
351   */
352  private static class Either<L, R> {
353    private final L left;
354    private final R right;
355
356    public static <L, R> Either<L, R> ofLeft(L left) {
357      return new Either<>(left, null);
358    }
359
360    public static <L, R> Either<L, R> ofRight(R right) {
361      return new Either<>(null, right);
362    }
363
364    Either(L left, R right) {
365      this.left = left;
366      this.right = right;
367    }
368
369    public boolean hasLeft() {
370      return left != null;
371    }
372
373    public L getLeft() {
374      if (!hasLeft()) {
375        throw new IllegalStateException("Either contains no left.");
376      }
377      return left;
378    }
379
380    public boolean hasRight() {
381      return right != null;
382    }
383
384    public R getRight() {
385      if (!hasRight()) {
386        throw new IllegalStateException("Either contains no right.");
387      }
388      return right;
389    }
390  }
391}