001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collection;
023import java.util.Collections;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Map;
027import java.util.Optional;
028import java.util.Set;
029import java.util.SortedSet;
030import java.util.TreeSet;
031import java.util.stream.Collectors;
032import org.apache.hadoop.hbase.HBaseIOException;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.MetaTableAccessor;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.client.RegionInfoBuilder;
038import org.apache.hadoop.hbase.client.RegionReplicaUtil;
039import org.apache.hadoop.hbase.client.TableDescriptor;
040import org.apache.hadoop.hbase.exceptions.MergeRegionException;
041import org.apache.hadoop.hbase.master.MasterServices;
042import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
043import org.apache.hadoop.hbase.replication.ReplicationException;
044import org.apache.hadoop.hbase.util.Bytes;
045import org.apache.hadoop.hbase.util.Pair;
046import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
047import org.apache.yetus.audience.InterfaceAudience;
048import org.slf4j.Logger;
049import org.slf4j.LoggerFactory;
050
051import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap;
052import org.apache.hbase.thirdparty.com.google.common.collect.ListMultimap;
053
054/**
055 * Server-side fixing of bad or inconsistent state in hbase:meta.
056 * Distinct from MetaTableAccessor because {@link MetaTableAccessor} is about low-level
057 * manipulations driven by the Master. This class MetaFixer is
058 * employed by the Master and it 'knows' about holes and orphans
059 * and encapsulates their fixing on behalf of the Master.
060 */
061@InterfaceAudience.Private
062public class MetaFixer {
063  private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
064  private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
065  private static final int MAX_MERGE_COUNT_DEFAULT = 64;
066
067  private final MasterServices masterServices;
068  /**
069   * Maximum for many regions to merge at a time.
070   */
071  private final int maxMergeCount;
072
073  public MetaFixer(MasterServices masterServices) {
074    this.masterServices = masterServices;
075    this.maxMergeCount = this.masterServices.getConfiguration().
076        getInt(MAX_MERGE_COUNT_KEY, MAX_MERGE_COUNT_DEFAULT);
077  }
078
079  public void fix() throws IOException {
080    Report report = this.masterServices.getCatalogJanitor().getLastReport();
081    if (report == null) {
082      LOG.info("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in " +
083          "shell or wait until CatalogJanitor chore runs.");
084      return;
085    }
086    fixHoles(report);
087    fixOverlaps(report);
088    // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows which
089    // can help cleaning up damaged hbase:meta.
090    this.masterServices.runReplicationBarrierCleaner();
091  }
092
093  /**
094   * If hole, it papers it over by adding a region in the filesystem and to hbase:meta.
095   * Does not assign.
096   */
097  void fixHoles(Report report) {
098    final List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles();
099    if (holes.isEmpty()) {
100      LOG.info("CatalogJanitor Report contains no holes to fix. Skipping.");
101      return;
102    }
103
104    LOG.info("Identified {} region holes to fix. Detailed fixup progress logged at DEBUG.",
105      holes.size());
106
107    final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes);
108    final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos);
109    final TransitRegionStateProcedure[] assignProcedures = masterServices
110      .getAssignmentManager()
111      .createRoundRobinAssignProcedures(newMetaEntries);
112
113    masterServices.getMasterProcedureExecutor().submitProcedures(assignProcedures);
114    LOG.info(
115      "Scheduled {}/{} new regions for assignment.", assignProcedures.length, holes.size());
116  }
117
118  /**
119   * Create a new {@link RegionInfo} corresponding to each provided "hole" pair.
120   */
121  private static List<RegionInfo> createRegionInfosForHoles(
122    final List<Pair<RegionInfo, RegionInfo>> holes) {
123    final List<RegionInfo> newRegionInfos = holes.stream()
124      .map(MetaFixer::getHoleCover)
125      .filter(Optional::isPresent)
126      .map(Optional::get)
127      .collect(Collectors.toList());
128    LOG.debug("Constructed {}/{} RegionInfo descriptors corresponding to identified holes.",
129      newRegionInfos.size(), holes.size());
130    return newRegionInfos;
131  }
132
133  /**
134   * @return Attempts to calculate a new {@link RegionInfo} that covers the region range described
135   *   in {@code hole}.
136   */
137  private static Optional<RegionInfo> getHoleCover(Pair<RegionInfo, RegionInfo> hole) {
138    final RegionInfo left = hole.getFirst();
139    final RegionInfo right = hole.getSecond();
140
141    if (left.getTable().equals(right.getTable())) {
142      // Simple case.
143      if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) {
144        LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey;"
145          + " left=<{}>, right=<{}>", left, right);
146        return Optional.empty();
147      }
148      return Optional.of(buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey()));
149    }
150
151    final boolean leftUndefined = left.equals(RegionInfoBuilder.UNDEFINED);
152    final boolean rightUndefined = right.equals(RegionInfoBuilder.UNDEFINED);
153    final boolean last = left.isLast();
154    final boolean first = right.isFirst();
155    if (leftUndefined && rightUndefined) {
156      LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " +
157        "UNDEFINED; left=<{}>, right=<{}>", left, right);
158      return Optional.empty();
159    }
160    if (leftUndefined || last) {
161      return Optional.of(
162        buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW, right.getStartKey()));
163    }
164    if (rightUndefined || first) {
165      return Optional.of(
166        buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW));
167    }
168    LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", left, right);
169    return Optional.empty();
170  }
171
172  private static RegionInfo buildRegionInfo(TableName tn, byte [] start, byte [] end) {
173    return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build();
174  }
175
176  /**
177   * Create entries in the {@code hbase:meta} for each provided {@link RegionInfo}. Best effort.
178   * @param masterServices used to connect to {@code hbase:meta}
179   * @param newRegionInfos the new {@link RegionInfo} entries to add to the filesystem
180   * @return a list of {@link RegionInfo} entries for which {@code hbase:meta} entries were
181   *   successfully created
182   */
183  private static List<RegionInfo> createMetaEntries(final MasterServices masterServices,
184    final List<RegionInfo> newRegionInfos) {
185
186    final List<Either<List<RegionInfo>, IOException>> addMetaEntriesResults = newRegionInfos.
187      stream().map(regionInfo -> {
188        try {
189          TableDescriptor td = masterServices.getTableDescriptors().get(regionInfo.getTable());
190
191          // Add replicas if needed
192          // we need to create regions with replicaIds starting from 1
193          List<RegionInfo> newRegions = RegionReplicaUtil
194            .addReplicas(Collections.singletonList(regionInfo), 1, td.getRegionReplication());
195
196          // Add regions to META
197          MetaTableAccessor.addRegionsToMeta(masterServices.getConnection(), newRegions,
198            td.getRegionReplication());
199
200          // Setup replication for region replicas if needed
201          if (td.getRegionReplication() > 1) {
202            ServerRegionReplicaUtil.setupRegionReplicaReplication(masterServices);
203          }
204          return Either.<List<RegionInfo>, IOException> ofLeft(newRegions);
205        } catch (IOException e) {
206          return Either.<List<RegionInfo>, IOException> ofRight(e);
207        } catch (ReplicationException e) {
208          return Either.<List<RegionInfo>, IOException> ofRight(new HBaseIOException(e));
209        }
210      })
211      .collect(Collectors.toList());
212    final List<RegionInfo> createMetaEntriesSuccesses = addMetaEntriesResults.stream()
213      .filter(Either::hasLeft)
214      .map(Either::getLeft)
215      .flatMap(List::stream)
216      .collect(Collectors.toList());
217    final List<IOException> createMetaEntriesFailures = addMetaEntriesResults.stream()
218      .filter(Either::hasRight)
219      .map(Either::getRight)
220      .collect(Collectors.toList());
221    LOG.debug("Added {}/{} entries to hbase:meta",
222      createMetaEntriesSuccesses.size(), newRegionInfos.size());
223
224    if (!createMetaEntriesFailures.isEmpty()) {
225      LOG.warn("Failed to create entries in hbase:meta for {}/{} RegionInfo descriptors. First"
226          + " failure message included; full list of failures with accompanying stack traces is"
227          + " available at log level DEBUG. message={}", createMetaEntriesFailures.size(),
228        addMetaEntriesResults.size(), createMetaEntriesFailures.get(0).getMessage());
229      if (LOG.isDebugEnabled()) {
230        createMetaEntriesFailures.forEach(
231          ioe -> LOG.debug("Attempt to fix region hole in hbase:meta failed.", ioe));
232      }
233    }
234
235    return createMetaEntriesSuccesses;
236  }
237
238  /**
239   * Fix overlaps noted in CJ consistency report.
240   */
241  List<Long> fixOverlaps(Report report) throws IOException {
242    List<Long> pidList = new ArrayList<>();
243    for (Set<RegionInfo> regions: calculateMerges(maxMergeCount, report.getOverlaps())) {
244      RegionInfo [] regionsArray = regions.toArray(new RegionInfo [] {});
245      try {
246        pidList.add(this.masterServices
247          .mergeRegions(regionsArray, true, HConstants.NO_NONCE, HConstants.NO_NONCE));
248      } catch (MergeRegionException mre) {
249        LOG.warn("Failed overlap fix of {}", regionsArray, mre);
250      }
251    }
252    return pidList;
253  }
254
255  /**
256   * Run through <code>overlaps</code> and return a list of merges to run.
257   * Presumes overlaps are ordered (which they are coming out of the CatalogJanitor
258   * consistency report).
259   * @param maxMergeCount Maximum regions to merge at a time (avoid merging
260   *   100k regions in one go!)
261   */
262  static List<SortedSet<RegionInfo>> calculateMerges(int maxMergeCount,
263      List<Pair<RegionInfo, RegionInfo>> overlaps) {
264    if (overlaps.isEmpty()) {
265      LOG.debug("No overlaps.");
266      return Collections.emptyList();
267    }
268    List<SortedSet<RegionInfo>> merges = new ArrayList<>();
269    // First group overlaps by table then calculate merge table by table.
270    ListMultimap<TableName, Pair<RegionInfo, RegionInfo>> overlapGroups =
271      ArrayListMultimap.create();
272    for (Pair<RegionInfo, RegionInfo> pair : overlaps) {
273      overlapGroups.put(pair.getFirst().getTable(), pair);
274    }
275    for (Map.Entry<TableName, Collection<Pair<RegionInfo, RegionInfo>>> entry : overlapGroups
276      .asMap().entrySet()) {
277      calculateTableMerges(maxMergeCount, merges, entry.getValue());
278    }
279    return merges;
280  }
281
282  private static void calculateTableMerges(int maxMergeCount, List<SortedSet<RegionInfo>> merges,
283    Collection<Pair<RegionInfo, RegionInfo>> overlaps) {
284    SortedSet<RegionInfo> currentMergeSet = new TreeSet<>();
285    HashSet<RegionInfo> regionsInMergeSet = new HashSet<>();
286    RegionInfo regionInfoWithlargestEndKey =  null;
287    for (Pair<RegionInfo, RegionInfo> pair: overlaps) {
288      if (regionInfoWithlargestEndKey != null) {
289        if (!isOverlap(regionInfoWithlargestEndKey, pair) ||
290            currentMergeSet.size() >= maxMergeCount) {
291          // Log when we cut-off-merge because we hit the configured maximum merge limit.
292          if (currentMergeSet.size() >= maxMergeCount) {
293            LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount);
294          }
295
296          // In the case of the merge set contains only 1 region or empty, it does not need to
297          // submit this merge request as no merge is going to happen. currentMergeSet can be
298          // reused in this case.
299          if (currentMergeSet.size() <= 1) {
300            for (RegionInfo ri : currentMergeSet) {
301              regionsInMergeSet.remove(ri);
302            }
303            currentMergeSet.clear();
304          } else {
305            merges.add(currentMergeSet);
306            currentMergeSet = new TreeSet<>();
307          }
308        }
309      }
310
311      // Do not add the same region into multiple merge set, this will fail
312      // the second merge request.
313      if (!regionsInMergeSet.contains(pair.getFirst())) {
314        currentMergeSet.add(pair.getFirst());
315        regionsInMergeSet.add(pair.getFirst());
316      }
317      if (!regionsInMergeSet.contains(pair.getSecond())) {
318        currentMergeSet.add(pair.getSecond());
319        regionsInMergeSet.add(pair.getSecond());
320      }
321
322      regionInfoWithlargestEndKey = getRegionInfoWithLargestEndKey(
323        getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()),
324          regionInfoWithlargestEndKey);
325    }
326    merges.add(currentMergeSet);
327  }
328
329  /**
330   * @return Either <code>a</code> or <code>b</code>, whichever has the
331   *   endkey that is furthest along in the Table.
332   */
333  static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) {
334    if (a == null) {
335      // b may be null.
336      return b;
337    }
338    if (b == null) {
339      // Both are null. The return is not-defined.
340      return a;
341    }
342    if (!a.getTable().equals(b.getTable())) {
343      // This is an odd one. This should be the right answer.
344      return b;
345    }
346    if (a.isLast()) {
347      return a;
348    }
349    if (b.isLast()) {
350      return b;
351    }
352    int compare = Bytes.compareTo(a.getEndKey(), b.getEndKey());
353    return compare == 0 || compare > 0? a: b;
354  }
355
356  /**
357   * @return True if an overlap found between passed in <code>ri</code> and
358   *   the <code>pair</code>. Does NOT check the pairs themselves overlap.
359   */
360  static boolean isOverlap(RegionInfo ri, Pair<RegionInfo, RegionInfo> pair) {
361    if (ri == null || pair == null) {
362      // Can't be an overlap in either of these cases.
363      return false;
364    }
365    return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond());
366  }
367
368  /**
369   * A union over {@link L} and {@link R}.
370   */
371  private static class Either<L, R> {
372    private final L left;
373    private final R right;
374
375    public static <L, R> Either<L, R> ofLeft(L left) {
376      return new Either<>(left, null);
377    }
378
379    public static <L, R> Either<L, R> ofRight(R right) {
380      return new Either<>(null, right);
381    }
382
383    Either(L left, R right) {
384      this.left = left;
385      this.right = right;
386    }
387
388    public boolean hasLeft() {
389      return left != null;
390    }
391
392    public L getLeft() {
393      if (!hasLeft()) {
394        throw new IllegalStateException("Either contains no left.");
395      }
396      return left;
397    }
398
399    public boolean hasRight() {
400      return right != null;
401    }
402
403    public R getRight() {
404      if (!hasRight()) {
405        throw new IllegalStateException("Either contains no right.");
406      }
407      return right;
408    }
409  }
410}