/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
018package org.apache.hadoop.hbase.master;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collections;
023import java.util.List;
024import java.util.Set;
025import java.util.SortedSet;
026import java.util.TreeSet;
027
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.hbase.HConstants;
030import org.apache.hadoop.hbase.MetaTableAccessor;
031import org.apache.hadoop.hbase.TableName;
032import org.apache.hadoop.hbase.client.Put;
033import org.apache.hadoop.hbase.client.RegionInfo;
034import org.apache.hadoop.hbase.client.RegionInfoBuilder;
035import org.apache.hadoop.hbase.exceptions.MergeRegionException;
036import org.apache.hadoop.hbase.regionserver.HRegion;
037import org.apache.hadoop.hbase.util.Bytes;
038import org.apache.hadoop.hbase.util.FSUtils;
039import org.apache.hadoop.hbase.util.Pair;
040import org.apache.yetus.audience.InterfaceAudience;
041import org.slf4j.Logger;
042import org.slf4j.LoggerFactory;
043
044
045import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
046
047
048/**
049 * Server-side fixing of bad or inconsistent state in hbase:meta.
050 * Distinct from MetaTableAccessor because {@link MetaTableAccessor} is about low-level
051 * manipulations driven by the Master. This class MetaFixer is
052 * employed by the Master and it 'knows' about holes and orphans
053 * and encapsulates their fixing on behalf of the Master.
054 */
055@InterfaceAudience.Private
056class MetaFixer {
057  private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
058  private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
059  private static final int MAX_MERGE_COUNT_DEFAULT = 10;
060  private final MasterServices masterServices;
061  /**
062   * Maximum for many regions to merge at a time.
063   */
064  private final int maxMergeCount;
065
066  MetaFixer(MasterServices masterServices) {
067    this.masterServices = masterServices;
068    this.maxMergeCount = this.masterServices.getConfiguration().
069        getInt(MAX_MERGE_COUNT_KEY, MAX_MERGE_COUNT_DEFAULT);
070  }
071
072  void fix() throws IOException {
073    CatalogJanitor.Report report = this.masterServices.getCatalogJanitor().getLastReport();
074    if (report == null) {
075      LOG.info("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in " +
076          "shell or wait until CatalogJanitor chore runs.");
077      return;
078    }
079    fixHoles(report);
080    fixOverlaps(report);
081  }
082
083  /**
084   * If hole, it papers it over by adding a region in the filesystem and to hbase:meta.
085   * Does not assign.
086   */
087  void fixHoles(CatalogJanitor.Report report) throws IOException {
088    List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles();
089    if (holes.isEmpty()) {
090      LOG.debug("No holes.");
091      return;
092    }
093    for (Pair<RegionInfo, RegionInfo> p: holes) {
094      RegionInfo ri = getHoleCover(p);
095      if (ri == null) {
096        continue;
097      }
098      Configuration configuration = this.masterServices.getConfiguration();
099      HRegion.createRegionDir(configuration, ri, FSUtils.getRootDir(configuration));
100      // If an error here, then we'll have a region in the filesystem but not
101      // in hbase:meta (if the below fails). Should be able to rerun the fix.
102      // The second call to createRegionDir will just go through. Idempotent.
103      Put put = MetaTableAccessor.makePutFromRegionInfo(ri, HConstants.LATEST_TIMESTAMP);
104      MetaTableAccessor.putsToMetaTable(this.masterServices.getConnection(),
105          Collections.singletonList(put));
106      LOG.info("Fixed hole by adding {}; region is NOT assigned (assign to online).", ri);
107    }
108  }
109
110  /**
111   * @return Calculated RegionInfo that covers the hole <code>hole</code>
112   */
113  private RegionInfo getHoleCover(Pair<RegionInfo, RegionInfo> hole) {
114    RegionInfo holeCover = null;
115    RegionInfo left = hole.getFirst();
116    RegionInfo right = hole.getSecond();
117    if (left.getTable().equals(right.getTable())) {
118      // Simple case.
119      if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) {
120        LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey; " +
121            "left=<{}>, right=<{}>", left, right);
122        return holeCover;
123      }
124      holeCover = buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey());
125    } else {
126      boolean leftUndefined = left.equals(RegionInfo.UNDEFINED);
127      boolean rightUnefined = right.equals(RegionInfo.UNDEFINED);
128      boolean last = left.isLast();
129      boolean first = right.isFirst();
130      if (leftUndefined && rightUnefined) {
131        LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " +
132            "UNDEFINED; left=<{}>, right=<{}>", left, right);
133        return holeCover;
134      }
135      if (leftUndefined || last) {
136        holeCover = buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW,
137            right.getStartKey());
138      } else if (rightUnefined || first) {
139        holeCover = buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW);
140      } else {
141        LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>",
142            left, right);
143        return holeCover;
144      }
145    }
146    return holeCover;
147  }
148
149  private RegionInfo buildRegionInfo(TableName tn, byte [] start, byte [] end) {
150    return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build();
151  }
152
153  /**
154   * Fix overlaps noted in CJ consistency report.
155   */
156  void fixOverlaps(CatalogJanitor.Report report) throws IOException {
157    for (Set<RegionInfo> regions: calculateMerges(maxMergeCount, report.getOverlaps())) {
158      RegionInfo [] regionsArray = regions.toArray(new RegionInfo [] {});
159      try {
160        this.masterServices.mergeRegions(regionsArray,
161            false, HConstants.NO_NONCE, HConstants.NO_NONCE);
162      } catch (MergeRegionException mre) {
163        LOG.warn("Failed overlap fix of {}", regionsArray, mre);
164      }
165    }
166  }
167
168  /**
169   * Run through <code>overlaps</code> and return a list of merges to run.
170   * Presumes overlaps are ordered (which they are coming out of the CatalogJanitor
171   * consistency report).
172   * @param maxMergeCount Maximum regions to merge at a time (avoid merging
173   *   100k regions in one go!)
174   */
175  @VisibleForTesting
176  static List<SortedSet<RegionInfo>> calculateMerges(int maxMergeCount,
177      List<Pair<RegionInfo, RegionInfo>> overlaps) {
178    if (overlaps.isEmpty()) {
179      LOG.debug("No overlaps.");
180      return Collections.emptyList();
181    }
182    List<SortedSet<RegionInfo>> merges = new ArrayList<>();
183    SortedSet<RegionInfo> currentMergeSet = new TreeSet<>();
184    RegionInfo regionInfoWithlargestEndKey =  null;
185    for (Pair<RegionInfo, RegionInfo> pair: overlaps) {
186      if (regionInfoWithlargestEndKey != null) {
187        if (!isOverlap(regionInfoWithlargestEndKey, pair) ||
188            currentMergeSet.size() >= maxMergeCount) {
189          merges.add(currentMergeSet);
190          currentMergeSet = new TreeSet<>();
191        }
192      }
193      currentMergeSet.add(pair.getFirst());
194      currentMergeSet.add(pair.getSecond());
195      regionInfoWithlargestEndKey = getRegionInfoWithLargestEndKey(
196        getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()),
197          regionInfoWithlargestEndKey);
198    }
199    merges.add(currentMergeSet);
200    return merges;
201  }
202
203  /**
204   * @return Either <code>a</code> or <code>b</code>, whichever has the
205   *   endkey that is furthest along in the Table.
206   */
207  @VisibleForTesting
208  static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) {
209    if (a == null) {
210      // b may be null.
211      return b;
212    }
213    if (b == null) {
214      // Both are null. The return is not-defined.
215      return a;
216    }
217    if (!a.getTable().equals(b.getTable())) {
218      // This is an odd one. This should be the right answer.
219      return b;
220    }
221    if (a.isLast()) {
222      return a;
223    }
224    if (b.isLast()) {
225      return b;
226    }
227    int compare = Bytes.compareTo(a.getEndKey(), b.getEndKey());
228    return compare == 0 || compare > 0? a: b;
229  }
230
231  /**
232   * @return True if an overlap found between passed in <code>ri</code> and
233   *   the <code>pair</code>. Does NOT check the pairs themselves overlap.
234   */
235  @VisibleForTesting
236  static boolean isOverlap(RegionInfo ri, Pair<RegionInfo, RegionInfo> pair) {
237    if (ri == null || pair == null) {
238      // Can't be an overlap in either of these cases.
239      return false;
240    }
241    return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond());
242  }
243}