View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import java.util.ArrayList;
22  import java.util.Collection;
23  import java.util.Comparator;
24  import java.util.List;
25  import java.util.Map.Entry;
26  import java.util.TreeMap;
27  import java.util.TreeSet;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.hbase.classification.InterfaceAudience;
32  import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
33  
34  import com.google.common.collect.ArrayListMultimap;
35  import com.google.common.collect.Multimap;
36  import com.google.common.collect.TreeMultimap;
37  
38  /**
39   * This is a generic region split calculator. It requires Ranges that provide
40   * start, end, and a comparator. It works in two phases -- the first adds ranges
41   * and rejects backwards ranges. Then one calls calcRegions to generate the
42   * multimap that has a start split key as a key and possibly multiple Ranges as
43   * members.
44   * 
45   * To traverse, one normally would get the split set, and iterate through the
46   * calcRegions. Normal regions would have only one entry, holes would have zero,
47   * and any overlaps would have multiple entries.
48   * 
49   * The interface is a bit cumbersome currently but is exposed this way so that
50   * clients can choose how to iterate through the region splits.
51   * 
52   * @param <R>
53   */
54  @InterfaceAudience.Private
55  public class RegionSplitCalculator<R extends KeyRange> {
56    private static final Log LOG = LogFactory.getLog(RegionSplitCalculator.class);
57  
58    private final Comparator<R> rangeCmp;
59    /**
60     * This contains a sorted set of all the possible split points
61     * 
62     * Invariant: once populated this has 0 entries if empty or at most n+1 values
63     * where n == number of added ranges.
64     */
65    private final TreeSet<byte[]> splits = new TreeSet<byte[]>(BYTES_COMPARATOR);
66  
67    /**
68     * This is a map from start key to regions with the same start key.
69     * 
70     * Invariant: This always have n values in total
71     */
72    private final Multimap<byte[], R> starts = ArrayListMultimap.create();
73  
74    /**
75     * SPECIAL CASE
76     */
77    private final static byte[] ENDKEY = null;
78  
79    public RegionSplitCalculator(Comparator<R> cmp) {
80      rangeCmp = cmp;
81    }
82  
83    public final static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator() {
84      @Override
85      public int compare(byte[] l, byte[] r) {
86        if (l == null && r == null)
87          return 0;
88        if (l == null)
89          return 1;
90        if (r == null)
91          return -1;
92        return super.compare(l, r);
93      }
94    };
95  
96    /**
97     * SPECIAL CASE wrapper for empty end key
98     * 
99     * @return ENDKEY if end key is empty, else normal endkey.
100    */
101   private static <R extends KeyRange> byte[] specialEndKey(R range) {
102     byte[] end = range.getEndKey();
103     if (end.length == 0) {
104       return ENDKEY;
105     }
106     return end;
107   }
108 
109   /**
110    * Adds an edge to the split calculator
111    * 
112    * @return true if is included, false if backwards/invalid
113    */
114   public boolean add(R range) {
115     byte[] start = range.getStartKey();
116     byte[] end = specialEndKey(range);
117 
118     // No need to use Arrays.equals because ENDKEY is null
119     if (end != ENDKEY && Bytes.compareTo(start, end) > 0) {
120       // don't allow backwards edges
121       LOG.debug("attempted to add backwards edge: " + Bytes.toString(start)
122           + " " + Bytes.toString(end));
123       return false;
124     }
125 
126     splits.add(start);
127     splits.add(end);
128     starts.put(start, range);
129     return true;
130   }
131 
132   /**
133    * Generates a coverage multimap from split key to Regions that start with the
134    * split key.
135    * 
136    * @return coverage multimap
137    */
138   public Multimap<byte[], R> calcCoverage() {
139     // This needs to be sorted to force the use of the comparator on the values,
140     // otherwise byte array comparison isn't used
141     Multimap<byte[], R> regions = TreeMultimap.create(BYTES_COMPARATOR,
142         rangeCmp);
143 
144     // march through all splits from the start points
145     for (Entry<byte[], Collection<R>> start : starts.asMap().entrySet()) {
146       byte[] key = start.getKey();
147       for (R r : start.getValue()) {
148         regions.put(key, r);
149 
150         for (byte[] coveredSplit : splits.subSet(r.getStartKey(),
151             specialEndKey(r))) {
152           regions.put(coveredSplit, r);
153         }
154       }
155     }
156     return regions;
157   }
158 
159   public TreeSet<byte[]> getSplits() {
160     return splits;
161   }
162 
163   public Multimap<byte[], R> getStarts() {
164     return starts;
165   }
166 
167   /**
168    * Find specified number of top ranges in a big overlap group.
169    * It could return less if there are not that many top ranges.
170    * Once these top ranges are excluded, the big overlap group will
171    * be broken into ranges with no overlapping, or smaller overlapped
172    * groups, and most likely some holes.
173    *
174    * @param bigOverlap a list of ranges that overlap with each other
175    * @param count the max number of ranges to find
176    * @return a list of ranges that overlap with most others
177    */
178   public static <R extends KeyRange> List<R>
179       findBigRanges(Collection<R> bigOverlap, int count) {
180     List<R> bigRanges = new ArrayList<R>();
181 
182     // The key is the count of overlaps,
183     // The value is a list of ranges that have that many overlaps
184     TreeMap<Integer, List<R>> overlapRangeMap = new TreeMap<Integer, List<R>>();
185     for (R r: bigOverlap) {
186       // Calculates the # of overlaps for each region
187       // and populates rangeOverlapMap
188       byte[] startKey = r.getStartKey();
189       byte[] endKey = specialEndKey(r);
190 
191       int overlappedRegions = 0;
192       for (R rr: bigOverlap) {
193         byte[] start = rr.getStartKey();
194         byte[] end = specialEndKey(rr);
195 
196         if (BYTES_COMPARATOR.compare(startKey, end) < 0
197             && BYTES_COMPARATOR.compare(endKey, start) > 0) {
198           overlappedRegions++;
199         }
200       }
201 
202       // One region always overlaps with itself,
203       // so overlappedRegions should be more than 1
204       // for actual overlaps.
205       if (overlappedRegions > 1) {
206         Integer key = Integer.valueOf(overlappedRegions);
207         List<R> ranges = overlapRangeMap.get(key);
208         if (ranges == null) {
209           ranges = new ArrayList<R>();
210           overlapRangeMap.put(key, ranges);
211         }
212         ranges.add(r);
213       }
214     }
215     int toBeAdded = count;
216     for (Integer key: overlapRangeMap.descendingKeySet()) {
217       List<R> chunk = overlapRangeMap.get(key);
218       int chunkSize = chunk.size();
219       if (chunkSize <= toBeAdded) {
220         bigRanges.addAll(chunk);
221         toBeAdded -= chunkSize;
222         if (toBeAdded > 0) continue;
223       } else {
224         // Try to use the middle chunk in case the overlapping is
225         // chained, for example: [a, c), [b, e), [d, g), [f h)...
226         // In such a case, sideline the middle chunk will break
227         // the group efficiently.
228         int start = (chunkSize - toBeAdded)/2;
229         int end = start + toBeAdded;
230         for (int i = start; i < end; i++) {
231           bigRanges.add(chunk.get(i));
232         }
233       }
234       break;
235     }
236     return bigRanges;
237   }
238 }