001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import java.util.ArrayList; 021import java.util.Collection; 022import java.util.Comparator; 023import java.util.List; 024import java.util.Map.Entry; 025import java.util.TreeMap; 026import java.util.TreeSet; 027import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator; 028import org.apache.yetus.audience.InterfaceAudience; 029import org.slf4j.Logger; 030import org.slf4j.LoggerFactory; 031 032import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap; 033import org.apache.hbase.thirdparty.com.google.common.collect.Multimap; 034import org.apache.hbase.thirdparty.com.google.common.collect.TreeMultimap; 035 036/** 037 * This is a generic region split calculator. It requires Ranges that provide start, end, and a 038 * comparator. It works in two phases -- the first adds ranges and rejects backwards ranges. Then 039 * one calls calcRegions to generate the multimap that has a start split key as a key and possibly 040 * multiple Ranges as members. To traverse, one normally would get the split set, and iterate 041 * through the calcRegions. Normal regions would have only one entry, holes would have zero, and any 042 * overlaps would have multiple entries. The interface is a bit cumbersome currently but is exposed 043 * this way so that clients can choose how to iterate through the region splits. 044 * @param <R> 045 */ 046@InterfaceAudience.Private 047public class RegionSplitCalculator<R extends KeyRange> { 048 private static final Logger LOG = LoggerFactory.getLogger(RegionSplitCalculator.class); 049 050 private final Comparator<R> rangeCmp; 051 /** 052 * This contains a sorted set of all the possible split points Invariant: once populated this has 053 * 0 entries if empty or at most n+1 values where n == number of added ranges. 054 */ 055 private final TreeSet<byte[]> splits = new TreeSet<>(BYTES_COMPARATOR); 056 057 /** 058 * This is a map from start key to regions with the same start key. Invariant: This always have n 059 * values in total 060 */ 061 private final Multimap<byte[], R> starts = ArrayListMultimap.create(); 062 063 /** 064 * SPECIAL CASE 065 */ 066 private final static byte[] ENDKEY = null; 067 068 public RegionSplitCalculator(Comparator<R> cmp) { 069 rangeCmp = cmp; 070 } 071 072 public final static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator() { 073 @Override 074 public int compare(byte[] l, byte[] r) { 075 if (l == null && r == null) return 0; 076 if (l == null) return 1; 077 if (r == null) return -1; 078 return super.compare(l, r); 079 } 080 }; 081 082 /** 083 * SPECIAL CASE wrapper for empty end key 084 * @return ENDKEY if end key is empty, else normal endkey. 085 */ 086 private static <R extends KeyRange> byte[] specialEndKey(R range) { 087 byte[] end = range.getEndKey(); 088 if (end.length == 0) { 089 return ENDKEY; 090 } 091 return end; 092 } 093 094 /** 095 * Adds an edge to the split calculator 096 * @return true if is included, false if backwards/invalid 097 */ 098 public boolean add(R range) { 099 byte[] start = range.getStartKey(); 100 byte[] end = specialEndKey(range); 101 102 // No need to use Arrays.equals because ENDKEY is null 103 if (end != ENDKEY && Bytes.compareTo(start, end) > 0) { 104 // don't allow backwards edges 105 LOG.debug( 106 "attempted to add backwards edge: " + Bytes.toString(start) + " " + Bytes.toString(end)); 107 return false; 108 } 109 110 splits.add(start); 111 splits.add(end); 112 starts.put(start, range); 113 return true; 114 } 115 116 /** 117 * Generates a coverage multimap from split key to Regions that start with the split key. 118 * @return coverage multimap 119 */ 120 public Multimap<byte[], R> calcCoverage() { 121 // This needs to be sorted to force the use of the comparator on the values, 122 // otherwise byte array comparison isn't used 123 Multimap<byte[], R> regions = TreeMultimap.create(BYTES_COMPARATOR, rangeCmp); 124 125 // march through all splits from the start points 126 for (Entry<byte[], Collection<R>> start : starts.asMap().entrySet()) { 127 byte[] key = start.getKey(); 128 for (R r : start.getValue()) { 129 regions.put(key, r); 130 131 for (byte[] coveredSplit : splits.subSet(r.getStartKey(), specialEndKey(r))) { 132 regions.put(coveredSplit, r); 133 } 134 } 135 } 136 return regions; 137 } 138 139 public TreeSet<byte[]> getSplits() { 140 return splits; 141 } 142 143 public Multimap<byte[], R> getStarts() { 144 return starts; 145 } 146 147 /** 148 * Find specified number of top ranges in a big overlap group. It could return less if there are 149 * not that many top ranges. Once these top ranges are excluded, the big overlap group will be 150 * broken into ranges with no overlapping, or smaller overlapped groups, and most likely some 151 * holes. 152 * @param bigOverlap a list of ranges that overlap with each other 153 * @param count the max number of ranges to find 154 * @return a list of ranges that overlap with most others 155 */ 156 public static <R extends KeyRange> List<R> findBigRanges(Collection<R> bigOverlap, int count) { 157 List<R> bigRanges = new ArrayList<>(); 158 159 // The key is the count of overlaps, 160 // The value is a list of ranges that have that many overlaps 161 TreeMap<Integer, List<R>> overlapRangeMap = new TreeMap<>(); 162 for (R r : bigOverlap) { 163 // Calculates the # of overlaps for each region 164 // and populates rangeOverlapMap 165 byte[] startKey = r.getStartKey(); 166 byte[] endKey = specialEndKey(r); 167 168 int overlappedRegions = 0; 169 for (R rr : bigOverlap) { 170 byte[] start = rr.getStartKey(); 171 byte[] end = specialEndKey(rr); 172 173 if ( 174 BYTES_COMPARATOR.compare(startKey, end) < 0 && BYTES_COMPARATOR.compare(endKey, start) > 0 175 ) { 176 overlappedRegions++; 177 } 178 } 179 180 // One region always overlaps with itself, 181 // so overlappedRegions should be more than 1 182 // for actual overlaps. 183 if (overlappedRegions > 1) { 184 Integer key = Integer.valueOf(overlappedRegions); 185 List<R> ranges = overlapRangeMap.get(key); 186 if (ranges == null) { 187 ranges = new ArrayList<>(); 188 overlapRangeMap.put(key, ranges); 189 } 190 ranges.add(r); 191 } 192 } 193 int toBeAdded = count; 194 for (Integer key : overlapRangeMap.descendingKeySet()) { 195 List<R> chunk = overlapRangeMap.get(key); 196 int chunkSize = chunk.size(); 197 if (chunkSize <= toBeAdded) { 198 bigRanges.addAll(chunk); 199 toBeAdded -= chunkSize; 200 if (toBeAdded > 0) continue; 201 } else { 202 // Try to use the middle chunk in case the overlapping is 203 // chained, for example: [a, c), [b, e), [d, g), [f h)... 204 // In such a case, sideline the middle chunk will break 205 // the group efficiently. 206 int start = (chunkSize - toBeAdded) / 2; 207 int end = start + toBeAdded; 208 for (int i = start; i < end; i++) { 209 bigRanges.add(chunk.get(i)); 210 } 211 } 212 break; 213 } 214 return bigRanges; 215 } 216}