View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.util.byterange;
20  
21  import java.util.ArrayList;
22  import java.util.List;
23  import java.util.Map;
24  
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.util.ArrayUtils;
27  import org.apache.hadoop.hbase.util.ByteRange;
28  import org.apache.hadoop.hbase.util.Bytes;
29  import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
30  
31  import com.google.common.collect.Lists;
32  
33  /**
34   * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
35   * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
36   * <p>
37   * Current implementations are {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet} and
38   * {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a
39   * trie-oriented ByteRangeTrieSet, etc
40   */
41  @InterfaceAudience.Private
42  public abstract class ByteRangeSet {
43  
44    /******************** fields **********************/
45  
46    protected byte[] byteAppender;
47    protected int numBytes;
48  
49    protected Map<ByteRange, Integer> uniqueIndexByUniqueRange;
50  
51    protected ArrayList<ByteRange> uniqueRanges;
52    protected int numUniqueRanges = 0;
53  
54    protected int[] uniqueRangeIndexByInsertionId;
55    protected int numInputs;
56  
57    protected List<Integer> sortedIndexByUniqueIndex;
58    protected int[] sortedIndexByInsertionId;
59    protected ArrayList<ByteRange> sortedRanges;
60  
61  
62    /****************** construct **********************/
63  
64    protected ByteRangeSet() {
65      this.byteAppender = new byte[0];
66      this.uniqueRanges = Lists.newArrayList();
67      this.uniqueRangeIndexByInsertionId = new int[0];
68      this.sortedIndexByUniqueIndex = Lists.newArrayList();
69      this.sortedIndexByInsertionId = new int[0];
70      this.sortedRanges = Lists.newArrayList();
71    }
72  
73    public void reset() {
74      numBytes = 0;
75      uniqueIndexByUniqueRange.clear();
76      numUniqueRanges = 0;
77      numInputs = 0;
78      sortedIndexByUniqueIndex.clear();
79      sortedRanges.clear();
80    }
81  
82  
83    /*************** abstract *************************/
84  
85    public abstract void addToSortedRanges();
86  
87  
88    /**************** methods *************************/
89  
90    /**
91     * Check if the incoming byte range exists.  If not, add it to the backing byteAppender[] and
92     * insert it into the tracking Map uniqueIndexByUniqueRange.
93     */
94    public void add(ByteRange bytes) {
95      Integer index = uniqueIndexByUniqueRange.get(bytes);
96      if (index == null) {
97        index = store(bytes);
98      }
99      int minLength = numInputs + 1;
100     uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId,
101         minLength, 2 * minLength);
102     uniqueRangeIndexByInsertionId[numInputs] = index;
103     ++numInputs;
104   }
105 
106   protected int store(ByteRange bytes) {
107     int indexOfNewElement = numUniqueRanges;
108     if (uniqueRanges.size() <= numUniqueRanges) {
109       uniqueRanges.add(new SimpleMutableByteRange());
110     }
111     ByteRange storedRange = uniqueRanges.get(numUniqueRanges);
112     int neededBytes = numBytes + bytes.getLength();
113     byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes);
114     bytes.deepCopyTo(byteAppender, numBytes);
115     storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet
116     numBytes += bytes.getLength();
117     uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement);
118     int newestUniqueIndex = numUniqueRanges;
119     ++numUniqueRanges;
120     return newestUniqueIndex;
121   }
122 
123   public ByteRangeSet compile() {
124     addToSortedRanges();
125     for (int i = 0; i < sortedRanges.size(); ++i) {
126       sortedIndexByUniqueIndex.add(null);// need to grow the size
127     }
128     // TODO move this to an invert(int[]) util method
129     for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) {
130       int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i));
131       sortedIndexByUniqueIndex.set(uniqueIndex, i);
132     }
133     sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs,
134         numInputs);
135     for (int i = 0; i < numInputs; ++i) {
136       int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i];
137       int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex);
138       sortedIndexByInsertionId[i] = sortedIndex;
139     }
140     return this;
141   }
142 
143   public int getSortedIndexForInsertionId(int insertionId) {
144     return sortedIndexByInsertionId[insertionId];
145   }
146 
147   public int size() {
148     return uniqueIndexByUniqueRange.size();
149   }
150 
151 
152   /***************** standard methods ************************/
153 
154   @Override
155   public String toString() {
156     StringBuilder sb = new StringBuilder();
157     int i = 0;
158     for (ByteRange r : sortedRanges) {
159       if (i > 0) {
160         sb.append("\n");
161       }
162       sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray()));
163       ++i;
164     }
165     sb.append("\ntotalSize:" + numBytes);
166     sb.append("\navgSize:" + getAvgSize());
167     return sb.toString();
168   }
169 
170 
171   /**************** get/set *****************************/
172 
173   public ArrayList<ByteRange> getSortedRanges() {
174     return sortedRanges;
175   }
176 
177   public long getAvgSize() {
178     return numBytes / numUniqueRanges;
179   }
180 
181 }