View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
20  
21  import java.io.ByteArrayOutputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.util.Arrays;
25  import java.util.HashSet;
26  
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.util.ArrayUtils;
29  import org.apache.hadoop.hbase.util.CollectionUtils;
30  import org.apache.hadoop.hbase.util.vint.UFIntTool;
31  
32  import com.google.common.base.Joiner;
33  
34  /**
35   * Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a
36   * collection of Cells.
37   *
38   * 1. add longs to a HashSet for fast de-duplication
39   * 2. keep track of the min and max
40   * 3. copy all values to a new long[]
41   * 4. Collections.sort the long[]
42   * 5. calculate maxDelta = max - min
43   * 6. determine FInt width based on maxDelta
44   * 7. PrefixTreeEncoder binary searches to find index of each value
45   */
46  @InterfaceAudience.Private
47  public class LongEncoder {
48  
49    /****************** fields ****************************/
50  
51    protected HashSet<Long> uniqueValues;
52    protected long[] sortedUniqueValues;
53    protected long min, max, maxDelta;
54  
55    protected int bytesPerDelta;
56    protected int bytesPerIndex;
57    protected int totalCompressedBytes;
58  
59  
60    /****************** construct ****************************/
61  
62    public LongEncoder() {
63      this.uniqueValues = new HashSet<Long>();
64    }
65  
66    public void reset() {
67      uniqueValues.clear();
68      sortedUniqueValues = null;
69      min = Long.MAX_VALUE;
70      max = Long.MIN_VALUE;
71      maxDelta = Long.MIN_VALUE;
72      bytesPerIndex = 0;
73      bytesPerDelta = 0;
74      totalCompressedBytes = 0;
75    }
76  
77  
78    /************* methods ***************************/
79  
80    public void add(long timestamp) {
81      uniqueValues.add(timestamp);
82    }
83  
84    public LongEncoder compile() {
85      int numUnique = uniqueValues.size();
86      if (numUnique == 1) {
87        min = CollectionUtils.getFirst(uniqueValues);
88        sortedUniqueValues = new long[] { min };
89        return this;
90      }
91  
92      sortedUniqueValues = new long[numUnique];
93      int lastIndex = -1;
94      for (long value : uniqueValues) {
95        sortedUniqueValues[++lastIndex] = value;
96      }
97      Arrays.sort(sortedUniqueValues);
98      min = ArrayUtils.getFirst(sortedUniqueValues);
99      max = ArrayUtils.getLast(sortedUniqueValues);
100     maxDelta = max - min;
101     if (maxDelta > 0) {
102       bytesPerDelta = UFIntTool.numBytes(maxDelta);
103     } else {
104       bytesPerDelta = 0;
105     }
106 
107     int maxIndex = numUnique - 1;
108     bytesPerIndex = UFIntTool.numBytes(maxIndex);
109 
110     totalCompressedBytes = numUnique * bytesPerDelta;
111 
112     return this;
113   }
114 
115   public long getDelta(int index) {
116     if (sortedUniqueValues.length == 0) {
117       return 0;
118     }
119     return sortedUniqueValues[index] - min;
120   }
121 
122   public int getIndex(long value) {
123     // should always find an exact match
124     return Arrays.binarySearch(sortedUniqueValues, value);
125   }
126 
127   public void writeBytes(OutputStream os) throws IOException {
128     for (int i = 0; i < sortedUniqueValues.length; ++i) {
129       long delta = sortedUniqueValues[i] - min;
130       UFIntTool.writeBytes(bytesPerDelta, delta, os);
131     }
132   }
133 
134   //convenience method for tests
135   public byte[] getByteArray() throws IOException{
136     ByteArrayOutputStream baos = new ByteArrayOutputStream();
137     writeBytes(baos);
138     return baos.toByteArray();
139   }
140 
141   public int getOutputArrayLength() {
142     return sortedUniqueValues.length * bytesPerDelta;
143   }
144 
145   public int getNumUniqueValues() {
146     return sortedUniqueValues.length;
147   }
148 
149 
150   /******************* Object methods **********************/
151 
152   @Override
153   public String toString() {
154     if (ArrayUtils.isEmpty(sortedUniqueValues)) {
155       return "[]";
156     }
157     return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]";
158   }
159 
160 
161   /******************** get/set **************************/
162 
163   public long getMin() {
164     return min;
165   }
166 
167   public int getBytesPerDelta() {
168     return bytesPerDelta;
169   }
170 
171   public int getBytesPerIndex() {
172     return bytesPerIndex;
173   }
174 
175   public int getTotalCompressedBytes() {
176     return totalCompressedBytes;
177   }
178 
179   public long[] getSortedUniqueTimestamps() {
180     return sortedUniqueValues;
181   }
182 
183 }