View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.util;
21  
22  import static java.lang.Integer.rotateLeft;
23  
24  import java.io.FileInputStream;
25  import java.io.IOException;
26  
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.classification.InterfaceStability;
29  
30  /**
31   * Produces 32-bit hash for hash table lookup.
32   *
33   * <pre>lookup3.c, by Bob Jenkins, May 2006, Public Domain.
34   *
35   * You can use this free for any purpose.  It's in the public domain.
36   * It has no warranty.
37   * </pre>
38   *
39   * @see <a href="http://burtleburtle.net/bob/c/lookup3.c">lookup3.c</a>
40   * @see <a href="http://www.ddj.com/184410284">Hash Functions (and how this
41   * function compares to others such as CRC, MD?, etc</a>
42   * @see <a href="http://burtleburtle.net/bob/hash/doobs.html">Has update on the
43   * Dr. Dobbs Article</a>
44   */
45  @InterfaceAudience.Private
46  @InterfaceStability.Stable
47  public class JenkinsHash extends Hash {
48    private static final int BYTE_MASK = 0xff;
49  
50    private static JenkinsHash _instance = new JenkinsHash();
51  
52    public static Hash getInstance() {
53      return _instance;
54    }
55  
56    /**
57     * Compute the hash of the specified file
58     * @param args name of file to compute hash of.
59     * @throws IOException e
60     */
61    public static void main(String[] args) throws IOException {
62      if (args.length != 1) {
63        System.err.println("Usage: JenkinsHash filename");
64        System.exit(-1);
65      }
66      FileInputStream in = new FileInputStream(args[0]);
67      byte[] bytes = new byte[512];
68      int value = 0;
69      JenkinsHash hash = new JenkinsHash();
70      try {
71        for (int length = in.read(bytes); length > 0; length = in.read(bytes)) {
72          value = hash.hash(new ByteArrayHashKey(bytes, 0, length), value);
73        }
74      } finally {
75        in.close();
76      }
77      System.out.println(Math.abs(value));
78    }
79
80    /**
81     * taken from  hashlittle() -- hash a variable-length key into a 32-bit value
82     *
83     * @param hashKey the key to extract the  bytes for hash algo
84     * @param initval can be any integer value
85     * @return a 32-bit value.  Every bit of the key affects every bit of the
86     * return value.  Two keys differing by one or two bits will have totally
87     * different hash values.
88     *
89     * <p>The best hash table sizes are powers of 2.  There is no need to do mod
90     * a prime (mod is sooo slow!).  If you need less than 32 bits, use a bitmask.
91     * For example, if you need only 10 bits, do
92     * <code>h = (h &amp; hashmask(10));</code>
93     * In which case, the hash table should have hashsize(10) elements.
94     *
95     * <p>If you are hashing n strings byte[][] k, do it like this:
96     * for (int i = 0, h = 0; i &lt; n; ++i) h = hash( k[i], h);
97     *
98     * <p>By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
99     * code any way you wish, private, educational, or commercial.  It's free.
100    *
101    * <p>Use for hash table lookup, or anything where one collision in 2^^32 is
102    * acceptable.  Do NOT use for cryptographic purposes.
103   */
104   @SuppressWarnings("fallthrough")
105   @Override
106   public <T> int hash(HashKey<T> hashKey, int initval) {
107     int length = hashKey.length();
108     int a, b, c;
109     a = b = c = 0xdeadbeef + length + initval;
110     int offset = 0;
111     for (; length > 12; offset += 12, length -= 12) {
112       a += (hashKey.get(offset) & BYTE_MASK);
113       a += ((hashKey.get(offset + 1) & BYTE_MASK) <<  8);
114       a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16);
115       a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24);
116       b += (hashKey.get(offset + 4) & BYTE_MASK);
117       b += ((hashKey.get(offset + 5) & BYTE_MASK) <<  8);
118       b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16);
119       b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24);
120       c += (hashKey.get(offset + 8) & BYTE_MASK);
121       c += ((hashKey.get(offset + 9) & BYTE_MASK) <<  8);
122       c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16);
123       c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24);
124
125       /*
126        * mix -- mix 3 32-bit values reversibly.
127        * This is reversible, so any information in (a,b,c) before mix() is
128        * still in (a,b,c) after mix().
129        *
130        * If four pairs of (a,b,c) inputs are run through mix(), or through
131        * mix() in reverse, there are at least 32 bits of the output that
132        * are sometimes the same for one pair and different for another pair.
133        *
134        * This was tested for:
135        * - pairs that differed by one bit, by two bits, in any combination
136        *   of top bits of (a,b,c), or in any combination of bottom bits of
137        *   (a,b,c).
138        * - "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
139        *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
140        *    is commonly produced by subtraction) look like a single 1-bit
141        *    difference.
142        * - the base values were pseudorandom, all zero but one bit set, or
143        *   all zero plus a counter that starts at zero.
144        *
145        * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
146        * satisfy this are
147        *     4  6  8 16 19  4
148        *     9 15  3 18 27 15
149        *    14  9  3  7 17  3
150        * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for
151        * "differ" defined as + with a one-bit base and a two-bit delta.  I
152        * used http://burtleburtle.net/bob/hash/avalanche.html to choose
153        * the operations, constants, and arrangements of the variables.
154        *
155        * This does not achieve avalanche.  There are input bits of (a,b,c)
156        * that fail to affect some output bits of (a,b,c), especially of a.
157        * The most thoroughly mixed value is c, but it doesn't really even
158        * achieve avalanche in c.
159        *
160        * This allows some parallelism.  Read-after-writes are good at doubling
161        * the number of bits affected, so the goal of mixing pulls in the
162        * opposite direction as the goal of parallelism.  I did what I could.
163        * Rotates seem to cost as much as shifts on every machine I could lay
164        * my hands on, and rotates are much kinder to the top and bottom bits,
165        * so I used rotates.
166        *
167        * #define mix(a,b,c) \
168        * { \
169        *   a -= c;  a ^= rot(c, 4);  c += b; \
170        *   b -= a;  b ^= rot(a, 6);  a += c; \
171        *   c -= b;  c ^= rot(b, 8);  b += a; \
172        *   a -= c;  a ^= rot(c,16);  c += b; \
173        *   b -= a;  b ^= rot(a,19);  a += c; \
174        *   c -= b;  c ^= rot(b, 4);  b += a; \
175        * }
176        *
177        * mix(a,b,c);
178        */
179       a -= c; a ^= rotateLeft(c, 4); c += b;
180       b -= a; b ^= rotateLeft(a, 6); a += c;
181       c -= b; c ^= rotateLeft(b, 8); b += a;
182       a -= c; a ^= rotateLeft(c, 16); c += b;
183       b -= a; b ^= rotateLeft(a, 19); a += c;
184       c -= b; c ^= rotateLeft(b, 4); b += a;
185     }
186
187     //-------------------------------- last block: affect all 32 bits of (c)
188     switch (length) {                   // all the case statements fall through
189     case 12:
190       c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24);
191     case 11:
192       c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16);
193     case 10:
194       c += ((hashKey.get(offset + 9) & BYTE_MASK) <<  8);
195     case  9:
196       c += (hashKey.get(offset + 8) & BYTE_MASK);
197     case  8:
198       b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24);
199     case  7:
200       b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16);
201     case  6:
202       b += ((hashKey.get(offset + 5) & BYTE_MASK) <<  8);
203     case  5:
204       b += (hashKey.get(offset + 4) & BYTE_MASK);
205     case  4:
206       a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24);
207     case  3:
208       a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16);
209     case  2:
210       a += ((hashKey.get(offset + 1) & BYTE_MASK) <<  8);
211     case  1:
212       //noinspection PointlessArithmeticExpression
213       a += (hashKey.get(offset + 0) & BYTE_MASK);
214       break;
215     case  0:
216       return c;
217     }
218     /*
219      * final -- final mixing of 3 32-bit values (a,b,c) into c
220      *
221      * Pairs of (a,b,c) values differing in only a few bits will usually
222      * produce values of c that look totally different.  This was tested for
223      * - pairs that differed by one bit, by two bits, in any combination
224      *   of top bits of (a,b,c), or in any combination of bottom bits of
225      *   (a,b,c).
226      *
227      * - "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
228      *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
229      *   is commonly produced by subtraction) look like a single 1-bit
230      *   difference.
231      *
232      * - the base values were pseudorandom, all zero but one bit set, or
233      *   all zero plus a counter that starts at zero.
234      *
235      * These constants passed:
236      *   14 11 25 16 4 14 24
237      *   12 14 25 16 4 14 24
238      * and these came close:
239      *    4  8 15 26 3 22 24
240      *   10  8 15 26 3 22 24
241      *   11  8 15 26 3 22 24
242      *
243      * #define final(a,b,c) \
244      * {
245      *   c ^= b; c -= rot(b,14); \
246      *   a ^= c; a -= rot(c,11); \
247      *   b ^= a; b -= rot(a,25); \
248      *   c ^= b; c -= rot(b,16); \
249      *   a ^= c; a -= rot(c,4);  \
250      *   b ^= a; b -= rot(a,14); \
251      *   c ^= b; c -= rot(b,24); \
252      * }
253      *
254      */
255     c ^= b; c -= rotateLeft(b, 14);
256     a ^= c; a -= rotateLeft(c, 11);
257     b ^= a; b -= rotateLeft(a, 25);
258     c ^= b; c -= rotateLeft(b, 16);
259     a ^= c; a -= rotateLeft(c, 4);
260     b ^= a; b -= rotateLeft(a, 14);
261     c ^= b; c -= rotateLeft(b, 24);
262     return c;
263   }
264 }