Source code

001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.util;
021
022import static java.lang.Integer.rotateLeft;
023
024import java.io.FileInputStream;
025import java.io.IOException;
026
027import org.apache.yetus.audience.InterfaceAudience;
028import org.apache.yetus.audience.InterfaceStability;
029
030/**
031 * Produces 32-bit hash for hash table lookup.
032 *
033 * <pre>lookup3.c, by Bob Jenkins, May 2006, Public Domain.
034 *
035 * You can use this free for any purpose.  It's in the public domain.
036 * It has no warranty.
037 * </pre>
038 *
039 * @see <a href="http://burtleburtle.net/bob/c/lookup3.c">lookup3.c</a>
040 * @see <a href="http://www.ddj.com/184410284">Hash Functions (and how this
041 * function compares to others such as CRC, MD?, etc</a>
042 * @see <a href="http://burtleburtle.net/bob/hash/doobs.html">Has update on the
043 * Dr. Dobbs Article</a>
044 */
045@InterfaceAudience.Private
046@InterfaceStability.Stable
047public class JenkinsHash extends Hash {
048  private static final int BYTE_MASK = 0xff;
049
050  private static JenkinsHash _instance = new JenkinsHash();
051
052  public static Hash getInstance() {
053    return _instance;
054  }
055
056  /**
057   * Compute the hash of the specified file
058   * @param args name of file to compute hash of.
059   * @throws IOException e
060   */
061  public static void main(String[] args) throws IOException {
062    if (args.length != 1) {
063      System.err.println("Usage: JenkinsHash filename");
064      System.exit(-1);
065    }
066    FileInputStream in = new FileInputStream(args[0]);
067    byte[] bytes = new byte[512];
068    int value = 0;
069    JenkinsHash hash = new JenkinsHash();
070    try {
071      for (int length = in.read(bytes); length > 0; length = in.read(bytes)) {
072        value = hash.hash(new ByteArrayHashKey(bytes, 0, length), value);
073      }
074    } finally {
075      in.close();
076    }
077    System.out.println(Math.abs(value));
078  }
079
080  /**
081   * taken from  hashlittle() -- hash a variable-length key into a 32-bit value
082   *
083   * @param hashKey the key to extract the  bytes for hash algo
084   * @param initval can be any integer value
085   * @return a 32-bit value.  Every bit of the key affects every bit of the
086   * return value.  Two keys differing by one or two bits will have totally
087   * different hash values.
088   *
089   * <p>The best hash table sizes are powers of 2.  There is no need to do mod
090   * a prime (mod is sooo slow!).  If you need less than 32 bits, use a bitmask.
091   * For example, if you need only 10 bits, do
092   * <code>h = (h &amp; hashmask(10));</code>
093   * In which case, the hash table should have hashsize(10) elements.
094   *
095   * <p>If you are hashing n strings byte[][] k, do it like this:
096   * for (int i = 0, h = 0; i &lt; n; ++i) h = hash( k[i], h);
097   *
098   * <p>By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
099   * code any way you wish, private, educational, or commercial.  It's free.
100   *
101   * <p>Use for hash table lookup, or anything where one collision in 2^^32 is
102   * acceptable.  Do NOT use for cryptographic purposes.
103  */
104  @SuppressWarnings("fallthrough")
105  @Override
106  public <T> int hash(HashKey<T> hashKey, int initval) {
107    int length = hashKey.length();
108    int a, b, c;
109    a = b = c = 0xdeadbeef + length + initval;
110    int offset = 0;
111    for (; length > 12; offset += 12, length -= 12) {
112      a += (hashKey.get(offset) & BYTE_MASK);
113      a += ((hashKey.get(offset + 1) & BYTE_MASK) <<  8);
114      a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16);
115      a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24);
116      b += (hashKey.get(offset + 4) & BYTE_MASK);
117      b += ((hashKey.get(offset + 5) & BYTE_MASK) <<  8);
118      b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16);
119      b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24);
120      c += (hashKey.get(offset + 8) & BYTE_MASK);
121      c += ((hashKey.get(offset + 9) & BYTE_MASK) <<  8);
122      c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16);
123      c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24);
124
125      /*
126       * mix -- mix 3 32-bit values reversibly.
127       * This is reversible, so any information in (a,b,c) before mix() is
128       * still in (a,b,c) after mix().
129       *
130       * If four pairs of (a,b,c) inputs are run through mix(), or through
131       * mix() in reverse, there are at least 32 bits of the output that
132       * are sometimes the same for one pair and different for another pair.
133       *
134       * This was tested for:
135       * - pairs that differed by one bit, by two bits, in any combination
136       *   of top bits of (a,b,c), or in any combination of bottom bits of
137       *   (a,b,c).
138       * - "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
139       *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
140       *    is commonly produced by subtraction) look like a single 1-bit
141       *    difference.
142       * - the base values were pseudorandom, all zero but one bit set, or
143       *   all zero plus a counter that starts at zero.
144       *
145       * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
146       * satisfy this are
147       *     4  6  8 16 19  4
148       *     9 15  3 18 27 15
149       *    14  9  3  7 17  3
150       * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for
151       * "differ" defined as + with a one-bit base and a two-bit delta.  I
152       * used http://burtleburtle.net/bob/hash/avalanche.html to choose
153       * the operations, constants, and arrangements of the variables.
154       *
155       * This does not achieve avalanche.  There are input bits of (a,b,c)
156       * that fail to affect some output bits of (a,b,c), especially of a.
157       * The most thoroughly mixed value is c, but it doesn't really even
158       * achieve avalanche in c.
159       *
160       * This allows some parallelism.  Read-after-writes are good at doubling
161       * the number of bits affected, so the goal of mixing pulls in the
162       * opposite direction as the goal of parallelism.  I did what I could.
163       * Rotates seem to cost as much as shifts on every machine I could lay
164       * my hands on, and rotates are much kinder to the top and bottom bits,
165       * so I used rotates.
166       *
167       * #define mix(a,b,c) \
168       * { \
169       *   a -= c;  a ^= rot(c, 4);  c += b; \
170       *   b -= a;  b ^= rot(a, 6);  a += c; \
171       *   c -= b;  c ^= rot(b, 8);  b += a; \
172       *   a -= c;  a ^= rot(c,16);  c += b; \
173       *   b -= a;  b ^= rot(a,19);  a += c; \
174       *   c -= b;  c ^= rot(b, 4);  b += a; \
175       * }
176       *
177       * mix(a,b,c);
178       */
179      a -= c; a ^= rotateLeft(c, 4); c += b;
180      b -= a; b ^= rotateLeft(a, 6); a += c;
181      c -= b; c ^= rotateLeft(b, 8); b += a;
182      a -= c; a ^= rotateLeft(c, 16); c += b;
183      b -= a; b ^= rotateLeft(a, 19); a += c;
184      c -= b; c ^= rotateLeft(b, 4); b += a;
185    }
186
187    //-------------------------------- last block: affect all 32 bits of (c)
188    switch (length) {                   // all the case statements fall through
189    case 12:
190      c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24);
191    case 11:
192      c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16);
193    case 10:
194      c += ((hashKey.get(offset + 9) & BYTE_MASK) <<  8);
195    case  9:
196      c += (hashKey.get(offset + 8) & BYTE_MASK);
197    case  8:
198      b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24);
199    case  7:
200      b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16);
201    case  6:
202      b += ((hashKey.get(offset + 5) & BYTE_MASK) <<  8);
203    case  5:
204      b += (hashKey.get(offset + 4) & BYTE_MASK);
205    case  4:
206      a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24);
207    case  3:
208      a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16);
209    case  2:
210      a += ((hashKey.get(offset + 1) & BYTE_MASK) <<  8);
211    case  1:
212      //noinspection PointlessArithmeticExpression
213      a += (hashKey.get(offset + 0) & BYTE_MASK);
214      break;
215    case  0:
216      return c;
217    }
218    /*
219     * final -- final mixing of 3 32-bit values (a,b,c) into c
220     *
221     * Pairs of (a,b,c) values differing in only a few bits will usually
222     * produce values of c that look totally different.  This was tested for
223     * - pairs that differed by one bit, by two bits, in any combination
224     *   of top bits of (a,b,c), or in any combination of bottom bits of
225     *   (a,b,c).
226     *
227     * - "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
228     *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
229     *   is commonly produced by subtraction) look like a single 1-bit
230     *   difference.
231     *
232     * - the base values were pseudorandom, all zero but one bit set, or
233     *   all zero plus a counter that starts at zero.
234     *
235     * These constants passed:
236     *   14 11 25 16 4 14 24
237     *   12 14 25 16 4 14 24
238     * and these came close:
239     *    4  8 15 26 3 22 24
240     *   10  8 15 26 3 22 24
241     *   11  8 15 26 3 22 24
242     *
243     * #define final(a,b,c) \
244     * {
245     *   c ^= b; c -= rot(b,14); \
246     *   a ^= c; a -= rot(c,11); \
247     *   b ^= a; b -= rot(a,25); \
248     *   c ^= b; c -= rot(b,16); \
249     *   a ^= c; a -= rot(c,4);  \
250     *   b ^= a; b -= rot(a,14); \
251     *   c ^= b; c -= rot(b,24); \
252     * }
253     *
254     */
255    c ^= b; c -= rotateLeft(b, 14);
256    a ^= c; a -= rotateLeft(c, 11);
257    b ^= a; b -= rotateLeft(a, 25);
258    c ^= b; c -= rotateLeft(b, 16);
259    a ^= c; a -= rotateLeft(c, 4);
260    b ^= a; b -= rotateLeft(a, 14);
261    c ^= b; c -= rotateLeft(b, 24);
262    return c;
263  }
264}