001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import static java.lang.Integer.rotateLeft;
021
022import java.io.FileInputStream;
023import java.io.IOException;
024import org.apache.yetus.audience.InterfaceAudience;
025import org.apache.yetus.audience.InterfaceStability;
026
027/**
028 * Produces 32-bit hash for hash table lookup.
029 *
030 * <pre>
031 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
032 *
033 * You can use this free for any purpose.  It's in the public domain.
034 * It has no warranty.
035 * </pre>
036 *
037 * @see <a href="http://burtleburtle.net/bob/c/lookup3.c">lookup3.c</a>
038 * @see <a href="http://www.ddj.com/184410284">Hash Functions (and how this function compares to
039 *      others such as CRC, MD?, etc</a>
040 * @see <a href="http://burtleburtle.net/bob/hash/doobs.html">Has update on the Dr. Dobbs
041 *      Article</a>
042 */
043@InterfaceAudience.Private
044@InterfaceStability.Stable
045public class JenkinsHash extends Hash {
046  private static final int BYTE_MASK = 0xff;
047
048  private static JenkinsHash _instance = new JenkinsHash();
049
050  public static Hash getInstance() {
051    return _instance;
052  }
053
054  /**
055   * Compute the hash of the specified file
056   * @param args name of file to compute hash of.
057   * @throws IOException e
058   */
059  public static void main(String[] args) throws IOException {
060    if (args.length != 1) {
061      System.err.println("Usage: JenkinsHash filename");
062      System.exit(-1);
063    }
064    FileInputStream in = new FileInputStream(args[0]);
065    byte[] bytes = new byte[512];
066    int value = 0;
067    JenkinsHash hash = new JenkinsHash();
068    try {
069      for (int length = in.read(bytes); length > 0; length = in.read(bytes)) {
070        value = hash.hash(new ByteArrayHashKey(bytes, 0, length), value);
071      }
072    } finally {
073      in.close();
074    }
075    System.out.println(Math.abs(value));
076  }
077
078  /**
079   * taken from hashlittle() -- hash a variable-length key into a 32-bit value
080   * @param hashKey the key to extract the bytes for hash algo
081   * @param initval can be any integer value
082   * @return a 32-bit value. Every bit of the key affects every bit of the return value. Two keys
083   *         differing by one or two bits will have totally different hash values.
084   *         <p>
085   *         The best hash table sizes are powers of 2. There is no need to do mod a prime (mod is
086   *         sooo slow!). If you need less than 32 bits, use a bitmask. For example, if you need
087   *         only 10 bits, do <code>h = (h &amp; hashmask(10));</code> In which case, the hash table
088   *         should have hashsize(10) elements.
089   *         <p>
090   *         If you are hashing n strings byte[][] k, do it like this: for (int i = 0, h = 0; i &lt;
091   *         n; ++i) h = hash( k[i], h);
092   *         <p>
093   *         By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this code any way you
094   *         wish, private, educational, or commercial. It's free.
095   *         <p>
096   *         Use for hash table lookup, or anything where one collision in 2^^32 is acceptable. Do
097   *         NOT use for cryptographic purposes.
098   */
099  @SuppressWarnings({ "fallthrough", "MissingDefault" })
100  @Override
101  public <T> int hash(HashKey<T> hashKey, int initval) {
102    int length = hashKey.length();
103    int a, b, c;
104    a = b = c = 0xdeadbeef + length + initval;
105    int offset = 0;
106    for (; length > 12; offset += 12, length -= 12) {
107      a += hashKey.getIntLE(offset);
108      b += hashKey.getIntLE(offset + 4);
109      c += hashKey.getIntLE(offset + 8);
110
111      /*
112       * mix -- mix 3 32-bit values reversibly. This is reversible, so any information in (a,b,c)
113       * before mix() is still in (a,b,c) after mix(). If four pairs of (a,b,c) inputs are run
114       * through mix(), or through mix() in reverse, there are at least 32 bits of the output that
115       * are sometimes the same for one pair and different for another pair. This was tested for: -
116       * pairs that differed by one bit, by two bits, in any combination of top bits of (a,b,c), or
117       * in any combination of bottom bits of (a,b,c). - "differ" is defined as +, -, ^, or ~^. For
118       * + and -, I transformed the output delta to a Gray code (a^(a>>1)) so a string of 1's (as is
119       * commonly produced by subtraction) look like a single 1-bit difference. - the base values
120       * were pseudorandom, all zero but one bit set, or all zero plus a counter that starts at
121       * zero. Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that satisfy this are 4 6
122       * 8 16 19 4 9 15 3 18 27 15 14 9 3 7 17 3 Well, "9 15 3 18 27 15" didn't quite get 32 bits
123       * diffing for "differ" defined as + with a one-bit base and a two-bit delta. I used
124       * http://burtleburtle.net/bob/hash/avalanche.html to choose the operations, constants, and
125       * arrangements of the variables. This does not achieve avalanche. There are input bits of
126       * (a,b,c) that fail to affect some output bits of (a,b,c), especially of a. The most
127       * thoroughly mixed value is c, but it doesn't really even achieve avalanche in c. This allows
128       * some parallelism. Read-after-writes are good at doubling the number of bits affected, so
129       * the goal of mixing pulls in the opposite direction as the goal of parallelism. I did what I
130       * could. Rotates seem to cost as much as shifts on every machine I could lay my hands on, and
131       * rotates are much kinder to the top and bottom bits, so I used rotates. #define mix(a,b,c) \
132       * { \ a -= c; a ^= rot(c, 4); c += b; \ b -= a; b ^= rot(a, 6); a += c; \ c -= b; c ^= rot(b,
133       * 8); b += a; \ a -= c; a ^= rot(c,16); c += b; \ b -= a; b ^= rot(a,19); a += c; \ c -= b; c
134       * ^= rot(b, 4); b += a; \ } mix(a,b,c);
135       */
136      a -= c;
137      a ^= rotateLeft(c, 4);
138      c += b;
139      b -= a;
140      b ^= rotateLeft(a, 6);
141      a += c;
142      c -= b;
143      c ^= rotateLeft(b, 8);
144      b += a;
145      a -= c;
146      a ^= rotateLeft(c, 16);
147      c += b;
148      b -= a;
149      b ^= rotateLeft(a, 19);
150      a += c;
151      c -= b;
152      c ^= rotateLeft(b, 4);
153      b += a;
154    }
155
156    // -------------------------------- last block: affect all 32 bits of (c)
157    switch (length) { // all the case statements fall through
158      case 12:
159        c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24);
160      case 11:
161        c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16);
162      case 10:
163        c += ((hashKey.get(offset + 9) & BYTE_MASK) << 8);
164      case 9:
165        c += (hashKey.get(offset + 8) & BYTE_MASK);
166      case 8:
167        b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24);
168      case 7:
169        b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16);
170      case 6:
171        b += ((hashKey.get(offset + 5) & BYTE_MASK) << 8);
172      case 5:
173        b += (hashKey.get(offset + 4) & BYTE_MASK);
174      case 4:
175        a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24);
176      case 3:
177        a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16);
178      case 2:
179        a += ((hashKey.get(offset + 1) & BYTE_MASK) << 8);
180      case 1:
181        // noinspection PointlessArithmeticExpression
182        a += (hashKey.get(offset + 0) & BYTE_MASK);
183        break;
184      case 0:
185        return c;
186    }
187    /*
188     * final -- final mixing of 3 32-bit values (a,b,c) into c Pairs of (a,b,c) values differing in
189     * only a few bits will usually produce values of c that look totally different. This was tested
190     * for - pairs that differed by one bit, by two bits, in any combination of top bits of (a,b,c),
191     * or in any combination of bottom bits of (a,b,c). - "differ" is defined as +, -, ^, or ~^. For
192     * + and -, I transformed the output delta to a Gray code (a^(a>>1)) so a string of 1's (as is
193     * commonly produced by subtraction) look like a single 1-bit difference. - the base values were
194     * pseudorandom, all zero but one bit set, or all zero plus a counter that starts at zero. These
195     * constants passed: 14 11 25 16 4 14 24 12 14 25 16 4 14 24 and these came close: 4 8 15 26 3
196     * 22 24 10 8 15 26 3 22 24 11 8 15 26 3 22 24 #define final(a,b,c) \ { c ^= b; c -= rot(b,14);
197     * \ a ^= c; a -= rot(c,11); \ b ^= a; b -= rot(a,25); \ c ^= b; c -= rot(b,16); \ a ^= c; a -=
198     * rot(c,4); \ b ^= a; b -= rot(a,14); \ c ^= b; c -= rot(b,24); \ }
199     */
200    c ^= b;
201    c -= rotateLeft(b, 14);
202    a ^= c;
203    a -= rotateLeft(c, 11);
204    b ^= a;
205    b -= rotateLeft(a, 25);
206    c ^= b;
207    c -= rotateLeft(b, 16);
208    a ^= c;
209    a -= rotateLeft(c, 4);
210    b ^= a;
211    b -= rotateLeft(a, 14);
212    c ^= b;
213    c -= rotateLeft(b, 24);
214    return c;
215  }
216}