Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import static java.lang.Integer.rotateLeft;
021
022import java.io.FileInputStream;
023import java.io.IOException;
024import org.apache.yetus.audience.InterfaceAudience;
025import org.apache.yetus.audience.InterfaceStability;
026
027/**
028 * Produces 32-bit hash for hash table lookup.
029 *
030 * <pre>
031 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
032 *
033 * You can use this free for any purpose.  It's in the public domain.
034 * It has no warranty.
035 * </pre>
036 *
037 * @see <a href="http://burtleburtle.net/bob/c/lookup3.c">lookup3.c</a>
038 * @see <a href="http://www.ddj.com/184410284">Hash Functions (and how this function compares to
039 *      others such as CRC, MD?, etc</a>
040 * @see <a href="http://burtleburtle.net/bob/hash/doobs.html">Has update on the Dr. Dobbs
041 *      Article</a>
042 */
043@InterfaceAudience.Private
044@InterfaceStability.Stable
045public class JenkinsHash extends Hash {
046  private static final int BYTE_MASK = 0xff;
047
048  private static JenkinsHash _instance = new JenkinsHash();
049
050  public static Hash getInstance() {
051    return _instance;
052  }
053
054  /**
055   * Compute the hash of the specified file
056   * @param args name of file to compute hash of.
057   * @throws IOException e
058   */
059  public static void main(String[] args) throws IOException {
060    if (args.length != 1) {
061      System.err.println("Usage: JenkinsHash filename");
062      System.exit(-1);
063    }
064    FileInputStream in = new FileInputStream(args[0]);
065    byte[] bytes = new byte[512];
066    int value = 0;
067    JenkinsHash hash = new JenkinsHash();
068    try {
069      for (int length = in.read(bytes); length > 0; length = in.read(bytes)) {
070        value = hash.hash(new ByteArrayHashKey(bytes, 0, length), value);
071      }
072    } finally {
073      in.close();
074    }
075    System.out.println(Math.abs(value));
076  }
077
078  /**
079   * taken from hashlittle() -- hash a variable-length key into a 32-bit value
080   * @param hashKey the key to extract the bytes for hash algo
081   * @param initval can be any integer value
082   * @return a 32-bit value. Every bit of the key affects every bit of the return value. Two keys
083   *         differing by one or two bits will have totally different hash values.
084   *         <p>
085   *         The best hash table sizes are powers of 2. There is no need to do mod a prime (mod is
086   *         sooo slow!). If you need less than 32 bits, use a bitmask. For example, if you need
087   *         only 10 bits, do <code>h = (h &amp; hashmask(10));</code> In which case, the hash table
088   *         should have hashsize(10) elements.
089   *         <p>
090   *         If you are hashing n strings byte[][] k, do it like this: for (int i = 0, h = 0; i &lt;
091   *         n; ++i) h = hash( k[i], h);
092   *         <p>
093   *         By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this code any way you
094   *         wish, private, educational, or commercial. It's free.
095   *         <p>
096   *         Use for hash table lookup, or anything where one collision in 2^^32 is acceptable. Do
097   *         NOT use for cryptographic purposes.
098   */
099  @SuppressWarnings({ "fallthrough", "MissingDefault" })
100  @Override
101  public <T> int hash(HashKey<T> hashKey, int initval) {
102    int length = hashKey.length();
103    int a, b, c;
104    a = b = c = 0xdeadbeef + length + initval;
105    int offset = 0;
106    for (; length > 12; offset += 12, length -= 12) {
107      a += (hashKey.get(offset) & BYTE_MASK);
108      a += ((hashKey.get(offset + 1) & BYTE_MASK) << 8);
109      a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16);
110      a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24);
111      b += (hashKey.get(offset + 4) & BYTE_MASK);
112      b += ((hashKey.get(offset + 5) & BYTE_MASK) << 8);
113      b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16);
114      b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24);
115      c += (hashKey.get(offset + 8) & BYTE_MASK);
116      c += ((hashKey.get(offset + 9) & BYTE_MASK) << 8);
117      c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16);
118      c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24);
119
120      /*
121       * mix -- mix 3 32-bit values reversibly. This is reversible, so any information in (a,b,c)
122       * before mix() is still in (a,b,c) after mix(). If four pairs of (a,b,c) inputs are run
123       * through mix(), or through mix() in reverse, there are at least 32 bits of the output that
124       * are sometimes the same for one pair and different for another pair. This was tested for: -
125       * pairs that differed by one bit, by two bits, in any combination of top bits of (a,b,c), or
126       * in any combination of bottom bits of (a,b,c). - "differ" is defined as +, -, ^, or ~^. For
127       * + and -, I transformed the output delta to a Gray code (a^(a>>1)) so a string of 1's (as is
128       * commonly produced by subtraction) look like a single 1-bit difference. - the base values
129       * were pseudorandom, all zero but one bit set, or all zero plus a counter that starts at
130       * zero. Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that satisfy this are 4 6
131       * 8 16 19 4 9 15 3 18 27 15 14 9 3 7 17 3 Well, "9 15 3 18 27 15" didn't quite get 32 bits
132       * diffing for "differ" defined as + with a one-bit base and a two-bit delta. I used
133       * http://burtleburtle.net/bob/hash/avalanche.html to choose the operations, constants, and
134       * arrangements of the variables. This does not achieve avalanche. There are input bits of
135       * (a,b,c) that fail to affect some output bits of (a,b,c), especially of a. The most
136       * thoroughly mixed value is c, but it doesn't really even achieve avalanche in c. This allows
137       * some parallelism. Read-after-writes are good at doubling the number of bits affected, so
138       * the goal of mixing pulls in the opposite direction as the goal of parallelism. I did what I
139       * could. Rotates seem to cost as much as shifts on every machine I could lay my hands on, and
140       * rotates are much kinder to the top and bottom bits, so I used rotates. #define mix(a,b,c) \
141       * { \ a -= c; a ^= rot(c, 4); c += b; \ b -= a; b ^= rot(a, 6); a += c; \ c -= b; c ^= rot(b,
142       * 8); b += a; \ a -= c; a ^= rot(c,16); c += b; \ b -= a; b ^= rot(a,19); a += c; \ c -= b; c
143       * ^= rot(b, 4); b += a; \ } mix(a,b,c);
144       */
145      a -= c;
146      a ^= rotateLeft(c, 4);
147      c += b;
148      b -= a;
149      b ^= rotateLeft(a, 6);
150      a += c;
151      c -= b;
152      c ^= rotateLeft(b, 8);
153      b += a;
154      a -= c;
155      a ^= rotateLeft(c, 16);
156      c += b;
157      b -= a;
158      b ^= rotateLeft(a, 19);
159      a += c;
160      c -= b;
161      c ^= rotateLeft(b, 4);
162      b += a;
163    }
164
165    // -------------------------------- last block: affect all 32 bits of (c)
166    switch (length) { // all the case statements fall through
167      case 12:
168        c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24);
169      case 11:
170        c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16);
171      case 10:
172        c += ((hashKey.get(offset + 9) & BYTE_MASK) << 8);
173      case 9:
174        c += (hashKey.get(offset + 8) & BYTE_MASK);
175      case 8:
176        b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24);
177      case 7:
178        b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16);
179      case 6:
180        b += ((hashKey.get(offset + 5) & BYTE_MASK) << 8);
181      case 5:
182        b += (hashKey.get(offset + 4) & BYTE_MASK);
183      case 4:
184        a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24);
185      case 3:
186        a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16);
187      case 2:
188        a += ((hashKey.get(offset + 1) & BYTE_MASK) << 8);
189      case 1:
190        // noinspection PointlessArithmeticExpression
191        a += (hashKey.get(offset + 0) & BYTE_MASK);
192        break;
193      case 0:
194        return c;
195    }
196    /*
197     * final -- final mixing of 3 32-bit values (a,b,c) into c Pairs of (a,b,c) values differing in
198     * only a few bits will usually produce values of c that look totally different. This was tested
199     * for - pairs that differed by one bit, by two bits, in any combination of top bits of (a,b,c),
200     * or in any combination of bottom bits of (a,b,c). - "differ" is defined as +, -, ^, or ~^. For
201     * + and -, I transformed the output delta to a Gray code (a^(a>>1)) so a string of 1's (as is
202     * commonly produced by subtraction) look like a single 1-bit difference. - the base values were
203     * pseudorandom, all zero but one bit set, or all zero plus a counter that starts at zero. These
204     * constants passed: 14 11 25 16 4 14 24 12 14 25 16 4 14 24 and these came close: 4 8 15 26 3
205     * 22 24 10 8 15 26 3 22 24 11 8 15 26 3 22 24 #define final(a,b,c) \ { c ^= b; c -= rot(b,14);
206     * \ a ^= c; a -= rot(c,11); \ b ^= a; b -= rot(a,25); \ c ^= b; c -= rot(b,16); \ a ^= c; a -=
207     * rot(c,4); \ b ^= a; b -= rot(a,14); \ c ^= b; c -= rot(b,24); \ }
208     */
209    c ^= b;
210    c -= rotateLeft(b, 14);
211    a ^= c;
212    a -= rotateLeft(c, 11);
213    b ^= a;
214    b -= rotateLeft(a, 25);
215    c ^= b;
216    c -= rotateLeft(b, 16);
217    a ^= c;
218    a -= rotateLeft(c, 4);
219    b ^= a;
220    b -= rotateLeft(a, 14);
221    c ^= b;
222    c -= rotateLeft(b, 24);
223    return c;
224  }
225}