001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020package org.apache.hadoop.hbase.util; 021 022import static java.lang.Integer.rotateLeft; 023 024import java.io.FileInputStream; 025import java.io.IOException; 026 027import org.apache.yetus.audience.InterfaceAudience; 028import org.apache.yetus.audience.InterfaceStability; 029 030/** 031 * Produces 32-bit hash for hash table lookup. 032 * 033 * <pre>lookup3.c, by Bob Jenkins, May 2006, Public Domain. 034 * 035 * You can use this free for any purpose. It's in the public domain. 036 * It has no warranty. 037 * </pre> 038 * 039 * @see <a href="http://burtleburtle.net/bob/c/lookup3.c">lookup3.c</a> 040 * @see <a href="http://www.ddj.com/184410284">Hash Functions (and how this 041 * function compares to others such as CRC, MD?, etc</a> 042 * @see <a href="http://burtleburtle.net/bob/hash/doobs.html">Has update on the 043 * Dr. Dobbs Article</a> 044 */ 045@InterfaceAudience.Private 046@InterfaceStability.Stable 047public class JenkinsHash extends Hash { 048 private static final int BYTE_MASK = 0xff; 049 050 private static JenkinsHash _instance = new JenkinsHash(); 051 052 public static Hash getInstance() { 053 return _instance; 054 } 055 056 /** 057 * Compute the hash of the specified file 058 * @param args name of file to compute hash of. 059 * @throws IOException e 060 */ 061 public static void main(String[] args) throws IOException { 062 if (args.length != 1) { 063 System.err.println("Usage: JenkinsHash filename"); 064 System.exit(-1); 065 } 066 FileInputStream in = new FileInputStream(args[0]); 067 byte[] bytes = new byte[512]; 068 int value = 0; 069 JenkinsHash hash = new JenkinsHash(); 070 try { 071 for (int length = in.read(bytes); length > 0; length = in.read(bytes)) { 072 value = hash.hash(new ByteArrayHashKey(bytes, 0, length), value); 073 } 074 } finally { 075 in.close(); 076 } 077 System.out.println(Math.abs(value)); 078 } 079 080 /** 081 * taken from hashlittle() -- hash a variable-length key into a 32-bit value 082 * 083 * @param hashKey the key to extract the bytes for hash algo 084 * @param initval can be any integer value 085 * @return a 32-bit value. Every bit of the key affects every bit of the 086 * return value. Two keys differing by one or two bits will have totally 087 * different hash values. 088 * 089 * <p>The best hash table sizes are powers of 2. There is no need to do mod 090 * a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. 091 * For example, if you need only 10 bits, do 092 * <code>h = (h & hashmask(10));</code> 093 * In which case, the hash table should have hashsize(10) elements. 094 * 095 * <p>If you are hashing n strings byte[][] k, do it like this: 096 * for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h); 097 * 098 * <p>By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this 099 * code any way you wish, private, educational, or commercial. It's free. 100 * 101 * <p>Use for hash table lookup, or anything where one collision in 2^^32 is 102 * acceptable. Do NOT use for cryptographic purposes. 103 */ 104 @SuppressWarnings("fallthrough") 105 @Override 106 public <T> int hash(HashKey<T> hashKey, int initval) { 107 int length = hashKey.length(); 108 int a, b, c; 109 a = b = c = 0xdeadbeef + length + initval; 110 int offset = 0; 111 for (; length > 12; offset += 12, length -= 12) { 112 a += (hashKey.get(offset) & BYTE_MASK); 113 a += ((hashKey.get(offset + 1) & BYTE_MASK) << 8); 114 a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16); 115 a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24); 116 b += (hashKey.get(offset + 4) & BYTE_MASK); 117 b += ((hashKey.get(offset + 5) & BYTE_MASK) << 8); 118 b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16); 119 b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24); 120 c += (hashKey.get(offset + 8) & BYTE_MASK); 121 c += ((hashKey.get(offset + 9) & BYTE_MASK) << 8); 122 c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16); 123 c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24); 124 125 /* 126 * mix -- mix 3 32-bit values reversibly. 127 * This is reversible, so any information in (a,b,c) before mix() is 128 * still in (a,b,c) after mix(). 129 * 130 * If four pairs of (a,b,c) inputs are run through mix(), or through 131 * mix() in reverse, there are at least 32 bits of the output that 132 * are sometimes the same for one pair and different for another pair. 133 * 134 * This was tested for: 135 * - pairs that differed by one bit, by two bits, in any combination 136 * of top bits of (a,b,c), or in any combination of bottom bits of 137 * (a,b,c). 138 * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 139 * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 140 * is commonly produced by subtraction) look like a single 1-bit 141 * difference. 142 * - the base values were pseudorandom, all zero but one bit set, or 143 * all zero plus a counter that starts at zero. 144 * 145 * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that 146 * satisfy this are 147 * 4 6 8 16 19 4 148 * 9 15 3 18 27 15 149 * 14 9 3 7 17 3 150 * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for 151 * "differ" defined as + with a one-bit base and a two-bit delta. I 152 * used http://burtleburtle.net/bob/hash/avalanche.html to choose 153 * the operations, constants, and arrangements of the variables. 154 * 155 * This does not achieve avalanche. There are input bits of (a,b,c) 156 * that fail to affect some output bits of (a,b,c), especially of a. 157 * The most thoroughly mixed value is c, but it doesn't really even 158 * achieve avalanche in c. 159 * 160 * This allows some parallelism. Read-after-writes are good at doubling 161 * the number of bits affected, so the goal of mixing pulls in the 162 * opposite direction as the goal of parallelism. I did what I could. 163 * Rotates seem to cost as much as shifts on every machine I could lay 164 * my hands on, and rotates are much kinder to the top and bottom bits, 165 * so I used rotates. 166 * 167 * #define mix(a,b,c) \ 168 * { \ 169 * a -= c; a ^= rot(c, 4); c += b; \ 170 * b -= a; b ^= rot(a, 6); a += c; \ 171 * c -= b; c ^= rot(b, 8); b += a; \ 172 * a -= c; a ^= rot(c,16); c += b; \ 173 * b -= a; b ^= rot(a,19); a += c; \ 174 * c -= b; c ^= rot(b, 4); b += a; \ 175 * } 176 * 177 * mix(a,b,c); 178 */ 179 a -= c; a ^= rotateLeft(c, 4); c += b; 180 b -= a; b ^= rotateLeft(a, 6); a += c; 181 c -= b; c ^= rotateLeft(b, 8); b += a; 182 a -= c; a ^= rotateLeft(c, 16); c += b; 183 b -= a; b ^= rotateLeft(a, 19); a += c; 184 c -= b; c ^= rotateLeft(b, 4); b += a; 185 } 186 187 //-------------------------------- last block: affect all 32 bits of (c) 188 switch (length) { // all the case statements fall through 189 case 12: 190 c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24); 191 case 11: 192 c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16); 193 case 10: 194 c += ((hashKey.get(offset + 9) & BYTE_MASK) << 8); 195 case 9: 196 c += (hashKey.get(offset + 8) & BYTE_MASK); 197 case 8: 198 b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24); 199 case 7: 200 b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16); 201 case 6: 202 b += ((hashKey.get(offset + 5) & BYTE_MASK) << 8); 203 case 5: 204 b += (hashKey.get(offset + 4) & BYTE_MASK); 205 case 4: 206 a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24); 207 case 3: 208 a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16); 209 case 2: 210 a += ((hashKey.get(offset + 1) & BYTE_MASK) << 8); 211 case 1: 212 //noinspection PointlessArithmeticExpression 213 a += (hashKey.get(offset + 0) & BYTE_MASK); 214 break; 215 case 0: 216 return c; 217 } 218 /* 219 * final -- final mixing of 3 32-bit values (a,b,c) into c 220 * 221 * Pairs of (a,b,c) values differing in only a few bits will usually 222 * produce values of c that look totally different. This was tested for 223 * - pairs that differed by one bit, by two bits, in any combination 224 * of top bits of (a,b,c), or in any combination of bottom bits of 225 * (a,b,c). 226 * 227 * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 228 * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 229 * is commonly produced by subtraction) look like a single 1-bit 230 * difference. 231 * 232 * - the base values were pseudorandom, all zero but one bit set, or 233 * all zero plus a counter that starts at zero. 234 * 235 * These constants passed: 236 * 14 11 25 16 4 14 24 237 * 12 14 25 16 4 14 24 238 * and these came close: 239 * 4 8 15 26 3 22 24 240 * 10 8 15 26 3 22 24 241 * 11 8 15 26 3 22 24 242 * 243 * #define final(a,b,c) \ 244 * { 245 * c ^= b; c -= rot(b,14); \ 246 * a ^= c; a -= rot(c,11); \ 247 * b ^= a; b -= rot(a,25); \ 248 * c ^= b; c -= rot(b,16); \ 249 * a ^= c; a -= rot(c,4); \ 250 * b ^= a; b -= rot(a,14); \ 251 * c ^= b; c -= rot(b,24); \ 252 * } 253 * 254 */ 255 c ^= b; c -= rotateLeft(b, 14); 256 a ^= c; a -= rotateLeft(c, 11); 257 b ^= a; b -= rotateLeft(a, 25); 258 c ^= b; c -= rotateLeft(b, 16); 259 a ^= c; a -= rotateLeft(c, 4); 260 b ^= a; b -= rotateLeft(a, 14); 261 c ^= b; c -= rotateLeft(b, 24); 262 return c; 263 } 264}