001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import static java.lang.Integer.rotateLeft; 021 022import java.io.FileInputStream; 023import java.io.IOException; 024import org.apache.yetus.audience.InterfaceAudience; 025import org.apache.yetus.audience.InterfaceStability; 026 027/** 028 * Produces 32-bit hash for hash table lookup. 029 * 030 * <pre> 031 * lookup3.c, by Bob Jenkins, May 2006, Public Domain. 032 * 033 * You can use this free for any purpose. It's in the public domain. 034 * It has no warranty. 035 * </pre> 036 * 037 * @see <a href="http://burtleburtle.net/bob/c/lookup3.c">lookup3.c</a> 038 * @see <a href="http://www.ddj.com/184410284">Hash Functions (and how this function compares to 039 * others such as CRC, MD?, etc</a> 040 * @see <a href="http://burtleburtle.net/bob/hash/doobs.html">Has update on the Dr. Dobbs 041 * Article</a> 042 */ 043@InterfaceAudience.Private 044@InterfaceStability.Stable 045public class JenkinsHash extends Hash { 046 private static final int BYTE_MASK = 0xff; 047 048 private static JenkinsHash _instance = new JenkinsHash(); 049 050 public static Hash getInstance() { 051 return _instance; 052 } 053 054 /** 055 * Compute the hash of the specified file 056 * @param args name of file to compute hash of. 057 * @throws IOException e 058 */ 059 public static void main(String[] args) throws IOException { 060 if (args.length != 1) { 061 System.err.println("Usage: JenkinsHash filename"); 062 System.exit(-1); 063 } 064 FileInputStream in = new FileInputStream(args[0]); 065 byte[] bytes = new byte[512]; 066 int value = 0; 067 JenkinsHash hash = new JenkinsHash(); 068 try { 069 for (int length = in.read(bytes); length > 0; length = in.read(bytes)) { 070 value = hash.hash(new ByteArrayHashKey(bytes, 0, length), value); 071 } 072 } finally { 073 in.close(); 074 } 075 System.out.println(Math.abs(value)); 076 } 077 078 /** 079 * taken from hashlittle() -- hash a variable-length key into a 32-bit value 080 * @param hashKey the key to extract the bytes for hash algo 081 * @param initval can be any integer value 082 * @return a 32-bit value. Every bit of the key affects every bit of the return value. Two keys 083 * differing by one or two bits will have totally different hash values. 084 * <p> 085 * The best hash table sizes are powers of 2. There is no need to do mod a prime (mod is 086 * sooo slow!). If you need less than 32 bits, use a bitmask. For example, if you need 087 * only 10 bits, do <code>h = (h & hashmask(10));</code> In which case, the hash table 088 * should have hashsize(10) elements. 089 * <p> 090 * If you are hashing n strings byte[][] k, do it like this: for (int i = 0, h = 0; i < 091 * n; ++i) h = hash( k[i], h); 092 * <p> 093 * By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this code any way you 094 * wish, private, educational, or commercial. It's free. 095 * <p> 096 * Use for hash table lookup, or anything where one collision in 2^^32 is acceptable. Do 097 * NOT use for cryptographic purposes. 098 */ 099 @SuppressWarnings({ "fallthrough", "MissingDefault" }) 100 @Override 101 public <T> int hash(HashKey<T> hashKey, int initval) { 102 int length = hashKey.length(); 103 int a, b, c; 104 a = b = c = 0xdeadbeef + length + initval; 105 int offset = 0; 106 for (; length > 12; offset += 12, length -= 12) { 107 a += (hashKey.get(offset) & BYTE_MASK); 108 a += ((hashKey.get(offset + 1) & BYTE_MASK) << 8); 109 a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16); 110 a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24); 111 b += (hashKey.get(offset + 4) & BYTE_MASK); 112 b += ((hashKey.get(offset + 5) & BYTE_MASK) << 8); 113 b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16); 114 b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24); 115 c += (hashKey.get(offset + 8) & BYTE_MASK); 116 c += ((hashKey.get(offset + 9) & BYTE_MASK) << 8); 117 c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16); 118 c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24); 119 120 /* 121 * mix -- mix 3 32-bit values reversibly. This is reversible, so any information in (a,b,c) 122 * before mix() is still in (a,b,c) after mix(). If four pairs of (a,b,c) inputs are run 123 * through mix(), or through mix() in reverse, there are at least 32 bits of the output that 124 * are sometimes the same for one pair and different for another pair. This was tested for: - 125 * pairs that differed by one bit, by two bits, in any combination of top bits of (a,b,c), or 126 * in any combination of bottom bits of (a,b,c). - "differ" is defined as +, -, ^, or ~^. For 127 * + and -, I transformed the output delta to a Gray code (a^(a>>1)) so a string of 1's (as is 128 * commonly produced by subtraction) look like a single 1-bit difference. - the base values 129 * were pseudorandom, all zero but one bit set, or all zero plus a counter that starts at 130 * zero. Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that satisfy this are 4 6 131 * 8 16 19 4 9 15 3 18 27 15 14 9 3 7 17 3 Well, "9 15 3 18 27 15" didn't quite get 32 bits 132 * diffing for "differ" defined as + with a one-bit base and a two-bit delta. I used 133 * http://burtleburtle.net/bob/hash/avalanche.html to choose the operations, constants, and 134 * arrangements of the variables. This does not achieve avalanche. There are input bits of 135 * (a,b,c) that fail to affect some output bits of (a,b,c), especially of a. The most 136 * thoroughly mixed value is c, but it doesn't really even achieve avalanche in c. This allows 137 * some parallelism. Read-after-writes are good at doubling the number of bits affected, so 138 * the goal of mixing pulls in the opposite direction as the goal of parallelism. I did what I 139 * could. Rotates seem to cost as much as shifts on every machine I could lay my hands on, and 140 * rotates are much kinder to the top and bottom bits, so I used rotates. #define mix(a,b,c) \ 141 * { \ a -= c; a ^= rot(c, 4); c += b; \ b -= a; b ^= rot(a, 6); a += c; \ c -= b; c ^= rot(b, 142 * 8); b += a; \ a -= c; a ^= rot(c,16); c += b; \ b -= a; b ^= rot(a,19); a += c; \ c -= b; c 143 * ^= rot(b, 4); b += a; \ } mix(a,b,c); 144 */ 145 a -= c; 146 a ^= rotateLeft(c, 4); 147 c += b; 148 b -= a; 149 b ^= rotateLeft(a, 6); 150 a += c; 151 c -= b; 152 c ^= rotateLeft(b, 8); 153 b += a; 154 a -= c; 155 a ^= rotateLeft(c, 16); 156 c += b; 157 b -= a; 158 b ^= rotateLeft(a, 19); 159 a += c; 160 c -= b; 161 c ^= rotateLeft(b, 4); 162 b += a; 163 } 164 165 // -------------------------------- last block: affect all 32 bits of (c) 166 switch (length) { // all the case statements fall through 167 case 12: 168 c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24); 169 case 11: 170 c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16); 171 case 10: 172 c += ((hashKey.get(offset + 9) & BYTE_MASK) << 8); 173 case 9: 174 c += (hashKey.get(offset + 8) & BYTE_MASK); 175 case 8: 176 b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24); 177 case 7: 178 b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16); 179 case 6: 180 b += ((hashKey.get(offset + 5) & BYTE_MASK) << 8); 181 case 5: 182 b += (hashKey.get(offset + 4) & BYTE_MASK); 183 case 4: 184 a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24); 185 case 3: 186 a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16); 187 case 2: 188 a += ((hashKey.get(offset + 1) & BYTE_MASK) << 8); 189 case 1: 190 // noinspection PointlessArithmeticExpression 191 a += (hashKey.get(offset + 0) & BYTE_MASK); 192 break; 193 case 0: 194 return c; 195 } 196 /* 197 * final -- final mixing of 3 32-bit values (a,b,c) into c Pairs of (a,b,c) values differing in 198 * only a few bits will usually produce values of c that look totally different. This was tested 199 * for - pairs that differed by one bit, by two bits, in any combination of top bits of (a,b,c), 200 * or in any combination of bottom bits of (a,b,c). - "differ" is defined as +, -, ^, or ~^. For 201 * + and -, I transformed the output delta to a Gray code (a^(a>>1)) so a string of 1's (as is 202 * commonly produced by subtraction) look like a single 1-bit difference. - the base values were 203 * pseudorandom, all zero but one bit set, or all zero plus a counter that starts at zero. These 204 * constants passed: 14 11 25 16 4 14 24 12 14 25 16 4 14 24 and these came close: 4 8 15 26 3 205 * 22 24 10 8 15 26 3 22 24 11 8 15 26 3 22 24 #define final(a,b,c) \ { c ^= b; c -= rot(b,14); 206 * \ a ^= c; a -= rot(c,11); \ b ^= a; b -= rot(a,25); \ c ^= b; c -= rot(b,16); \ a ^= c; a -= 207 * rot(c,4); \ b ^= a; b -= rot(a,14); \ c ^= b; c -= rot(b,24); \ } 208 */ 209 c ^= b; 210 c -= rotateLeft(b, 14); 211 a ^= c; 212 a -= rotateLeft(c, 11); 213 b ^= a; 214 b -= rotateLeft(a, 25); 215 c ^= b; 216 c -= rotateLeft(b, 16); 217 a ^= c; 218 a -= rotateLeft(c, 4); 219 b ^= a; 220 b -= rotateLeft(a, 14); 221 c ^= b; 222 c -= rotateLeft(b, 24); 223 return c; 224 } 225}