MurmurHash2 for Java

寝付けないので暇つぶしに作ってみた。ネタ元はここ。元の戻り値はunsigned intだけど、Javaにはないのでint型で。計算自体は同じだから、unsignedでもbit列としては同じ結果になっているはず。

[追記] 書き忘れたけど元と同じくPUBLIC DOMAINということでよろ

[追記 2009/03/08] javaのbyteはbit演算するときにint型に変換されるの忘れてたorz byte列に負の値があるときに正しくない値になっていたので修正。

public class MurmurHash2 {
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.out.println("java MurmurHash2 <text> <seed>");
      return;
    }
    
    byte[] key = args[0].getBytes("UTF-8");
    int seed = Integer.parseInt(args[1]);
    System.out.print(hash(key, seed));
  }
  
  public static int hash(byte[] key, int seed) {
    // 'm' is mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.
    int m = 0x5bd1e995;

    // Initialize the hash to a 'random' value
    int h = seed ^ key.length;

    // Mix 4 bytes at a time into the hash
    int i = 0;
    while (i < key.length-3) {
      int k = (key[i++] & 0xff);
      k |= (key[i++] & 0xff) << 8;
      k |= (key[i++] & 0xff) << 16;
      k |= (key[i++] & 0xff) << 24;
      
      k *= m; 
      k ^= k >>> 24; 
      k *= m; 
      
      h *= m; 
      h ^= k;
    }
    
    // Handle the last few bytes of the input array
    switch (key.length-i) {
    case 3: h ^= (key[i++] & 0xff) << 16;
    case 2: h ^= (key[i++] & 0xff) << 8;
    case 1: h ^= (key[i++] & 0xff);
            h *= m;
    }

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.
    h ^= h >>> 13;
    h *= m;
    h ^= h >>> 15;

    return h;
  }
}