/* the famous DJB Hash Function for strings */ unsigned int DJBHash(char *str) { unsigned int hash = 5381; while (*str){ hash = ((hash << 5) + hash) + (*str++); /* times 33 */ } hash &= ~(1 << 31); /* strip the highest bit */ return hash; }
uint32_t ngx_murmur_hash2(u_char *data, size_t len) { uint32_t h, k; h = 0 ^ len; while (len >= 4) { k = data[0]; k |= data[1] << 8; k |= data[2] << 16; k |= data[3] << 24; k *= 0x5bd1e995; k ^= k >> 24; k *= 0x5bd1e995; h *= 0x5bd1e995; h ^= k; data += 4; len -= 4; } switch (len) { case 3: h ^= data[2] << 16; case 2: h ^= data[1] << 8; case 1: h ^= data[0]; h *= 0x5bd1e995; } h ^= h >> 13; h *= 0x5bd1e995; h ^= h >> 15; return h; }
uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ) { // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. const uint32_t m = 0x5bd1e995; const int r = 24; // Initialize the hash to a 'random' value uint32_t h = seed ^ len; // Mix 4 bytes at a time into the hash const unsigned char * data = (const unsigned char *)key; while(len >= 4) { uint32_t k = *(uint32_t*)data; k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; data += 4; len -= 4; } // Handle the last few bytes of the input array switch(len) { case 3: h ^= data[2] << 16; case 2: h ^= data[1] << 8; case 1: h ^= data[0]; h *= m; }; // Do a few final mixes of the hash to ensure the last few // bytes are well-incorporated. h ^= h >> 13; h *= m; h ^= h >> 15; return h; }
murmur: len 20,used 171.30 mili murmur: len 19,used 163.68 mili murmur: len 18,used 158.20 mili murmur: len 17,used 159.94 mili murmur: len 16,used 155.63 mili murmur: len 15,used 147.29 mili murmur: len 14,used 145.16 mili murmur: len 13,used 145.12 mili murmur: len 12,used 138.94 mili murmur: len 11,used 135.30 mili murmur: len 10,used 131.71 mili murmur: len 9,used 129.11 mili murmur: len 8,used 122.59 mili murmur: len 7,used 117.09 mili murmur: len 6,used 108.61 mili murmur: len 5,used 113.07 mili murmur: len 4,used 100.49 mili murmur: len 3,used 93.98 mili murmur: len 2,used 88.14 mili murmur: len 1,used 86.02 mili djb: len 20,used 272.66 mili djb: len 19,used 257.07 mili djb: len 18,used 242.63 mili djb: len 17,used 228.25 mili djb: len 16,used 214.33 mili djb: len 15,used 200.17 mili djb: len 14,used 186.42 mili djb: len 13,used 171.60 mili djb: len 12,used 158.15 mili djb: len 11,used 145.17 mili djb: len 10,used 130.69 mili djb: len 9,used 121.37 mili djb: len 8,used 107.93 mili djb: len 7,used 97.16 mili djb: len 6,used 87.17 mili djb: len 5,used 76.95 mili djb: len 4,used 66.98 mili djb: len 3,used 56.86 mili djb: len 2,used 46.84 mili djb: len 1,used 36.79 mili 只有当key的长度大于10字节的时候,MurmurHash的运算速度才快于DJB。而且前提是外部给出key的长度。MurmurHash2(key, strlen(key), seed)的调用方式是惨不忍睹的: murmur: len 20,used 812.83 mili murmur: len 19,used 802.80 mili murmur: len 18,used 776.02 mili murmur: len 17,used 752.58 mili murmur: len 16,used 719.21 mili murmur: len 15,used 699.04 mili murmur: len 14,used 675.73 mili murmur: len 13,used 655.55 mili murmur: len 12,used 625.90 mili murmur: len 11,used 617.32 mili murmur: len 10,used 593.83 mili murmur: len 9,used 567.01 mili murmur: len 8,used 536.72 mili murmur: len 7,used 516.73 mili murmur: len 6,used 493.17 mili murmur: len 5,used 469.66 mili murmur: len 4,used 439.49 mili murmur: len 3,used 405.99 mili murmur: len 2,used 385.84 mili murmur: len 1,used 358.96 mili djb: len 20,used 293.64 mili djb: len 19,used 258.24 mili djb: len 18,used 244.04 mili djb: len 17,used 228.52 mili djb: len 16,used 215.22 mili djb: len 15,used 200.68 mili djb: len 14,used 187.65 mili djb: len 13,used 172.02 mili djb: len 12,used 158.67 mili djb: len 11,used 145.40 mili djb: len 10,used 157.98 mili djb: len 9,used 120.30 mili djb: len 8,used 106.59 mili djb: len 7,used 95.54 mili djb: len 6,used 86.32 mili djb: len 5,used 76.11 mili djb: len 4,used 67.41 mili djb: len 3,used 57.02 mili djb: len 2,used 50.33 mili djb: len 1,used 43.64 mili 结论:从计算速度上来看,MurmurHash只适用于已知长度的、长度比较长的字符串。长度未知或者长度不超过10字节,都应该使用DJB。在C/C++编程里,你很难保证每个char*的key都会带着一个单独length传递,因此MurmurHash的适用场景会受到限制(让外部调用者去strlen消耗的仍然是计算机的计算能力)。另外,对于最常见的userID之类的key,长度一般都不会超过10字节,MurmurHash对于这种最常见的key仍然表现不佳。 hash的分布情况则更难说明,我知道的是选择合适的bucket size之后,DJB平均只要1.2次strcmp,因此MurmurHash不可能大幅超越DJB,最多和DJB大致相当。 结论之结论:还是DJB好。只有在特别的数据特别的场合,才应该考虑单独的使用MurmurHash。
http://blog.csdn.net/wwwsq/article/details/4254123
http://blog.csdn.net/wenchao126/article/details/8364988