//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit versions, by Austin Appleby
//
// The same caveats as 32-bit MurmurHash2 apply here - beware of endian-ness
// issues if the hashes are used across multiple platforms: whole words are
// loaded in host byte order. Words are loaded with memcpy, so the key does
// not have to be aligned.

#include <stdint.h>  // uint64_t / uint32_t with guaranteed widths
#include <string.h>  // memcpy

// Reads 8 bytes starting at p in host byte order; p may be unaligned.
static inline uint64_t MurmurLoad64(const unsigned char * p)
{
    uint64_t word;
    memcpy(&word, p, sizeof word);
    return word;
}

// Reads 4 bytes starting at p in host byte order; p may be unaligned.
static inline uint32_t MurmurLoad32(const unsigned char * p)
{
    uint32_t word;
    memcpy(&word, p, sizeof word);
    return word;
}

// 64-bit hash for 64-bit platforms.
//
// key  - pointer to the bytes to hash (any alignment)
// len  - number of bytes at key; must not be negative
// seed - value mixed into the initial hash state
//
// Returns the 64-bit hash of the first len bytes of key.
uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )
{
    const uint64_t m = 0xc6a4a7935bd1e995ULL;
    const int r = 47;

    uint64_t h = seed ^ (len * m);

    const unsigned char * data = static_cast<const unsigned char *>(key);
    const unsigned char * end = data + (len / 8) * 8;

    // Body: mix the input eight bytes at a time.
    while(data != end)
    {
        uint64_t k = MurmurLoad64(data);
        data += 8;

        k *= m;
        k ^= k >> r;
        k *= m;

        h ^= k;
        h *= m;
    }

    // Tail: fold in the remaining 0..7 bytes. Every case intentionally
    // falls through to the ones below it.
    switch(len & 7)
    {
    case 7: h ^= uint64_t(data[6]) << 48; // fall through
    case 6: h ^= uint64_t(data[5]) << 40; // fall through
    case 5: h ^= uint64_t(data[4]) << 32; // fall through
    case 4: h ^= uint64_t(data[3]) << 24; // fall through
    case 3: h ^= uint64_t(data[2]) << 16; // fall through
    case 2: h ^= uint64_t(data[1]) << 8;  // fall through
    case 1: h ^= uint64_t(data[0]);
            h *= m;
    };

    // Final avalanche.
    h ^= h >> r;
    h *= m;
    h ^= h >> r;

    return h;
}

// 64-bit hash for 32-bit platforms.
//
// Keeps two 32-bit states and uses only 32-bit arithmetic, then returns
// them combined as (h1 << 32) | h2.
//
// key  - pointer to the bytes to hash (any alignment)
// len  - number of bytes at key
// seed - value mixed into the initial state of h1
uint64_t MurmurHash64B ( const void * key, int len, unsigned int seed )
{
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    uint32_t h1 = seed ^ len;
    uint32_t h2 = 0;

    const unsigned char * data = static_cast<const unsigned char *>(key);

    // Body: each iteration feeds one 4-byte word into h1 and the next into h2.
    while(len >= 8)
    {
        uint32_t k1 = MurmurLoad32(data);
        data += 4;
        k1 *= m; k1 ^= k1 >> r; k1 *= m;
        h1 *= m; h1 ^= k1;
        len -= 4;

        uint32_t k2 = MurmurLoad32(data);
        data += 4;
        k2 *= m; k2 ^= k2 >> r; k2 *= m;
        h2 *= m; h2 ^= k2;
        len -= 4;
    }

    // One more whole word, if present, goes into h1.
    if(len >= 4)
    {
        uint32_t k1 = MurmurLoad32(data);
        data += 4;
        k1 *= m; k1 ^= k1 >> r; k1 *= m;
        h1 *= m; h1 ^= k1;
        len -= 4;
    }

    // Tail: the remaining 0..3 bytes go into h2. Every case intentionally
    // falls through to the ones below it.
    switch(len)
    {
    case 3: h2 ^= uint32_t(data[2]) << 16; // fall through
    case 2: h2 ^= uint32_t(data[1]) << 8;  // fall through
    case 1: h2 ^= uint32_t(data[0]);
            h2 *= m;
    };

    // Final cross-mixing of the two halves.
    h1 ^= h2 >> 18; h1 *= m;
    h2 ^= h1 >> 22; h2 *= m;
    h1 ^= h2 >> 17; h1 *= m;
    h2 ^= h1 >> 19; h2 *= m;

    uint64_t h = h1;

    h = (h << 32) | h2;

    return h;
}

// 2) CityHash has to be downloaded, configured and compiled before it can be used.
#include <iostream> #include <string> #include <time.h> #include "city.h" using namespace std; uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed ); int main(int argc, char** argv) { char buf[1024]; time_t t1, t2; FILE* file = fopen(argv[1], "r"); int pattern = atoi(argv[2]); t1 = time(NULL); while(fgets(buf, 1024, file) != NULL) { if(1 == pattern) { MurmurHash64A(buf, strlen(buf), 16); } else { CityHash64WithSeed(buf, strlen(buf), 16); } } t2 = time(NULL); cout << (t2-t1) << endl; return 0; }
#!/bin/bash
# Usage:   bash make_random_strings <length> <count>
# Example: bash make_random_strings 16 10000
#          (prints 10000 strings, each 16 characters long, one per line)
#
# Characters are the first character of each run that `strings -n 1`
# extracts from /dev/urandom.

if [ "$#" -ne 2 ]; then
    echo "usage: $0 <length> <count>" >&2
    exit 1
fi

length=$1
count=$2

# Reject anything that is not a plain non-negative integer.
case "$length$count" in
    ''|*[!0-9]*)
        echo "usage: $0 <length> <count>" >&2
        exit 1
        ;;
esac

for ((j = 0; j < count; j++)); do
    # One pipeline per string: take the first character of `length` runs
    # and join them by deleting the newlines.
    str=$(strings -n 1 < /dev/urandom | cut -c1 | head -n "$length" | tr -d '\n')
    # Quoted so that whitespace and glob characters are printed verbatim.
    printf '%s\n' "$str"
done
测试长度16的字符串hash速度