性能最好的字符串转64位Hash函数

问题:两个海量字符串数据集,在内存有限、不能生成中间文件的前提下,如何求出它们的交集?
答:将字符串转换成数字处理。本文包含6种一次hash冲突概率极低的hash函数,其性能比较为 CityHash64>FarmHash64>Murmur3>Murmur2_64A>Murmur2_64B>Blizzard_MPQ

第6名 - Blizzard_MPQ

#include 
using namespace std;
using ui = unsigned int;
using ul = unsigned long;
using uc = unsigned char;

#define Type 0
ul MagicTable[0x500];

void init(){
  ul seed =0x00100001, index1 =0, index2 =0, i;   
  for(index1 = 0; index1 < 0x100; ++index1){    
    for(index2 = index1, i = 0; i < 5; ++i, index2 += 0x100){    
      ul temp1, temp2;   
      seed = (seed * 125 + 3) % 0x2AAAAB;   
      temp1 = (seed & 0xFFFF) << 0x10;   
      seed = (seed * 125 + 3) % 0x2AAAAB;   
      temp2 = (seed & 0xFFFF);   
      MagicTable[index2] = ( temp1 | temp2 );    
    }    
  }
  return ;
}

// Blizzard  
ul MPQHash(const char* str, ul HashType){    
  uc *key = (uc*)str;   
  ul seed1 = 0x7FED7FED, seed2 = 0xEEEEEEEE;   
  int ch;   
  while(*key != 0){    
    ch = toupper(*key++);   
    seed1 = MagicTable[(HashType <<8) + ch] ^ (seed1 + seed2);   
    seed2 = ch + seed1 + seed2 + (seed2 << 5) +3;    
  }   
  return seed1;    
}

int main(int argc, char**argv){
  init();
  string str = "最強|Hash #6";
  ul x = MPQHash(str.c_str(), Type);
  cout<< x <<endl;
  return 0;
}

第5名 - Murmur2_64B

#include 
using namespace std;
using ui = unsigned int;
using ul = unsigned long;
using uc = unsigned char;
using ull = unsigned long long;

#define SEED 0xEE6B27EB

ull MurmurHash64B(const void * key, int len, ui seed){
	const ui m = 0x5bd1e995;
	const int r = 24;
 
	ui h1 = seed ^ len;
	ui h2 = 0;
 
	const ui * data = (const ui *)key;
 
	while(len >= 8){
		ui k1 = *data++;
		k1 *= m; k1 ^= k1 >> r; k1 *= m;
		h1 *= m; h1 ^= k1;
		len -= 4;
 
		ui k2 = *data++;
		k2 *= m; k2 ^= k2 >> r; k2 *= m;
		h2 *= m; h2 ^= k2;
		len -= 4;
	}
 
	if(len >= 4){
		ui k1 = *data++;
		k1 *= m; k1 ^= k1 >> r; k1 *= m;
		h1 *= m; h1 ^= k1;
		len -= 4;
	}
 
	switch(len){
	  case 3: h2 ^= ((uc*)data)[2] << 16;
	  case 2: h2 ^= ((uc*)data)[1] << 8;
	  case 1: h2 ^= ((uc*)data)[0];
		h2 *= m;
	};
 
	h1 ^= h2 >> 18; h1 *= m;
	h2 ^= h1 >> 22; h2 *= m;
	h1 ^= h2 >> 17; h1 *= m;
	h2 ^= h1 >> 19; h2 *= m;
 
	ull h = h1;
 
	h = (h << 32) | h2;
 
	return h;
} 

int main(int argc, char**argv){
  string str = "最強|Hash #5";
  ull x = MurmurHash64B(str.c_str(), str.size(), SEED);
  cout<< x <<endl;
  return 0;
}

第4名 - Murmur2_64A

#include 
using namespace std;
using ui = unsigned int;
using ul = unsigned long;
using uc = unsigned char;
using ull = unsigned long long;

#define SEED 16

ull MurmurHash64A( const void * key, int len, ui seed){
  const ull m = 0xc6a4a7935bd1e995;
  const int r = 47;
 
  ull h = seed ^ (len * m);
 
  const ull * data = (const ull *)key;
  const ull * end = data + (len/8);
 
  while(data != end){
    ull k = *data++;
 
    k *= m; 
    k ^= k >> r; 
    k *= m; 
    h ^= k;
    h *= m; 
  }
 
  const uc * data2 = (const uc*)data;
 
  switch(len & 7){
    case 7: h ^= ull(data2[6]) << 48;
    case 6: h ^= ull(data2[5]) << 40;
    case 5: h ^= ull(data2[4]) << 32;
    case 4: h ^= ull(data2[3]) << 24;
    case 3: h ^= ull(data2[2]) << 16;
    case 2: h ^= ull(data2[1]) << 8;
    case 1: h ^= ull(data2[0]);
    h *= m;
  };
 
  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
} 

int main(int argc, char**argv){
  string str = "最強|Hash #4";
  ull x = MurmurHash64A(str.c_str(), str.size(), SEED);
  cout<< x <<endl;
  return 0;
}

第3名 - Murmur3

#include 
using namespace std;
using ui = unsigned int;
using ul = unsigned long;
using uc = unsigned char;
using ull = unsigned long long;

#define FORCE_INLINE inline
#define BIG_CONSTANT(x) (x##LLU)

static FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
{
  return (x << r) | (x >> (64 - r));
}

FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
{
  return p[i];
}

FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;
 
  return k;
}

#define SEED 42
#define ROTL64(x,y)	rotl64(x,y)

uint64_t MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed)
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;
 
  uint64_t h1 = seed;
  uint64_t h2 = seed;
 
  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
 
  //----------
  // body
 
  const uint64_t * blocks = (const uint64_t *)(data);
 
  for(int i = 0; i < nblocks; i++)
  {
    uint64_t k1 = getblock64(blocks,i*2+0);
    uint64_t k2 = getblock64(blocks,i*2+1);
 
    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
 
    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
 
    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
 
    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
  }
 
  //----------
  // tail
 
  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
 
  uint64_t k1 = 0;
  uint64_t k2 = 0;
 
  switch(len & 15)
  {
  case 15: k2 ^= ((uint64_t)tail[14]) << 48;
  case 14: k2 ^= ((uint64_t)tail[13]) << 40;
  case 13: k2 ^= ((uint64_t)tail[12]) << 32;
  case 12: k2 ^= ((uint64_t)tail[11]) << 24;
  case 11: k2 ^= ((uint64_t)tail[10]) << 16;
  case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
  case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
 
  case  8: k1 ^= ((uint64_t)tail[ 7]) << 56;
  case  7: k1 ^= ((uint64_t)tail[ 6]) << 48;
  case  6: k1 ^= ((uint64_t)tail[ 5]) << 40;
  case  5: k1 ^= ((uint64_t)tail[ 4]) << 32;
  case  4: k1 ^= ((uint64_t)tail[ 3]) << 24;
  case  3: k1 ^= ((uint64_t)tail[ 2]) << 16;
  case  2: k1 ^= ((uint64_t)tail[ 1]) << 8;
  case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
  };
 
  //----------
  // finalization
 
  h1 ^= len; h2 ^= len;
 
  h1 += h2;
  h2 += h1;
 
  h1 = fmix64(h1);
  h2 = fmix64(h2);
 
  h1 += h2;
  h2 += h1;
 
  return h1;
}

int main(int argc, char**argv){
  string str = "最強|Hash #3";
  uint64_t x = MurmurHash3_x64_128(str.c_str(), str.size(), SEED);
  cout<< x <<endl;
  return 0;
}

第2名 - FarmHash64

//Google FarmHash

#include 

typedef std::pair<uint64_t, uint64_t> uint128_t;
inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
#define STATIC_INLINE static inline

using namespace std;

using ui = unsigned int;
using ul = unsigned long;
using uc = unsigned char;
using ull = unsigned long long;

// Some primes between 2^63 and 2^64 for various uses.
static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
static const uint64_t k1 = 0xb492b66fbe98f273ULL;
static const uint64_t k2 = 0x9ae16a3b2f90404fULL;

#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)

STATIC_INLINE uint64_t Fetch64(const char *p) {
  uint64_t result;
  memcpy(&result, p, sizeof(result));
  return uint64_in_expected_order(result);
}

STATIC_INLINE uint32_t Fetch32(const char *p) {
  uint32_t result;
  memcpy(&result, p, sizeof(result));
  return uint32_in_expected_order(result);
}

STATIC_INLINE uint32_t bswap_32(const uint32_t x) {
  uint32_t y = x;
  for (size_t i = 0; i<sizeof(uint32_t)>> 1; i++) {
    uint32_t d = sizeof(uint32_t) - i - 1;
    uint32_t mh = ((uint32_t)0xff) << (d << 3);
    uint32_t ml = ((uint32_t)0xff) << (i << 3);
    uint32_t h = x & mh;
    uint32_t l = x & ml;
    uint64_t t = (l << ((d - i) << 3)) | (h >> ((d - i) << 3));
    y = t | (y & ~(mh | ml));
  }
  return y;
}

STATIC_INLINE uint64_t bswap_64(const uint64_t x) {
  uint64_t y = x;
  for (size_t i = 0; i<sizeof(uint64_t)>> 1; i++) {
    uint64_t d = sizeof(uint64_t) - i - 1;
    uint64_t mh = ((uint64_t)0xff) << (d << 3);
    uint64_t ml = ((uint64_t)0xff) << (i << 3);
    uint64_t h = x & mh;
    uint64_t l = x & ml;
    uint64_t t = (l << ((d - i) << 3)) | (h >> ((d - i) << 3));
    y = t | (y & ~(mh | ml));
  }
  return y;
}

STATIC_INLINE uint32_t Bswap32(uint32_t val) { return bswap_32(val); }
STATIC_INLINE uint64_t Bswap64(uint64_t val) { return bswap_64(val); }

// FARMHASH PORTABILITY LAYER: bitwise rot

STATIC_INLINE uint32_t BasicRotate32(uint32_t val, int shift) {
  // Avoid shifting by 32: doing so yields an undefined result.
  return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
}

STATIC_INLINE uint64_t BasicRotate64(uint64_t val, int shift) {
  // Avoid shifting by 64: doing so yields an undefined result.
  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
}

STATIC_INLINE uint32_t Rotate32(uint32_t val, int shift) {
  return BasicRotate32(val, shift);
}
STATIC_INLINE uint64_t Rotate64(uint64_t val, int shift) {
  return BasicRotate64(val, shift);
}

// Hash 128 input bits down to 64 bits of output.
// This is intended to be a reasonably good hash function.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
STATIC_INLINE uint64_t Hash128to64(uint128_t x) {
  // Murmur-inspired hashing.
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
  a ^= (a >> 47);
  uint64_t b = (Uint128High64(x) ^ a) * kMul;
  b ^= (b >> 47);
  b *= kMul;
  return b;
}

STATIC_INLINE uint64_t ShiftMix(uint64_t val) {
  return val ^ (val >> 47);
}

STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v) {
  return Hash128to64(Uint128(u, v));
}

STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
  // Murmur-inspired hashing.
  uint64_t a = (u ^ v) * mul;
  a ^= (a >> 47);
  uint64_t b = (v ^ a) * mul;
  b ^= (b >> 47);
  b *= mul;
  return b;
}

STATIC_INLINE uint64_t HashLen0to16(const char *s, size_t len) {
  if (len >= 8) {
    uint64_t mul = k2 + len * 2;
    uint64_t a = Fetch64(s) + k2;
    uint64_t b = Fetch64(s + len - 8);
    uint64_t c = Rotate64(b, 37) * mul + a;
    uint64_t d = (Rotate64(a, 25) + b) * mul;
    return HashLen16(c, d, mul);
  }
  if (len >= 4) {
    uint64_t mul = k2 + len * 2;
    uint64_t a = Fetch32(s);
    return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
  }
  if (len > 0) {
    uint8_t a = s[0];
    uint8_t b = s[len >> 1];
    uint8_t c = s[len - 1];
    uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
    uint32_t z = len + (static_cast<uint32_t>(c) << 2);
    return ShiftMix(y * k2 ^ z * k0) * k2;
  }
  return k2;
}

// This probably works well for 16-byte strings as well, but it may be overkill
// in that case.
STATIC_INLINE uint64_t HashLen17to32(const char *s, size_t len) {
  uint64_t mul = k2 + len * 2;
  uint64_t a = Fetch64(s) * k1;
  uint64_t b = Fetch64(s + 8);
  uint64_t c = Fetch64(s + len - 8) * mul;
  uint64_t d = Fetch64(s + len - 16) * k2;
  return HashLen16(Rotate64(a + b, 43) + Rotate64(c, 30) + d,
                   a + Rotate64(b + k2, 18) + c, mul);
}

// Return a 16-byte hash for 48 bytes.  Quick and dirty.
// Callers do best to use "random-looking" values for a and b.
STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
    uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) {
  a += w;
  b = Rotate64(b + a + z, 21);
  uint64_t c = a;
  a += x;
  a += y;
  b += Rotate64(a, 44);
  return make_pair(a + z, b + c);
}

// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
    const char* s, uint64_t a, uint64_t b) {
  return WeakHashLen32WithSeeds(Fetch64(s),
                                Fetch64(s + 8),
                                Fetch64(s + 16),
                                Fetch64(s + 24),
                                a,
                                b);
}

// Return an 8-byte hash for 33 to 64 bytes.
STATIC_INLINE uint64_t HashLen33to64(const char *s, size_t len) {
  uint64_t mul = k2 + len * 2;
  uint64_t a = Fetch64(s) * k2;
  uint64_t b = Fetch64(s + 8);
  uint64_t c = Fetch64(s + len - 8) * mul;
  uint64_t d = Fetch64(s + len - 16) * k2;
  uint64_t y = Rotate64(a + b, 43) + Rotate64(c, 30) + d;
  uint64_t z = HashLen16(y, a + Rotate64(b + k2, 18) + c, mul);
  uint64_t e = Fetch64(s + 16) * mul;
  uint64_t f = Fetch64(s + 24);
  uint64_t g = (y + Fetch64(s + len - 32)) * mul;
  uint64_t h = (z + Fetch64(s + len - 24)) * mul;
  return HashLen16(Rotate64(e + f, 43) + Rotate64(g, 30) + h,
                   e + Rotate64(f + a, 18) + g, mul);
}


uint64_t FarmHash64(const char *s, size_t len) {
  const uint64_t seed = 81;
  if (len <= 32) {
    if (len <= 16) {
      return HashLen0to16(s, len);
    } else {
      return HashLen17to32(s, len);
    }
  } else if (len <= 64) {
    return HashLen33to64(s, len);
  }

  // For strings over 64 bytes we loop.  Internal state consists of
  // 56 bytes: v, w, x, y, and z.
  uint64_t x = seed;
  uint64_t y = seed * k1 + 113;
  uint64_t z = ShiftMix(y * k2 + 113) * k2;
  pair<uint64_t, uint64_t> v = make_pair(0, 0);
  pair<uint64_t, uint64_t> w = make_pair(0, 0);
  x = x * k2 + Fetch64(s);

  // Set end so that after the loop we have 1 to 64 bytes left to process.
  const char* end = s + ((len - 1) / 64) * 64;
  const char* last64 = end + ((len - 1) & 63) - 63;
  assert(s + len - 64 == last64);
  do {
    x = Rotate64(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate64(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate64(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
  } while (s != end);
  uint64_t mul = k1 + ((z & 0xff) << 1);
  // Make s point to the last 64 bytes of input.
  s = last64;
  w.first += ((len - 1) & 63);
  v.first += w.first;
  w.first += v.first;
  x = Rotate64(x + y + v.first + Fetch64(s + 8), 37) * mul;
  y = Rotate64(y + v.second + Fetch64(s + 48), 42) * mul;
  x ^= w.second * 9;
  y += v.first * 9 + Fetch64(s + 40);
  z = Rotate64(z + w.first, 33) * mul;
  v = WeakHashLen32WithSeeds(s, v.second * mul, x + w.first);
  w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
  std::swap(z, x);
  return HashLen16(HashLen16(v.first, w.first, mul) + ShiftMix(y) * k0 + z,
                   HashLen16(v.second, w.second, mul) + x,
                   mul);
}

int main(int argc, char** argv){
  string str = "最強|Hash #2";
  uint64_t x = FarmHash64(str.c_str(), str.size());
  cout<< x <<endl;
  return 0;
}

第1名 - CityHash64

//Google CityHash

#include 
using namespace std;

typedef std::pair<uint64_t, uint64_t> uint128_t;
#define STATIC_INLINE static inline

STATIC_INLINE uint64_t Uint128Low64(const uint128_t& x) { return x.first; }
STATIC_INLINE uint64_t Uint128High64(const uint128_t& x) { return x.second; }

static const uint64_t kSeed0 = 1234567;
static const uint64_t kSeed1 = 0xc3a5c85c97cb3127ULL;
static const uint128_t kSeed128(kSeed0, kSeed1);

STATIC_INLINE uint64_t UNALIGNED_LOAD64(const char *p) {
  uint64_t result;
  memcpy(&result, p, sizeof(result));
  return result;
}

STATIC_INLINE uint32_t UNALIGNED_LOAD32(const char *p) {
  uint32_t result;
  memcpy(&result, p, sizeof(result));
  return result;
}

#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)

STATIC_INLINE uint32_t bswap_32(const uint32_t x) {
  uint32_t y = x;
  for (size_t i = 0; i<sizeof(uint32_t)>> 1; ++i) {
    uint32_t d = sizeof(uint32_t) - i - 1;
    uint32_t mh = ((uint32_t)0xff) << (d << 3);
    uint32_t ml = ((uint32_t)0xff) << (i << 3);
    uint32_t h = x & mh;
    uint32_t l = x & ml;
    uint64_t t = (l << ((d - i) << 3)) | (h >> ((d - i) << 3));
    y = t | (y & ~(mh | ml));
  }
  return y;
}

STATIC_INLINE uint64_t bswap_64(const uint64_t x) {
  uint64_t y = x;
  for (size_t i = 0; i<sizeof(uint64_t)>> 1; ++i) {
    uint64_t d = sizeof(uint64_t) - i - 1;
    uint64_t mh = ((uint64_t)0xff) << (d << 3);
    uint64_t ml = ((uint64_t)0xff) << (i << 3);
    uint64_t h = x & mh;
    uint64_t l = x & ml;
    uint64_t t = (l << ((d - i) << 3)) | (h >> ((d - i) << 3));
    y = t | (y & ~(mh | ml));
  }
  return y;
}

// Hash 128 input bits down to 64 bits of output.
// This is intended to be a reasonably good hash function.
STATIC_INLINE uint64_t Hash128to64(const uint128_t& x) {
  // Murmur-inspired hashing.
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
  a ^= (a >> 47);
  uint64_t b = (Uint128High64(x) ^ a) * kMul;
  b ^= (b >> 47);
  b *= kMul;
  return b;
}

STATIC_INLINE uint64_t Fetch64(const char *p) {
  return uint64_in_expected_order(UNALIGNED_LOAD64(p));
}

STATIC_INLINE uint32_t Fetch32(const char *p) {
  return uint32_in_expected_order(UNALIGNED_LOAD32(p));
}

// Bitwise right rotate.  Normally this will compile to a single
// instruction, especially if the shift is a manifest constant.
STATIC_INLINE uint64_t Rotate(uint64_t val, int shift) {
  // Avoid shifting by 64: doing so yields an undefined result.
  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
}

STATIC_INLINE uint64_t ShiftMix(uint64_t val) {
  return val ^ (val >> 47);
}

STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v) {
  return Hash128to64(uint128_t(u, v));
}

STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
  // Murmur-inspired hashing.
  uint64_t a = (u ^ v) * mul;
  a ^= (a >> 47);
  uint64_t b = (v ^ a) * mul;
  b ^= (b >> 47);
  b *= mul;
  return b;
}

// Some primes between 2^63 and 2^64 for various uses.
static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
static const uint64_t k1 = 0xb492b66fbe98f273ULL;
static const uint64_t k2 = 0x9ae16a3b2f90404fULL;

STATIC_INLINE uint64_t HashLen0to16(const char *s, size_t len) {
  if (len >= 8) {
    uint64_t mul = k2 + len * 2;
    uint64_t a = Fetch64(s) + k2;
    uint64_t b = Fetch64(s + len - 8);
    uint64_t c = Rotate(b, 37) * mul + a;
    uint64_t d = (Rotate(a, 25) + b) * mul;
    return HashLen16(c, d, mul);
  }
  if (len >= 4) {
    uint64_t mul = k2 + len * 2;
    uint64_t a = Fetch32(s);
    return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
  }
  if (len > 0) {
    uint8_t a = s[0];
    uint8_t b = s[len >> 1];
    uint8_t c = s[len - 1];
    uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
    uint32_t z = len + (static_cast<uint32_t>(c) << 2);
    return ShiftMix(y * k2 ^ z * k0) * k2;
  }
  return k2;
}

// This probably works well for 16-byte strings as well, but it may be overkill
// in that case.
STATIC_INLINE uint64_t HashLen17to32(const char *s, size_t len) {
  uint64_t mul = k2 + len * 2;
  uint64_t a = Fetch64(s) * k1;
  uint64_t b = Fetch64(s + 8);
  uint64_t c = Fetch64(s + len - 8) * mul;
  uint64_t d = Fetch64(s + len - 16) * k2;
  return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d,
                   a + Rotate(b + k2, 18) + c, mul);
}

// Return a 16-byte hash for 48 bytes.  Quick and dirty.
// Callers do best to use "random-looking" values for a and b.
STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
    uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) {
  a += w;
  b = Rotate(b + a + z, 21);
  uint64_t c = a;
  a += x;
  a += y;
  b += Rotate(a, 44);
  return make_pair(a + z, b + c);
}

// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
    const char* s, uint64_t a, uint64_t b) {
  return WeakHashLen32WithSeeds(Fetch64(s),
                                Fetch64(s + 8),
                                Fetch64(s + 16),
                                Fetch64(s + 24),
                                a,
                                b);
}

// Return an 8-byte hash for 33 to 64 bytes.
STATIC_INLINE uint64_t HashLen33to64(const char *s, size_t len) {
  uint64_t mul = k2 + len * 2;
  uint64_t a = Fetch64(s) * k2;
  uint64_t b = Fetch64(s + 8);
  uint64_t c = Fetch64(s + len - 24);
  uint64_t d = Fetch64(s + len - 32);
  uint64_t e = Fetch64(s + 16) * k2;
  uint64_t f = Fetch64(s + 24) * 9;
  uint64_t g = Fetch64(s + len - 8);
  uint64_t h = Fetch64(s + len - 16) * mul;
  uint64_t u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
  uint64_t v = ((a + g) ^ d) + f + 1;
  uint64_t w = bswap_64((u + v) * mul) + h;
  uint64_t x = Rotate(e + f, 42) + c;
  uint64_t y = (bswap_64((v + w) * mul) + g) * mul;
  uint64_t z = e + f + c;
  a = bswap_64((x + z) * mul + y) + b;
  b = ShiftMix((z + a) * mul + d + h) * mul;
  return b + x;
}

uint64_t CityHash64(const char *s, size_t len) {
  if (len <= 32) {
    if (len <= 16) {
      return HashLen0to16(s, len);
    } else {
      return HashLen17to32(s, len);
    }
  } else if (len <= 64) {
    return HashLen33to64(s, len);
  }

  // For strings over 64 bytes we hash the end first, and then as we
  // loop we keep 56 bytes of state: v, w, x, y, and z.
  uint64_t x = Fetch64(s + len - 40);
  uint64_t y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
  uint64_t z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
  pair<uint64_t, uint64_t> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
  pair<uint64_t, uint64_t> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
  x = x * k1 + Fetch64(s);

  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
  len = (len - 1) & ~static_cast<size_t>(63);
  do {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    len -= 64;
  } while (len != 0);
  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
                   HashLen16(v.second, w.second) + x);
}

uint64_t CityHash64WithSeeds(const char *s, size_t len,
                           uint64_t seed0, uint64_t seed1) {
  return HashLen16(CityHash64(s, len) - seed0, seed1);
}

uint64_t CityHash64WithSeed(const char *s, size_t len, uint64_t seed) {
  return CityHash64WithSeeds(s, len, k2, seed);
}

int main(int argc, char** argv){
  string str = "×|Hash #1";
  uint64_t x = CityHash64(str.c_str(), str.size());
  cout<< x <<endl;
  return 0;
}

你可能感兴趣的:(Hash)