头文件
#pragma once
#ifndef _COMMON_H_
#define _COMMON_H_
/*
 * String hash functions used by the Bloom filter.
 *
 * size_t comes from <stddef.h>; redefining it with a macro would rewrite the
 * token inside every header included afterwards.
 */
#include <stddef.h>

/* Each function hashes the NUL-terminated string `str` and returns the hash. */
size_t BKDRHash(const char *str); /* multiplicative hash, multiplier 131 */
size_t SDBMHash(const char *str); /* multiplicative hash, multiplier 65599 */
size_t RSHash(const char *str);   /* multiplicative hash, multiplier changes per byte */
size_t APHash(const char *str);   /* shift/xor hash alternating two mixing steps */
size_t JSHash(const char *str);   /* shift/add/xor hash; returns 0 for "" */
#endif //!_COMMON_H_
#pragma once
#ifndef _BITMAP_H_
#define _BITMAP_H_
/* size_t must come from the standard header; a private typedef conflicts on
 * platforms where size_t is not unsigned long. */
#include <stddef.h>

/*
 * Bitmap backed by a heap array of words. The implementation addresses bit
 * `data` as word `data >> 5`, bit `data % 32`, i.e. 32 bits are used per word.
 */
typedef struct HashBitMap
{
    size_t* _BitMap;    /* heap array of `_capacity` words */
    size_t _size;       /* number of bits newly set through InsertBitMap */
    size_t _capacity;   /* number of words in `_BitMap` */
}BitMap,*pBitMap;

/* Allocates and zeroes storage for bit indices 0..size. */
void InitBitMap(pBitMap bm, size_t size);
/* Sets bit `data`; returns 1 if it was newly set, 0 otherwise. */
int InsertBitMap(pBitMap bm, size_t data);
/* Returns 1 if bit `data` is set, 0 otherwise. */
int FindBitMap(pBitMap bm, size_t data);
/* Sets bit `num` of word `seat` to 1. */
void Set(pBitMap bm, size_t seat, size_t num);
/* Clears bit `num` of word `seat` to 0. */
void ReSet(pBitMap bm, size_t seat, size_t num);
/* Returns the `_size` counter. */
size_t SizeBitMap(BitMap* bmp);
/* Counts the bits currently set in the bitmap. */
size_t CountBitMap(BitMap* bmp);
/* Frees the word array and resets the counters. */
void DestroyBitMap(BitMap* bmp);
#endif//!_BITMAP_H_
#pragma once
#ifndef _BLOOMFILTER_H_
#define _BLOOMFILTER_H_ /* must match the #ifndef above or the guard never triggers */
#include"BitMap.h"
#include"Common.h"

/* Key type stored in the filter: a NUL-terminated string. */
typedef char* DataType;

/*
 * Hash callback. It takes `const char *` so that it has exactly the type of
 * the hash functions declared in Common.h; a `char *` key converts implicitly
 * when the callback is invoked.
 */
typedef size_t(*PHF)(const char *);

/* Number of hash functions applied to every key. */
#define FUNCNUM 5

typedef struct BloomFilter
{
    BitMap _bmp;              /* bit storage */
    PHF _HashFunc[FUNCNUM];   /* hash functions applied to each key */
    size_t _size;             /* key counter maintained by InsertBF */
}BF;

/* Initializes `bf` with a bitmap for `size` bits and copies the FUNCNUM hash callbacks. */
void InitBloomFilter(BF* bf, PHF hashFunc[FUNCNUM], size_t size);
/* Sets the bit selected by every hash function for `key`. */
int InsertBF(BF* bf, DataType key);
/* Returns 1 if every bit selected for `key` is set, 0 as soon as one is clear. */
int IsInBloomFilter(BF* bf, DataType key);
/* Releases the bitmap and resets the key counter. */
void DestroyBloomFilter(BF* bf);
#endif // ! _BLOOMFILTER_H_
源文件
#include"Common.h"
/*
 * BKDR hash of a NUL-terminated string.
 *
 * Starting from 0, every byte is folded in as `acc = acc * 131 + byte`.
 * Each byte is read as plain `char` and converted to size_t.
 *
 * Original author's notes (translated):
 *  - 31, 131, 1313, 13131, 131313, ... can be used as the multiplier as well.
 *  - Rewriting the multiplication as shifts and additions is sometimes said
 *    to be faster. The author measured 10 billion iterations of both forms on
 *    Intel hardware and found essentially no difference (in a Debug build the
 *    shift form was about one third slower).
 *  - Not measured on ARM. The author's understanding is that ARM multiplies
 *    with Booth's algorithm, taking 1 to 4 cycles depending on how many of
 *    the multiplier's high bits are all ones or all zeros, and therefore
 *    expects the two forms to perform about the same there too.
 */
size_t BKDRHash(const char *str)
{
    const size_t multiplier = 131;
    size_t acc = 0;

    for (const char *cursor = str; *cursor != '\0'; ++cursor)
    {
        acc = acc * multiplier + (size_t)*cursor;
    }
    return acc;
}
/// @brief SDBM hash function.
/// @detail Named after the open-source SDBM project (a simple database
///         engine) that uses it. Same idea as BKDRHash, only the multiplier
///         differs: 65599 (= 2^16 + 2^6 - 1).
size_t SDBMHash(const char *str)
{
    size_t acc = 0;

    for (;;)
    {
        const size_t byte = (size_t)*str++;
        if (byte == 0)
        {
            break; // reached the terminating NUL
        }
        acc = byte + acc * 65599;
    }
    return acc;
}
/// @brief RS hash function.
/// @detail Named after Robert Sedgewick, who presented it in his book
///         "Algorithms in C". The multiplier starts at 63689 and is itself
///         multiplied by 378551 after every byte.
size_t RSHash(const char *str)
{
    const size_t step = 378551;
    size_t multiplier = 63689;
    size_t acc = 0;

    while (*str != '\0')
    {
        acc = acc * multiplier + (size_t)*str;
        multiplier *= step;
        ++str;
    }
    return acc;
}
/// @brief AP hash function.
/// @detail A hash algorithm invented by Arash Partow. Bytes at even positions
///         (0, 2, ...) and odd positions are mixed with two different
///         shift/xor formulas.
size_t APHash(const char *str)
{
    size_t acc = 0;
    int odd_position = 0; // toggles after every byte; the first byte is "even"

    for (const char *cursor = str; *cursor != '\0'; ++cursor)
    {
        const size_t byte = (size_t)*cursor;

        if (odd_position)
        {
            acc ^= ~((acc << 11) ^ byte ^ (acc >> 5));
        }
        else
        {
            acc ^= (acc << 7) ^ byte ^ (acc >> 3);
        }
        odd_position = !odd_position;
    }
    return acc;
}
/// @brief JS hash function.
/// @detail A hash algorithm invented by Justin Sobel. Starts from the seed
///         1315423911 and mixes each byte in with shifts, an add and an xor.
size_t JSHash(const char *str)
{
    size_t acc;

    // Added by the original author so that the empty string hashes to 0.
    if (*str == '\0')
    {
        return 0;
    }

    acc = 1315423911;
    // The string is known to be non-empty here, so a do/while visits exactly
    // the bytes before the terminating NUL.
    do
    {
        acc ^= (acc << 5) + (size_t)*str + (acc >> 2);
    } while (*++str != '\0');

    return acc;
}
#include
#include
#include
#include"BitMap.h"
/*
 * Population-count lookup table: Cou[v] is the number of 1 bits in v.
 * It covers every 4-bit value (0..15) because CountBitMap indexes it with a
 * nibble; an 8-entry table would be read out of bounds for values 8..15.
 */
char* Cou = "\0\1\1\2\1\2\2\3\1\2\2\3\2\3\3\4";
/*
 * Initializes `bm` so that bit indices 0..size can be stored.
 *
 * The bitmap uses 32 bits per word, so (size >> 5) + 1 words are allocated,
 * all zeroed. On allocation failure the assert fires in debug builds; in
 * builds with NDEBUG the bitmap is left empty (`_capacity` == 0,
 * `_BitMap` == NULL) instead of advertising storage that does not exist.
 */
void InitBitMap(pBitMap bm, size_t size)
{
    assert(bm);
    bm->_size = 0;
    bm->_capacity = (size >> 5) + 1;
    /* calloc zero-fills and checks the count * size multiplication. */
    bm->_BitMap = calloc(bm->_capacity, sizeof *bm->_BitMap);
    assert(bm->_BitMap);
    if (bm->_BitMap == NULL)
        bm->_capacity = 0;
}
/*
 * Sets bit `data` in the bitmap.
 *
 * Returns 1 if the bit was newly set (and `_size` was incremented), 0 if it
 * was already set or if `data` lies outside the allocated words. The range
 * check is required: without it an out-of-range index would be reported as
 * "not found" and then written past the end of the array.
 */
int InsertBitMap(pBitMap bm, size_t data)
{
    assert(bm);
    size_t Addr = data >> 5;   /* word index */
    size_t bit = data % 32;    /* bit index inside that word */
    if (Addr >= bm->_capacity)
        return 0;
    if (FindBitMap(bm, data) == 1)
        return 0;
    Set(bm, Addr, bit);
    bm->_size++;
    return 1;
}
/*
 * Sets bit `bit` of word `Addr` to 1.
 *
 * Requests outside the bitmap (word index beyond `_capacity`, or a bit index
 * of 32 or more) are ignored rather than written out of bounds.
 */
void Set(pBitMap bm, size_t Addr, size_t bit)
{
    assert(bm);
    if (Addr >= bm->_capacity || bit >= 32)
        return;
    /* Shift in the word's own type: `1 << 31` overflows int and would
     * sign-extend into the upper bits of a wider word. */
    bm->_BitMap[Addr] |= (size_t)1 << bit;
}
/*
 * Clears bit `bit` of word `Addr` to 0. `_size` is not adjusted.
 *
 * Requests outside the bitmap (word index beyond `_capacity`, or a bit index
 * of 32 or more) are ignored rather than written out of bounds.
 */
void ReSet(pBitMap bm, size_t Addr, size_t bit)
{
    assert(bm);
    if (Addr >= bm->_capacity || bit >= 32)
        return;
    /* Build the mask in the word's own type; shifting int 1 by 31 overflows. */
    bm->_BitMap[Addr] &= ~((size_t)1 << bit);
}
/*
 * Tests bit `data`.
 *
 * Returns 1 if the bit is set, 0 if it is clear or `data` lies outside the
 * allocated words. The word index is kept as size_t: narrowing it to int
 * could wrap a large index back into range and read the wrong word.
 */
int FindBitMap(pBitMap bm, size_t data)
{
    assert(bm);
    size_t Addr = data >> 5;   /* word index */
    size_t bit = data % 32;    /* bit index inside that word */
    if (Addr >= bm->_capacity)
        return 0;
    return (bm->_BitMap[Addr] & ((size_t)1 << bit)) != 0;
}
/* Returns the bitmap's `_size` counter. `bm` must not be NULL. */
size_t SizeBitMap(BitMap* bm)
{
    size_t stored;

    assert(bm);
    stored = bm->_size;
    return stored;
}
/*
 * Counts the bits that are set in the bitmap.
 *
 * Only the low 32 bits of every word belong to the bitmap (bits are addressed
 * as `data >> 5` / `data % 32`), so each word is masked to 32 bits before
 * counting. Working on the word value instead of its bytes keeps the result
 * independent of byte order and of the signedness of `char`.
 */
size_t CountBitMap(BitMap* bm)
{
    assert(bm);
    size_t count = 0;
    for (size_t i = 0; i < bm->_capacity; i++)
    {
        size_t word = bm->_BitMap[i] & 0xFFFFFFFFUL;
        while (word != 0)
        {
            word &= word - 1;   /* clears the lowest set bit */
            count++;
        }
    }
    return count;
}
/*
 * Releases the word array and resets the bitmap to the empty state.
 *
 * The pointer is cleared after free() so that a second DestroyBitMap call is
 * harmless (free(NULL) is a no-op) and no dangling pointer is left behind.
 */
void DestroyBitMap(BitMap* bm)
{
    assert(bm);
    free(bm->_BitMap);
    bm->_BitMap = NULL;
    bm->_size = 0;
    bm->_capacity = 0;
}
#include
#include
#include"BloomFilter.h"
/* Hash functions handed to InitBloomFilter by test(), one per slot. */
PHF Func[FUNCNUM] = {
    [0] = BKDRHash,
    [1] = SDBMHash,
    [2] = RSHash,
    [3] = APHash,
    [4] = JSHash,
};
/*
 * Initializes a Bloom filter.
 *
 * bf       - filter to initialize; must not be NULL.
 * hashFunc - array of FUNCNUM hash callbacks, copied into the filter.
 * size     - number of bits requested for the underlying bitmap.
 */
void InitBloomFilter(BF* bf, PHF hashFunc[FUNCNUM], size_t size)
{
    assert(bf);
    assert(hashFunc);
    InitBitMap(&bf->_bmp, size);
    for (int i = 0; i < FUNCNUM; i++)
        bf->_HashFunc[i] = hashFunc[i];
    bf->_size = 0;
}
/*
 * Inserts `key` into the filter by setting the bit selected by each of the
 * FUNCNUM hash functions (hash value modulo the number of bits).
 *
 * Returns 1 if at least one bit was newly set, which means the key was
 * definitely not present before; in that case `_size` is incremented.
 * Returns 0 if every bit was already set (the key, or keys colliding with it,
 * had been inserted) or if the filter has no storage.
 */
int InsertBF(BF* bf, DataType key)
{
    assert(bf);
    assert(key);
    size_t Max_num = bf->_bmp._capacity * 32;
    if (Max_num == 0)
        return 0;   /* no storage: avoid a modulo by zero */
    int flag = 0;
    for (int i = 0; i < FUNCNUM; i++)
    {
        size_t Addr = bf->_HashFunc[i](key) % Max_num;
        /* The bitmap itself is passed, not its internal word array. */
        if (InsertBitMap(&bf->_bmp, Addr))
            flag = 1;
    }
    bf->_size += flag;
    return flag;
}
/*
 * Tests whether `key` may be in the filter.
 *
 * Returns 0 as soon as one of the FUNCNUM selected bits is clear (the key was
 * never inserted), and 1 if all of them are set (the key was inserted, or
 * other keys happen to cover the same bits). A filter without storage
 * contains nothing, so 0 is returned instead of dividing by zero.
 */
int IsInBloomFilter(BF* bf, DataType key)
{
    assert(bf);
    assert(key);
    size_t Max_num = bf->_bmp._capacity * 32;
    if (Max_num == 0)
        return 0;
    for (int i = 0; i < FUNCNUM; i++)
    {
        size_t Addr = bf->_HashFunc[i](key) % Max_num;
        if (0 == FindBitMap(&bf->_bmp, Addr))
            return 0;
    }
    return 1;
}
/* Tears down a Bloom filter: resets its key counter and destroys its bitmap.
 * `bf` must not be NULL. */
void DestroyBloomFilter(BF* bf)
{
    BitMap* bits;

    assert(bf);
    bits = &bf->_bmp;
    bf->_size = 0;
    DestroyBitMap(bits);
}
void test()
{
BF bf;
InitBloomFilter(&bf, Func, 1000);
InsertBF(&bf, "啊啊");
InsertBF(&bf, "不变");
InsertBF(&bf, "尺寸");
InsertBF(&bf, "大大");
if (1 == IsInBloomFilter(&bf, "啊啊"))
printf("Is In BlooFilter!!!!\n");
else
printf("Is Not In BlooFilter!!!!\n");
if (1 == IsInBloomFilter(&bf, "不变"))
printf("Is In BlooFilter!!!!\n");
else
printf("Is Not In BlooFilter!!!!\n");
if (1 == IsInBloomFilter(&bf, "尺寸"))
printf("Is In BlooFilter!!!!\n");
else
printf("Is Not In BlooFilter!!!!\n");
if (1 == IsInBloomFilter(&bf, "大大"))
printf("Is In BlooFilter!!!!\n");
else
printf("Is Not In BlooFilter!!!!\n");
if (1 == IsInBloomFilter(&bf, "发发发"))
printf("Is In BlooFilter!!!!\n");
else
printf("Is Not In BlooFilter!!!!\n");
DestroyBloomFilter(&bf);
}
/* Runs the demo. Returns 0. */
int main(void)
{
    test();
#ifdef _WIN32
    /* Keeps the console window open on Windows. `pause` is a cmd.exe
     * built-in; on other systems the call would only print a shell error. */
    system("pause");
#endif
    return 0;
}