布隆过滤器:用来快速判断一个字符串是否在一个字符串集合里(判断为“不在”时一定正确,判断为“在”时可能存在误判)
下面对布隆过滤器进行一些详细的解释
bitmap.h 文件
#pragma once
#include <stdint.h>
/* Word type used as the storage unit of the bitmap. */
typedef uint64_t BitmapType;

/* Modulus the Bloom filter code applies to a hash value to get a bit index. */
#define BitmapMaxSize 1000

/* Bit array backed by a heap-allocated array of BitmapType words. */
typedef struct Bitmap
{
    BitmapType* data;  /* backing array of storage words */
    uint64_t capacity; /* maximum number of bits the bitmap can hold */
} Bitmap;
// Initialize the bitmap; capacity is the maximum number of bits it can hold
void BitmapInit(Bitmap* bm,uint64_t capacity);
// Destroy the bitmap and release its storage
void BitmapDestroy(Bitmap* bm);
// Set the bit at index to 1
void BitmapSet(Bitmap* bm,uint64_t index);
// Set the bit at index to 0
void BitmapUnset(Bitmap* bm,uint64_t index);
// Set every bit to 1
void BitmapFill(Bitmap* bm);
// Set every bit to 0
void BitmapClear(Bitmap* bm);
// Test whether the bit at index is 1 (returns 1 if set, 0 otherwise)
int BitmapTest(Bitmap* bm,uint64_t index);
(1)检测位图中的某一位是否为1
/*
 * Report whether the bit at position `index` is set.
 *
 * Returns 1 when the bit is 1. Returns 0 when the bit is 0, when bm is NULL,
 * or when index is not below bm->capacity.
 */
int BitmapTest(Bitmap* bm,uint64_t index)
{
    if(bm == NULL || index >= bm->capacity)
    {
        // invalid input
        return 0;
    }
    uint64_t n,offset;
    GetOffset(index,&n,&offset);
    // Build the mask in the storage word type: offset can be up to 63, and
    // shifting an unsigned long by >= its width is undefined where long is
    // only 32 bits wide.
    BitmapType mask = (BitmapType)1 << offset;
    return (bm->data[n] & mask) != 0 ? 1 : 0;
}
(2)将位图中的所有位都设置为0
/*
 * Set every bit of the bitmap to 0.
 *
 * Does nothing when bm is NULL or when the bitmap has no storage attached.
 */
void BitmapClear(Bitmap* bm)
{
    // Getsize() returns at least 1 even for capacity 0, so the storage
    // pointer has to be checked explicitly before writing through it.
    if(bm == NULL || bm->data == NULL)
    {
        return;
    }
    uint64_t size = Getsize(bm->capacity);
    memset(bm->data,0x0,(sizeof(BitmapType)*size));
    return;
}
(3)初始化位图
/*
 * Number of BitmapType words allocated for a bitmap of `capacity` bits:
 * capacity divided by the bits per word, plus one.
 */
uint64_t Getsize(uint64_t capacity)
{
    const uint64_t bits_per_word = sizeof(BitmapType) * 8;
    return capacity / bits_per_word + 1;
}
/*
 * Initialize a bitmap able to hold `capacity` bits, all cleared to 0.
 *
 * The number of words allocated is Getsize(capacity), i.e.
 * capacity / (sizeof(BitmapType) * 8) + 1. For example:
 *   capacity = 100 -> 2 words
 *   capacity = 200 -> 4 words
 *   capacity = 300 -> 5 words
 *
 * Does nothing when bm is NULL. If the allocation fails, the bitmap is left
 * with data == NULL and capacity == 0, so BitmapSet/BitmapUnset/BitmapTest
 * reject every index.
 */
void BitmapInit(Bitmap* bm,uint64_t capacity)
{
    if(bm == NULL)
    {
        return;
    }
    // size is the number of array elements to allocate
    uint64_t size = Getsize(capacity);
    // calloc returns zeroed storage, so no separate memset is needed
    bm->data = calloc((size_t)size,sizeof(BitmapType));
    if(bm->data == NULL)
    {
        bm->capacity = 0;
        return;
    }
    bm->capacity = capacity;
    return;
}
(4)销毁位图
/*
 * Release the bitmap's storage and reset it to an empty state.
 *
 * Does nothing when bm is NULL. Afterwards capacity is 0 and data is NULL,
 * so calling BitmapDestroy again is harmless (free(NULL) is a no-op).
 */
void BitmapDestroy(Bitmap* bm)
{
    if(bm == NULL)
    {
        return;
    }
    bm->capacity = 0;
    free(bm->data);
    // Drop the dangling pointer to avoid a later double free / use-after-free
    bm->data = NULL;
    return;
}
(5)将位图的某一位设置为1
/*
 * Split a bit index into a word position and a bit position inside that word.
 *
 * index:  bit index to locate
 * n:      receives the index of the word containing the bit
 * offset: receives the position of the bit within that word
 */
void GetOffset(uint64_t index,uint64_t* n,uint64_t* offset)
{
    const uint64_t bits_per_word = sizeof(BitmapType) * 8;
    *n = index / bits_per_word;
    *offset = index % bits_per_word;
}
/*
 * Set the bit at position `index` to 1.
 *
 * Does nothing when bm is NULL or when index is not below bm->capacity.
 */
void BitmapSet(Bitmap* bm,uint64_t index)
{
    if(bm == NULL || index >= bm->capacity)
    {
        return;
    }
    uint64_t n,offset;
    GetOffset(index,&n,&offset);
    // Build the mask in the storage word type: offset can be up to 63, and
    // shifting an unsigned long by >= its width is undefined where long is
    // only 32 bits wide.
    bm->data[n] |= ((BitmapType)1 << offset);
    return;
}
/*
 * Test driver for BitmapSet: sets bit 50 of a 100-bit bitmap and prints the
 * result of testing bit 50 (expected 1) and bit 20 (expected 0).
 */
void TestSet()
{
    TEST_HEADER;
    Bitmap bm;
    BitmapInit(&bm,100);
    BitmapSet(&bm,50);
    int ret = BitmapTest(&bm,50);
    printf("ret expected 1,actual %d\n",ret);
    ret = BitmapTest(&bm,20);
    printf("ret expected 0,actual %d\n",ret);
    // Release the storage acquired by BitmapInit
    BitmapDestroy(&bm);
}
(6)将位图中的某一位设置为0
/*
 * Set the bit at position `index` to 0.
 *
 * Does nothing when bm is NULL or when index is not below bm->capacity.
 */
void BitmapUnset(Bitmap* bm,uint64_t index)
{
    if(bm == NULL || index >= bm->capacity)
    {
        return;
    }
    uint64_t n,offset;
    GetOffset(index,&n,&offset);
    // Build the mask in the storage word type: offset can be up to 63, and
    // shifting an unsigned long by >= its width is undefined where long is
    // only 32 bits wide.
    bm->data[n] &= ~((BitmapType)1 << offset);
    return;
}
/*
 * Test driver for BitmapUnset: sets bit 50 (expected to read back 1), then
 * clears it again (expected to read back 0), printing both results.
 */
void TestUnset()
{
    TEST_HEADER;
    Bitmap bm;
    BitmapInit(&bm,100);
    BitmapSet(&bm,50);
    int ret = BitmapTest(&bm,50);
    printf("ret expected 1,actual %d\n",ret);
    BitmapUnset(&bm,50);
    ret = BitmapTest(&bm,50);
    printf("ret expected 0,actual %d\n",ret);
    // Release the storage acquired by BitmapInit
    BitmapDestroy(&bm);
}
(7)将位图的所有位都设置为1
/*
 * Test driver for BitmapFill: fills a 100-bit bitmap and prints the result
 * of testing bits 50, 0 and 99 (all expected to be 1).
 */
void TestFill()
{
    TEST_HEADER;
    Bitmap bm;
    BitmapInit(&bm,100);
    BitmapFill(&bm);
    int ret = BitmapTest(&bm,50);
    printf("ret expected 1,actual %d\n",ret);
    ret = BitmapTest(&bm,0);
    printf("ret expected 1,actual %d\n",ret);
    ret = BitmapTest(&bm,99);
    printf("ret expected 1,actual %d\n",ret);
    // Release the storage acquired by BitmapInit
    BitmapDestroy(&bm);
}
/*
 * Set every bit of the bitmap to 1.
 *
 * All Getsize(capacity) storage words are overwritten with 0xff bytes.
 * Does nothing when bm is NULL or when the bitmap has no storage attached.
 */
void BitmapFill(Bitmap* bm)
{
    // Getsize() returns at least 1 even for capacity 0, so the storage
    // pointer has to be checked explicitly before writing through it.
    if(bm == NULL || bm->data == NULL)
    {
        return;
    }
    uint64_t size = Getsize(bm->capacity);
    memset(bm->data,0xff,(sizeof(BitmapType)*size));
    return;
}
.h文件
#pragma once
#include"bitmap.h"
// Hash function type used by the Bloom filter: converts a string into a
// number that is then reduced to a bitmap index
typedef uint64_t (*BloomHash)(const char*);
// Number of hash functions stored in a BloomFilter
#define BloomHashCount 2
// Bloom filter: a bitmap plus the hash functions used to pick bit positions
typedef struct BloomFilter
{
Bitmap bm;
BloomHash bloom_hash[BloomHashCount];
}BloomFilter;
// Initialize the Bloom filter (its bitmap and hash function table)
void BloomFilterInit(BloomFilter* bf);
// Destroy the Bloom filter and release its bitmap
void BloomFilterDestroy(BloomFilter* bf);
// Insert the string str into the Bloom filter
void BloomFilterInsert(BloomFilter* bf,const char* str);
// Check whether str is present: returns 1 if every bit it hashes to is set, 0 otherwise
int BloomFilterIsExist(BloomFilter* bf,const char* str);
(1)hash_func.c
#include
#include
/*
 * BKDR string hash: folds each character of the NUL-terminated string into
 * the accumulator as hash = hash * 131 + ch. Returns 0 for the empty string.
 * str must not be NULL.
 */
size_t BKDRHash(const char* str)
{
    const size_t multiplier = 131;
    size_t acc = 0;
    for(const char* p = str; *p != '\0'; ++p)
    {
        acc = acc * multiplier + (size_t)*p;
    }
    return acc;
}
/*
 * SDBM string hash: folds each character of the NUL-terminated string into
 * the accumulator as hash = hash * 65599 + ch. Returns 0 for the empty
 * string. str must not be NULL.
 */
size_t SDBMHash(const char* str)
{
    const size_t multiplier = 65599;
    size_t acc = 0;
    for(const char* p = str; *p != '\0'; ++p)
    {
        acc = acc * multiplier + (size_t)*p;
    }
    return acc;
}
(2)初始化布隆过滤器
/*
 * Initialize a Bloom filter: create its bitmap and install the two hash
 * functions (SDBMHash and BKDRHash).
 *
 * Does nothing when bf is NULL.
 */
void BloomFilterInit(BloomFilter* bf)
{
    if(bf == NULL)
    {
        return;
    }
    // Insert and lookup reduce every hash modulo BitmapMaxSize, so the bitmap
    // needs exactly BitmapMaxSize bits; any larger size is never addressed.
    BitmapInit(&bf->bm,BitmapMaxSize);
    bf->bloom_hash[0] = SDBMHash;
    bf->bloom_hash[1] = BKDRHash;
    return;
}
(3)销毁布隆过滤器
/*
 * Tear down a Bloom filter: clear its hash function slots, then destroy the
 * underlying bitmap. A NULL bf is ignored.
 */
void BloomFilterDestroy(BloomFilter* bf)
{
    if(bf != NULL)
    {
        // BloomHashCount is 2, so this clears slots 0 and 1
        for(size_t slot = 0; slot < BloomHashCount; ++slot)
        {
            bf->bloom_hash[slot] = NULL;
        }
        BitmapDestroy(&bf->bm);
    }
}
(4)向布隆过滤器中插入一个字符串
/*
 * Insert a string into the Bloom filter: for each installed hash function,
 * hash the string, reduce the result modulo BitmapMaxSize and set that bit
 * in the bitmap. Invalid input (NULL bf or NULL str) is ignored.
 */
void BloomFilterInsert(BloomFilter* bf,const char* str)
{
    if(bf != NULL && str != NULL)
    {
        size_t k = 0;
        while(k < BloomHashCount)
        {
            BloomHash hash_fn = bf->bloom_hash[k];
            uint64_t bit_index = hash_fn(str) % BitmapMaxSize;
            BitmapSet(&bf->bm,bit_index);
            ++k;
        }
    }
}
(5)查看布隆过滤器中是否存在一个字符串
/*
 * Check whether a string is recorded in the Bloom filter.
 *
 * Returns 1 when every bit selected by the hash functions (hash value modulo
 * BitmapMaxSize) is set; returns 0 as soon as one of them is clear, or when
 * bf or str is NULL.
 */
int BloomFilterIsExist(BloomFilter* bf,const char* str)
{
    if(bf == NULL || str == NULL)
    {
        // invalid input
        return 0;
    }
    int all_set = 1;
    // Stop probing as soon as one bit turns out to be clear
    for(size_t k = 0; all_set && k < BloomHashCount; ++k)
    {
        uint64_t bit_index = bf->bloom_hash[k](str) % BitmapMaxSize;
        all_set = (BitmapTest(&bf->bm,bit_index) != 0);
    }
    return all_set;
}
(6)整体测试函数
/*
 * Test driver for the Bloom filter: inserts "nihao" and "haha", then prints
 * the lookup result for "nihao" (expected 1) and "hehe" (expected 0).
 */
void TestBloom()
{
    TEST_HEADER;
    BloomFilter bf;
    BloomFilterInit(&bf);
    BloomFilterInsert(&bf,"nihao");
    BloomFilterInsert(&bf,"haha");
    int ret = BloomFilterIsExist(&bf,"nihao");
    printf("ret expected 1,actual %d\n",ret);
    ret = BloomFilterIsExist(&bf,"hehe");
    printf("ret expected 0,actual %d\n",ret);
    // Release the bitmap acquired by BloomFilterInit
    BloomFilterDestroy(&bf);
}
上面这个布隆过滤器节省空间,但是不支持删除操作:因为多个字符串可能映射到同一个比特位,删除一个字符串时如果把对应的位清零,可能会影响到其他字符串的判断结果;
如果我们想要支持删除操作,可以使用引用计数的方法:每个位置不再用一个比特位表示,而是用一个无符号整数作为计数器。插入一个字符串时,把它映射到的每个位置的计数加1;删除一个字符串时,如果它映射到的每个位置的计数都大于0,就表明这个字符串(可能)存在,那么就让这几个位置的计数同时减1;