BloomFilter_布隆过滤器

头文件

  • Common.h
#pragma once

#ifndef COMMON_H
#define COMMON_H

/*
 * Common.h - string hash functions used by the Bloom filter.
 *
 * size_t comes from the standard header instead of being redefined as a
 * macro: a `#define size_t ...` rewrites every later use of the name,
 * including those inside system headers included afterwards.
 */
#include <stddef.h>

/* Each function hashes the NUL-terminated string `str` and returns the hash. */

/* Multiplicative hash with multiplier 131. */
size_t BKDRHash(const char *str);

/* Multiplicative hash with multiplier 65599. */
size_t SDBMHash(const char *str);

/* Multiplicative hash whose multiplier changes after every character. */
size_t RSHash(const char *str);

/* Shift/xor hash that alternates between two mixing steps. */
size_t APHash(const char *str);

/* Shift/add/xor hash; returns 0 for the empty string. */
size_t JSHash(const char *str);

#endif /* COMMON_H */
  • BitMap.h
#pragma once


#ifndef BITMAP_H
#define BITMAP_H

/*
 * BitMap.h - a simple bitmap stored in an array of size_t words.
 *
 * size_t is taken from <stddef.h>; re-typedef'ing it as `unsigned long`
 * conflicts with the standard definition on platforms where size_t has a
 * different underlying type.
 */
#include <stddef.h>

typedef struct  HashBitMap
{
    size_t* _BitMap;    /* heap-allocated array of words holding the bits */
    size_t _size;       /* number of bits newly set through InsertBitMap */
    size_t _capacity;   /* number of words in _BitMap */
}BitMap,*pBitMap;


/* Allocate and zero a bitmap able to hold at least `size` bit positions. */
void InitBitMap(pBitMap bm, size_t size);
/* Set bit number `data`; returns 1 if it was newly set, 0 otherwise. */
int InsertBitMap(pBitMap bm, size_t data);
/* Returns 1 if bit number `data` is set, 0 otherwise. */
int FindBitMap(pBitMap bm, size_t data);

/* `seat` is the index of the word inside _BitMap, `num` the bit inside it. */
void Set(pBitMap bm, size_t seat, size_t num);   /* set the bit to 1 */
void ReSet(pBitMap bm, size_t seat, size_t num); /* clear the bit to 0 */

/* Returns the _size counter. */
size_t SizeBitMap(BitMap* bmp);
/* Returns the number of bits currently set, computed by scanning the words. */
size_t CountBitMap(BitMap* bmp);
/* Releases the word array and resets the counters. */
void DestroyBitMap(BitMap* bmp);


#endif /* BITMAP_H */
  • BloomFilter.h
#pragma once
/*
 * BloomFilter.h - Bloom filter built on top of BitMap and a set of string
 * hash functions.
 *
 * The include guard now tests and defines the same macro (the original
 * defined a misspelled name, so the #ifndef never took effect).
 */
#ifndef  BLOOMFILTER_H
#define  BLOOMFILTER_H

#include"BitMap.h"
#include"Common.h"

/*
 * Keys are NUL-terminated strings. The pointee is const so that PHF has
 * exactly the signature of the hash functions declared in Common.h and so
 * that string literals can be passed without dropping qualifiers.
 */
typedef const char* DataType;
/* Pointer to a hash function mapping a key to a size_t hash value. */
typedef size_t(*PHF)(DataType);
/* Number of hash functions applied to every key. */
#define FUNCNUM 5 


typedef struct BloomFilter
{
    BitMap _bmp;              /* underlying bit storage */
    PHF _HashFunc[FUNCNUM];   /* hash functions applied to each key */
    size_t _size;             /* counter maintained by InsertBF */
}BF;

/* Initialise `bf` with room for at least `size` bits and the given hashes. */
void InitBloomFilter(BF* bf, PHF hashFunc[FUNCNUM], size_t size);
/* Add `key` to the filter. */
int InsertBF(BF* bf, DataType key);
/* Returns 1 if every bit selected by `key` is set, 0 otherwise. */
int IsInBloomFilter(BF* bf, DataType key);
/* Release the filter's storage. */
void DestroyBloomFilter(BF* bf);

#endif /* BLOOMFILTER_H */

源文件

  • Common.c

#include"Common.h"


/**
 * BKDR string hash.
 *
 * Folds every character of the NUL-terminated string into the running value
 * with `hash = hash * 131 + ch`. Other multipliers of the same family
 * (31, 1313, 13131, 131313, ...) may be used as well.
 *
 * Note from the original author: rewriting the multiplication as shifts and
 * adds, e.g. (hash << 7) + (hash << 1) + hash + ch, brings no measurable gain
 * on Intel CPUs (in debug builds the shift form was about one third slower in
 * a 10-billion-iteration comparison). It was not measured on ARM; there the
 * cost of a 32-bit multiply depends on the multiplier (1 to 4 cycles,
 * depending on how many of the upper bits are all ones or all zeros), so the
 * difference is expected to be small as well.
 *
 * Characters are read as plain `char` and converted to size_t, so the value
 * contributed by bytes >= 0x80 depends on whether `char` is signed.
 *
 * @param str NUL-terminated input string.
 * @return    hash value; 0 for the empty string.
 */
size_t BKDRHash(const char *str)
{
    const size_t multiplier = 131;
    size_t digest = 0;

    for (const char *cursor = str; *cursor != '\0'; ++cursor)
    {
        digest = digest * multiplier + (size_t)*cursor;
    }

    return digest;
}
/// @brief SDBM Hash Function
/// @details Named after the open-source SDBM project (a simple database
///          engine) in which it is used. Same idea as BKDRHash; only the
///          multiplier differs (65599).
///          Equivalent shift form: ch + (hash << 6) + (hash << 16) - hash.
/// @param str NUL-terminated input string.
/// @return hash value; 0 for the empty string.

size_t SDBMHash(const char *str)
{
    const size_t multiplier = 65599;
    size_t digest = 0;

    while (*str != '\0')
    {
        digest = multiplier * digest + (size_t)*str;
        ++str;
    }

    return digest;
}
/// @brief RS Hash Function
/// @details Named after Robert Sedgewick, who presented it in his book
///          "Algorithms in C". The multiplier starts at 63689 and is itself
///          multiplied by 378551 after every character.
/// @param str NUL-terminated input string.
/// @return hash value; 0 for the empty string.

size_t RSHash(const char *str)
{
    size_t digest = 0;
    size_t factor = 63689;

    for (; *str != '\0'; ++str)
    {
        digest = digest * factor + (size_t)*str;
        factor *= 378551;
    }

    return digest;
}
/// @brief AP Hash Function
/// @details Hash algorithm invented by Arash Partow. Characters at even
///          positions (0, 2, ...) and odd positions are mixed in with two
///          different shift/xor steps.
/// @param str NUL-terminated input string.
/// @return hash value; 0 for the empty string.
size_t APHash(const char *str)
{
    size_t digest = 0;
    int odd_position = 0;   /* toggles for every character consumed */

    while (*str != '\0')
    {
        const size_t ch = (size_t)*str++;

        if (odd_position)
        {
            digest ^= ~((digest << 11) ^ ch ^ (digest >> 5));
        }
        else
        {
            digest ^= (digest << 7) ^ ch ^ (digest >> 3);
        }

        odd_position = !odd_position;
    }

    return digest;
}
/// @brief JS Hash Function
/// @details Hash algorithm invented by Justin Sobel. The running value starts
///          at 1315423911 and each character is mixed in with
///          hash ^= (hash << 5) + ch + (hash >> 2).
/// @param str NUL-terminated input string.
/// @return hash value; 0 for the empty string.

size_t JSHash(const char *str)
{
    /* Added by the original author so that the empty string hashes to 0. */
    if (*str == '\0')
    {
        return 0;
    }

    size_t digest = 1315423911;

    /* The first character is known to be non-NUL here, hence do/while. */
    do
    {
        digest ^= (digest << 5) + (size_t)*str + (digest >> 2);
    } while (*++str != '\0');

    return digest;
}
  • BitMap.c

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include"BitMap.h"

/*
 * Lookup table: Cou[v] is the number of 1 bits in the 3-bit value v (0..7).
 * It is a string literal, so it must only be read, never written.
 */
char* Cou = "\0\1\1\2\1\2\2\3";

/*
 * Initialise `bm` so that it can hold at least `size` bit positions.
 *
 * Each word of the array stores 32 bit positions, so (size / 32) + 1 words
 * are allocated. calloc both zero-fills the words and checks the
 * count * element-size multiplication for overflow, replacing the manual
 * clearing loop (whose int index was compared against a size_t bound).
 *
 * Allocation failure is reported through assert, as before.
 */
void InitBitMap(pBitMap bm, size_t size)
{
    assert(bm);

    bm->_capacity = (size >> 5) + 1;

    bm->_size = 0;

    bm->_BitMap = calloc(bm->_capacity, sizeof *bm->_BitMap);

    assert(bm->_BitMap);
}

/*
 * Set bit number `data` in the bitmap.
 *
 * Returns 1 when the bit was newly set (and _size was incremented), 0 when
 * it was already set or when `data` lies beyond the allocated words.
 *
 * The explicit range check is required: FindBitMap answers 0 for an
 * out-of-range position, which would otherwise fall through to Set and write
 * past the end of the word array.
 */
int InsertBitMap(pBitMap bm, size_t data)
{
    assert(bm);

    size_t Addr = data >> 5;   /* index of the word holding the bit */
    size_t bit = data % 32;    /* position of the bit inside that word */

    if (Addr >= bm->_capacity)
    {
        return 0;
    }

    if (FindBitMap(bm, data) == 1)
    {
        return 0;
    }

    Set(bm, Addr, bit);

    bm->_size++;

    return 1;
}

/*
 * Set bit `bit` of word `Addr` to 1.
 *
 * The mask is built from a size_t one: shifting the int constant 1 by 31 is
 * undefined behaviour and, where it "works", sign-extends into the upper
 * half of a 64-bit word. Requests outside the allocated words or outside the
 * 32 bit positions used per word are ignored. _size is not modified.
 */
void Set(pBitMap bm, size_t Addr, size_t bit)
{
    if (Addr >= bm->_capacity || bit >= 32)
    {
        return;
    }

    bm->_BitMap[Addr] |= (size_t)1 << bit;
}

/*
 * Clear bit `bit` of word `Addr` to 0.
 *
 * Same range rules and mask construction as Set. _size is not modified.
 */
void ReSet(pBitMap bm, size_t Addr, size_t bit)
{
    if (Addr >= bm->_capacity || bit >= 32)
    {
        return;
    }

    bm->_BitMap[Addr] &= ~((size_t)1 << bit);
}

/*
 * Test whether bit number `data` is set.
 *
 * Returns 1 if the bit is set, 0 if it is clear or if `data` lies beyond the
 * allocated words.
 *
 * The word index is kept in size_t: storing it in an int truncates large
 * positions, which could wrap to a small in-range index and report an
 * unrelated bit. The mask is built from a size_t one to avoid shifting the
 * int constant 1 into its sign bit.
 */
int FindBitMap(pBitMap bm, size_t data)
{
    size_t Addr = data >> 5;

    if (Addr >= bm->_capacity)
    {
        return 0;
    }

    size_t bit = data % 32;

    return (bm->_BitMap[Addr] & ((size_t)1 << bit)) != 0;
}

/*
 * Report the bitmap's _size counter.
 * `bm` must not be a null pointer (checked with assert).
 */
size_t SizeBitMap(BitMap* bm)
{
    assert(bm);

    const size_t stored = bm->_size;
    return stored;
}
/*
 * Count the bits that are currently set in the bitmap.
 *
 * Only the low 32 bits of every word are examined, because the rest of this
 * file addresses bits as (position >> 5, position % 32).
 *
 * Fixes over the previous byte-wise version:
 *  - every nibble is now counted in full (the old `& 7` mask skipped bit 3
 *    of each byte);
 *  - the lookup table has 16 entries, so a nibble value of 8..15 no longer
 *    indexes past an 8-entry table;
 *  - the word is processed by value rather than through a `char *`, so the
 *    result depends neither on the signedness of char nor on byte order.
 */
size_t CountBitMap(BitMap* bm)
{
    /* nibble_bits[v] = number of 1 bits in the 4-bit value v */
    static const unsigned char nibble_bits[16] = {
        0, 1, 1, 2, 1, 2, 2, 3,
        1, 2, 2, 3, 2, 3, 3, 4
    };

    assert(bm);

    size_t count = 0;
    for (size_t i = 0; i < bm->_capacity; i++)
    {
        size_t word = bm->_BitMap[i] & 0xFFFFFFFFu;

        while (word != 0)
        {
            count += nibble_bits[word & 0x0F];
            word >>= 4;
        }
    }

    return count;
}
/*
 * Release the bitmap's word array and reset its counters.
 *
 * The pointer is cleared after free so that a second call is harmless
 * (free(NULL) is a no-op) and a stale pointer cannot be dereferenced.
 */
void DestroyBitMap(BitMap* bm)
{
    assert(bm);

    free(bm->_BitMap);
    bm->_BitMap = NULL;

    bm->_size = 0;

    bm->_capacity = 0;
}
  • BloomFilter.c
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include"BloomFilter.h"

/* Hash functions handed to InitBloomFilter by test(). */
PHF Func[FUNCNUM] = { BKDRHash,SDBMHash,RSHash,APHash,JSHash };

/*
 * Initialise a Bloom filter.
 *
 * bf       - filter to initialise; must not be NULL.
 * hashFunc - array of FUNCNUM hash functions; they are copied into the filter.
 * size     - number of bit positions requested from the underlying bitmap.
 */
void InitBloomFilter(BF* bf, PHF hashFunc[FUNCNUM], size_t size)
{
    assert(bf);
    assert(hashFunc);

    InitBitMap(&bf->_bmp, size);

    for (int i = 0; i < FUNCNUM; i++)
    {
        bf->_HashFunc[i] = hashFunc[i];
    }

    bf->_size = 0;
}

/*
 * Add `key` to the Bloom filter.
 *
 * Every hash function is applied to the key, the result is reduced modulo
 * the number of addressable bits (_capacity words * 32 bits each) and the
 * corresponding bit is set.
 *
 * Returns 1 if at least one bit was newly set (the key was certainly not
 * present before), 0 if all of its bits were already set. _size is
 * incremented only in the first case.
 *
 * Fixes: the bitmap itself (&bf->_bmp) is passed to InsertBitMap instead of
 * the address of its word-array pointer; the "new bit" flag starts at 0 so
 * _size no longer grows on every call; the function returns a value on all
 * paths; the hash count follows FUNCNUM instead of a literal 5.
 */
int InsertBF(BF* bf, DataType key)
{
    assert(bf);
    assert(key);

    size_t Max_num = bf->_bmp._capacity * 32;
    if (Max_num == 0)
    {
        return 0;   /* no storage: nothing can be recorded */
    }

    int flag = 0;

    for (int i = 0; i < FUNCNUM; i++)
    {
        size_t Addr = bf->_HashFunc[i](key) % Max_num;

        if (InsertBitMap(&bf->_bmp, Addr))
        {
            flag = 1;
        }
    }

    bf->_size += (size_t)flag;

    return flag;
}

/*
 * Query the Bloom filter for `key`.
 *
 * All hash functions are evaluated first; each result is folded into the
 * range of addressable bits (_capacity words * 32). The key is reported as
 * present (1) only if every selected bit is set; the first clear bit yields 0.
 */
int IsInBloomFilter(BF* bf, DataType key)
{
    assert(bf);

    size_t slots[FUNCNUM];
    const size_t total_bits = bf->_bmp._capacity * 32;
    int idx = 0;

    /* Phase 1: compute the bit position chosen by each hash function. */
    while (idx < FUNCNUM)
    {
        const size_t raw = bf->_HashFunc[idx](key);
        slots[idx] = (raw < total_bits) ? raw : raw % total_bits;
        ++idx;
    }

    /* Phase 2: a single clear bit proves the key was never inserted. */
    for (idx = 0; idx < FUNCNUM; ++idx)
    {
        if (FindBitMap(&bf->_bmp, slots[idx]) == 0)
        {
            return 0;
        }
    }

    return 1;
}

/*
 * Tear down a Bloom filter: hand the embedded bitmap to DestroyBitMap and
 * reset the filter's own _size counter. `bf` must not be a null pointer.
 */
void DestroyBloomFilter(BF* bf)
{
    assert(bf);

    BitMap *storage = &bf->_bmp;
    DestroyBitMap(storage);

    bf->_size = 0;
}


void test()
{
    BF bf;

    InitBloomFilter(&bf, Func, 1000);

    InsertBF(&bf, "啊啊");
    InsertBF(&bf, "不变");
    InsertBF(&bf, "尺寸");
    InsertBF(&bf, "大大");


    if (1 == IsInBloomFilter(&bf, "啊啊"))
        printf("Is In BlooFilter!!!!\n");
    else
        printf("Is Not In BlooFilter!!!!\n");

    if (1 == IsInBloomFilter(&bf, "不变"))
        printf("Is In BlooFilter!!!!\n");
    else
        printf("Is Not In BlooFilter!!!!\n");

    if (1 == IsInBloomFilter(&bf, "尺寸"))
        printf("Is In BlooFilter!!!!\n");
    else
        printf("Is Not In BlooFilter!!!!\n");

    if (1 == IsInBloomFilter(&bf, "大大"))
        printf("Is In BlooFilter!!!!\n");
    else
        printf("Is Not In BlooFilter!!!!\n");

    if (1 == IsInBloomFilter(&bf, "发发发"))
        printf("Is In BlooFilter!!!!\n");
    else
        printf("Is Not In BlooFilter!!!!\n");



    DestroyBloomFilter(&bf);

}


/*
 * Program entry point: runs the Bloom filter smoke test, then invokes the
 * shell command "pause" before exiting with status 0.
 */
int main(void)
{
    test();

    (void)system("pause");   /* result of the shell command is not used */

    return 0;
}

你可能感兴趣的:(数据结构)