search——Bloom Filter

详情,看这一篇博客

package main

import (
	"fmt"
	"math"
	"strconv"
)

type BloomFilter struct {
	*bitMap
	k      uint64  // hash function count
	m      uint64  // bits array length
	n      uint64  // insert n number
	p      float64 // 失误率
	p_true float64 // 真是失误率
}

func (b *BloomFilter) Insert(id uint64) {
	for i := 0; uint64(i) < b.k; i++ {
		b.bitMap.insert(getHash(strconv.Itoa(int(id)), uint64(i)) % b.bitMap.Size)
	}
}

func (b *BloomFilter) IsExist(id uint64) bool {
	for i := 0; uint64(i) < b.k; i++ {
		if !b.bitMap.isExist(getHash(strconv.Itoa(int(id)), uint64(i)) % b.bitMap.Size) {
			return false
		}
	}
	return true
}

func NewBloomFilter(p float64, n uint64) *BloomFilter {
	m := uint64(-(float64(n)*math.Log2(p))/(math.Ln2*math.Ln2) + 0.5)
	k := uint64(math.Ln2*(float64(m)/float64(n)) + 0.5)
	p_true := math.Pow(1-math.Pow(math.E, -(float64(n)*float64(k))/float64(m)), float64(k))

	bl := &BloomFilter{bitMap: newBitMap(m/64 + 1)}
	bl.m = m           // bits array length
	bl.n = n           // insert n number
	bl.k = k           // hash function count
	bl.p = p           // 失误率
	bl.p_true = p_true // 真实失误率
	return bl
}

func main() {
	bl := NewBloomFilter(0.00000001, 100)
	bl.Insert(10000)
	fmt.Println(bl.IsExist(10000))
	fmt.Println(bl.IsExist(10001))
}

func main1() {
	p := 0.00000000001
	n := 100
	m := uint(-(float64(n)*math.Log2(p))/(math.Ln2*math.Ln2) + 0.5)
	fmt.Println(m/64 + 1)
	k := uint64(math.Ln2*(float64(m)/float64(n)) + 0.5)
	fmt.Println(m)
	fmt.Println(k)

	p_true := float64(0)
	p_true = math.Pow(1-math.Pow(math.E, -(float64(n)*float64(k))/float64(m)), float64(k))
	fmt.Println(p_true < p)

}

type bitMap struct {
	Map  []uint64
	Cap  uint64
	Size uint64
}

func newBitMap(cap uint64) *bitMap {
	return &bitMap{make([]uint64, cap), cap, cap * 64}
}

func (b *bitMap) insert(id uint64) {
	b.Map[id/64] |= 1 << (id % 64)
}

func (b *bitMap) isExist(id uint64) bool {
	return (b.Map[id/64])&(1<<(id%64)) > 0
}

func (b *bitMap) delete(id uint64) {
	b.Map[id/64] &^= 1 << (id % 64)
}

func (b *bitMap) range_() (res []uint64) {
	for i := 0; uint64(i) < b.Cap; i++ {
		if b.Map[i] > 0 {
			for j, bits := 0, b.Map[i]; bits > 0; j, bits = j+1, bits>>1 {
				if bits&1 == 1 {
					res = append(res, uint64(i)*64+uint64(j))
				}
			}
		}
	}
	return res
}

func getHash(str string, i uint64) uint64 {
	return hash(str) + i*hash1(str)
}

func hash(str string) (res uint64) {
	factor := ")k+,p-m~90$#2(*&6q}ev73541]n{fl['?|c"
	str = str + factor[:16-(len(str)%16)]
	for start, end := 0, len(str)/16; end > 0; start, end = start+16, end-1 {
		h0 := uint64(0)
		for i := 15; i >= 0; i-- {
			h0 += uint64(str[start+i]-byte(i))*uint64(pow(36, i)) ^ uint64(factor[(i+start)%36])
		}
		h0 *= 0x12345
		res += (res >> 30) ^ h0<<34
		res += (res >> 32) | (h0 << 32) ^ uint64(start*start*start) ^ uint64(factor[start%36])
		res += (res>>16)&(h0<<48) ^ uint64(factor[35-start%36]) ^ uint64(start-end*end)
		res += (res >> 17) | (h0 << 47) ^ uint64(start*start)
	}
	res += 235791113<<32 | 1719232931>>32
	return res
}

func hash1(str string) (res uint64) {
	factor := ")k+,p-m~90$#2(*&6q}ev73541]n{fl['?|c"
	str = str + factor[:16-(len(str)%16)]
	for start, end := 0, len(str)/16; end > 0; start, end = start+16, end-1 {
		h0 := uint64(0)
		for i := 15; i >= 0; i-- {
			h0 += uint64(str[start+i]-byte(i))*uint64(pow(36, i)) ^ uint64(factor[(i+start)%36])
		}
		h0 *= 0x54321
		res += (res >> 32) | (h0 << 32) ^ uint64(start*start*start) ^ uint64(factor[start%36])
		res += (res>>16)&(h0<<48) ^ uint64(factor[35-start%36]) ^ uint64(start-end*end)
		res += (res >> 17) | (h0 << 47) ^ uint64(start*start)
		res += (res >> 30) ^ h0<<34
	}
	res += 1719232931<<32 | 235791113>>32
	return res
}

func pow(x, n int) int {
	if n == 0 {
		return 1
	} else {
		for (n & 1) == 0 {
			n, x = n>>1, x*x
		}
	}
	result := x
	n >>= 1
	for n != 0 {
		x *= x
		if n&1 == 1 {
			result *= x
		}
		n >>= 1
	}
	return result
}

你可能感兴趣的:(数据结构与算法,search)