环境
python3.6
pip3 install bitarray-0.8.1-cp36-cp36m-win_amd64.whl  # pybloom_live 依赖 bitarray,需要先安装这个包
pip3 install pybloom_live
bitarray 的 whl 文件下载地址:https://www.lfd.uci.edu/~gohlke/pythonlibs/
1. pybloom_live
ScalableBloomFilter
from pybloom_live import ScalableBloomFilter

# Demo: membership test with a filter that grows on demand.
# mode=ScalableBloomFilter.SMALL_SET_GROWTH is the alternative growth mode
# (slower growth); LARGE_SET_GROWTH is used here.
sbf = ScalableBloomFilter(initial_capacity=100, error_rate=0.001, mode=ScalableBloomFilter.LARGE_SET_GROWTH)
url = "www.baidu.com"
url2 = "www.douban.com"  # never added, so it should be reported as absent
sbf.add(url)
print(url in sbf)  # True
print(url2 in sbf)  # False
BloomFilter
from pybloom_live import BloomFilter

# Demo: fixed-capacity filter; add one URL, then probe a present and an
# absent URL in turn.
bf = BloomFilter(capacity=1000)
bf.add("www.baidu.com")
for candidate in ("www.baidu.com", "www.douban.com"):
    # Expected output: True for the added URL, then False for the other.
    print(candidate in bf)
2. pybloom
BloomFilter 是定容的:容量在创建时由 capacity 指定,不会自动增长。
ScalableBloomFilter 可以自动扩容。
# -*- coding: utf-8 -*-
from pybloom import BloomFilter

# capacity is the number of elements the filter is sized for; error_rate is
# the tolerated false-positive probability. Adding more than `capacity`
# elements is what raises an exception (IndexError), not the error rate.
f = BloomFilter(capacity=1000, error_rate=0.001)

# add() reports whether the element was already present, so ten fresh
# integers give ten False values.
was_present = [f.add(n) for n in range(10)]
print(was_present)  # [False, False, False, False, False, False, False, False, False, False]

# Every element that was added must be reported as a member.
print(all(n in f for n in range(10)))  # True
print(10 in f)  # False
print(5 in f)  # True
# Demo: fill a fixed-capacity filter completely and check how many of the
# insertions were counted as new elements.
f = BloomFilter(capacity=1000, error_rate=0.001)
print(f.capacity)  # equals the capacity passed to the constructor
print('len(f):', len(f))
for i in range(0, f.capacity):
    # The loop body must be indented; the original had lost its indentation.
    f.add(i)
print('len(f):', len(f))
# The fraction of insertions not counted should stay within error_rate.
print((1.0 - (len(f) / float(f.capacity))) <= f.error_rate + 2e-18)  # True
from pybloom import ScalableBloomFilter

# Demo: add `count` integers to a scalable filter and check how many of the
# insertions were counted as new elements.
sbf = ScalableBloomFilter(mode=ScalableBloomFilter.SMALL_SET_GROWTH)
count = 10000
for i in range(0, count):
    # The loop body must be indented; the original had lost its indentation.
    sbf.add(i)
# The fraction of insertions not counted should stay within error_rate.
print((1.0 - (len(sbf) / float(count))) <= sbf.error_rate + 2e-18)  # True
-