hyperloglog求交集

有两种方法实现容斥原理,一种是遍历,一种是递归(DFS),对于hyperloglog求交集而言选择遍历。

第一步初始化 redis

import redis
r = redis.Redis(host='localhost', port=6379, db=0)  
r.pfadd('day1',*['user_1','user_2','user_3','user_4','user_5','user_6','user_7'])
r.pfadd('day2',*['user_8','user_2','user_3','user_4','user_5','user_6','user_7'])
r.pfadd('day3',*['user_8','user_9','user_3','user_4','user_5','user_6','user_7'])
r.pfadd('day4',*['user_8','user_9','user_10','user_4','user_5','user_6','user_7'])
r.pfadd('day5',*['user_8','user_9','user_10','user_11','user_5','user_6','user_7'])
r.pfadd('day6',*['user_8','user_9','user_10','user_11','user_12','user_6','user_7'])
r.pfadd('day7',*['user_8','user_9','user_10','user_11','user_12','user_13','user_7'])
r.pfadd('day8',*['user_8','user_9','user_10','user_11','user_12','user_13','user_14'])

第二部实现排列函数和归属函数

#求排列
from itertools import combinations
def arrangement(length):
    combins = {}
    for i in range(length):
        combins[i+1] = [c for c in combinations(range(length), (i+1))]
    return combins
#lst1是否包含lst2
def is_exist_in_tuple(lst1,lst2):
    d1 = {}
    for i in lst1:
        if i in d1:
            d1[i] += 1
        else:
            d1[i] = 1
    d2 = {}
    for i in lst2:
        if i in d2:
            d2[i] += 1
        else:
            d2[i] = 1
    for key,value in d2.items():
        if key not in d1 or value>d1[key]:
            return False
    return True

第三步 实现交集函数

#combins--排列字典
#combins_intersection--交集
#i--第几层
#j--第几个
#k--第几个的第几个
#下面开始计算交集
#l--同i
#m--同j

def intersection(lst):
    count_intersection = 0
    combins = arrangement(len(lst))
    combins_intersection = combins

    for i in range(len(combins)):
        for j in range(len(combins[i+1])):
            for k in range(len(combins[i+1][j])):
                r.pfmerge("combins_intersection%s_%s" % (i+1,j), lst[combins[i+1][j][k]])
            
            combins = arrangement(len(lst))
            combins_intersection[i+1][j] = r.pfcount("combins_intersection%s_%s" % (i+1,j))
            
            if(i>0):
                for l in range((i)):
                    for m in range(len(combins[l+1])):
                        if(is_exist_in_tuple(combins[i+1][j], combins[l+1][m])):
                            combins_intersection[i+1][j] += ((-1)**(l+1))*abs(combins_intersection[l+1][m]) 
                            
        for n in range(len(combins[i+1])):
            combins_intersection[i+1][n] = abs(combins_intersection[i+1][n])
    
    for i in range(len(combins)):
        for j in range(len(combins[i+1])):
            r.delete("combins_intersection%s_%s" % (i+1,j))
    return combins_intersection[i+1][0]

测试

combins_intersection1 = intersection(["day1","day2","day3","day4","day5","day6","day7","day8"])
combins_intersection = intersection(["day1","day2","day3","day4","day5","day6","day7"])

看结果输出为

print combins_intersection
print "----------------"
print combins_intersection1

1
----------------
0

大功告成

你可能感兴趣的:(hyperloglog求交集)