from itertools import combinations
def comb(lst):
    """Return every non-empty combination of the elements of ``lst``.

    Combinations are listed by increasing length (1 up to ``len(lst)``) and,
    within one length, in the order ``itertools.combinations`` produces them.
    Each combination is a tuple; an empty ``lst`` gives an empty list.
    """
    return [
        subset
        for length in range(1, len(lst) + 1)
        for subset in combinations(lst, length)
    ]
class AprLayer:
    """Holder for the nodes that share one itemset size.

    ``d`` is a plain dict. ``AprBlk`` fills it by storing each ``AprNode``
    under itself as the key, so a freshly built equal node can be used to
    look up the stored one.
    """

    # Class-level placeholder; __init__ shadows it with a per-instance dict,
    # so instances never share state through it.
    d = {}

    def __init__(self):
        self.d = {}
class AprNode(object):
    """An itemset together with its occurrence count and outgoing links.

    Attributes:
        s: the items of this node, as a ``set``.
        size: number of distinct items in ``s`` at construction time.
        lnk_nodes: dict mapping a linked node to that node's item set;
            filled through ``addLnk``.
        num: occurrence counter, starts at 0 and grows through ``incNum``.

    Two nodes are equal (and hash alike) exactly when their item sets are
    equal, so a node can be used as a dict key and found again with a newly
    built node holding the same items.
    """

    def __init__(self, node):
        """Build a node from any iterable of hashable items."""
        self.s = set(node)
        self.size = len(self.s)
        self.lnk_nodes = dict()
        self.num = 0

    def __hash__(self):
        # Hash the items themselves. Hashing a joined string of str(item)
        # would make {"a__b"} collide with {"a", "b"} and {1} with {"1"}.
        return hash(frozenset(self.s))

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing ``s`` attribute.
        if not isinstance(other, AprNode):
            return NotImplemented
        return self.s == other.s

    def isSubnode(self, node):
        """Return True if every item of this node is also in ``node``."""
        return self.s.issubset(node.s)

    def incNum(self, num=1):
        """Increase the occurrence counter by ``num`` (default 1)."""
        self.num += num

    def addLnk(self, node):
        """Record ``node`` as linked, stored as ``node -> node.s``.

        If an equal node is already linked, the existing key object is kept
        and only the stored item set is replaced (normal dict behaviour).
        """
        self.lnk_nodes[node] = node.s
class AprBlk():
    """Itemset counts and subset-to-superset links built from transactions.

    Attributes:
        apr_layers: dict mapping an itemset size to the ``AprLayer`` whose
            ``d`` dict holds every node of that size (node stored under
            itself as key).
        data_num: number of transactions passed to the constructor.
    """

    def __init__(self, data):
        """Count every non-empty item combination of every transaction.

        Args:
            data: a sized collection of transactions, each an iterable of
                hashable items.

        Every combination produced by ``comb`` becomes (or increments) a
        node in the layer matching its size. Afterwards each node of size
        k > 1 is linked from all of its subsets of size k - 1.
        """
        self.apr_layers = dict()
        self.data_num = len(data)
        for datum in data:
            # Drop repeated items (keeping first-seen order) so one
            # transaction can raise an itemset's count by at most one.
            items = list(dict.fromkeys(datum))
            stored_nodes = []
            for itemset in comb(items):
                candidate = AprNode(itemset)
                if candidate.size not in self.apr_layers:
                    self.apr_layers[candidate.size] = AprLayer()
                layer_nodes = self.apr_layers[candidate.size].d
                # Reuse the node already stored for this itemset so that
                # counts and links accumulate on a single object.
                stored = layer_nodes.setdefault(candidate, candidate)
                stored.incNum()
                stored_nodes.append(stored)
            for node in stored_nodes:
                if node.size == 1:
                    continue
                smaller_layer = self.apr_layers[node.size - 1].d
                for item in node.s:
                    # Removing one item gives a subset that was generated
                    # from this same transaction, so the lookup succeeds.
                    sub_n = AprNode(node.s - {item})
                    smaller_layer[sub_n].addLnk(node)

    def getFreqItems(self, thd=1, hd=1):
        """Return the most frequent itemsets.

        Args:
            thd: minimum count an itemset needs to be included.
            hd: maximum number of entries to return.

        Returns:
            A list of ``(item_set, count)`` tuples sorted by count in
            descending order (ties keep insertion order), cut to ``hd``.
        """
        freq_items = [
            (node.s, node.num)
            for layer in self.apr_layers.values()
            for node in layer.d.values()
            if node.num >= thd
        ]
        freq_items.sort(key=lambda x: x[1], reverse=True)
        return freq_items[:hd]

    def getConf(self, low=True, h_thd=10, l_thd=1, hd=1):
        """Return subset -> superset rules ranked by confidence.

        Confidence is ``count(superset) / count(subset)`` for every link
        recorded between a node and a node one item larger.

        Args:
            low: if True return the lowest-confidence rules in ascending
                order, otherwise the highest-confidence rules in
                descending order.
            h_thd: minimum count required of the subset node.
            l_thd: minimum count required of the linked superset node.
            hd: maximum number of rules to return.

        Returns:
            A list of ``[subset_items, subset_count, superset_items,
            superset_count, confidence]`` lists.
        """
        confidence = []
        for layer in self.apr_layers.values():
            for node in layer.d.values():
                if node.num < h_thd:
                    continue
                for lnk_node in node.lnk_nodes:
                    if lnk_node.num < l_thd:
                        continue
                    conf = lnk_node.num / node.num
                    confidence.append(
                        [node.s, node.num, lnk_node.s, lnk_node.num, conf]
                    )
        confidence.sort(key=lambda x: x[4])
        if low:
            return confidence[:hd]
        # Reverse first, then truncate: slicing backwards from index -hd
        # would skip the top entries instead of keeping them.
        return confidence[::-1][:hd]
class AssctAnaClass():
    """Small facade over ``AprBlk``: fit once, then query counts and rules."""

    def fit(self, data):
        """Build the underlying ``AprBlk`` from ``data`` and return ``self``."""
        self.apr_blk = AprBlk(data)
        return self

    def get_freq(self, thd=1, hd=1):
        """Return frequent itemsets via ``AprBlk.getFreqItems``.

        Args:
            thd: minimum itemset count.
            hd: maximum number of entries returned.
        """
        return self.apr_blk.getFreqItems(thd=thd, hd=hd)

    def get_conf_high(self, thd, h_thd=10, hd=1):
        """Query ``AprBlk.getConf`` with ``low=False`` (high-confidence side).

        Args:
            thd: forwarded as ``l_thd``, the minimum count of the linked
                superset itemset.
            h_thd: minimum count of the subset itemset.
            hd: forwarded unchanged as ``getConf``'s ``hd`` argument.

        Returns:
            Whatever ``AprBlk.getConf`` returns: a list of
            ``[subset, subset_count, superset, superset_count, confidence]``.
        """
        return self.apr_blk.getConf(low=False, h_thd=h_thd, l_thd=thd, hd=hd)

    def get_conf_low(self, thd, hd, l_thd=1):
        """Query ``AprBlk.getConf`` with its default ``low=True``.

        Note that, unlike ``get_conf_high``, ``thd`` here is forwarded as
        ``h_thd`` (minimum count of the subset itemset).

        Args:
            thd: forwarded as ``h_thd``.
            hd: maximum number of rules returned.
            l_thd: minimum count of the linked superset itemset.
        """
        return self.apr_blk.getConf(h_thd=thd, l_thd=l_thd, hd=hd)
def main():
    """Run the analysis on a small built-in basket data set and print results."""
    transactions = [
        ["牛奶", "啤酒", "尿布"],
        ["牛奶", "啤酒", "咖啡", "尿布"],
        ["香肠", "牛奶", "饼干"],
        ["尿布", "果汁", "啤酒"],
        ["钉子", "啤酒"],
        ["尿布", "毛巾", "香肠"],
        ["啤酒", "毛巾", "尿布", "饼干"],
    ]

    # Each query uses its own freshly fitted analyser.
    frequent = AssctAnaClass().fit(transactions).get_freq(thd=3, hd=10)
    print("Freq", frequent)

    rules = AssctAnaClass().fit(transactions).get_conf_high(thd=3, h_thd=3)
    print("Conf", rules)


if __name__ == "__main__":
    main()
# Sample output recorded by the author:
# Freq [({'啤酒'}, 5), ({'尿布'}, 5), ({'啤酒', '尿布'}, 4), ({'牛奶'}, 3)]
# Conf [[{'尿布'}, 5, {'啤酒', '尿布'}, 4, 0.8], [{'啤酒'}, 5, {'啤酒', '尿布'}, 4, 0.8]]
# by CyrusMay 2022 04 05