当我们使用neo4j数据库时,会发现哪怕所有属性名和值都一样的情况下,依然会重复创建节点,导致很多本来能聚合的关系变得很乱,于是我找了一些资料,在python中实现了去重的效果
# 拓扑+neo4j
from py2neo import Graph,Node,Relationship
from py2neo.matching import NodeMatcher
import pymongo
import re
# 正则匹配
ip = re.compile('(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
# 连接mongodb数据库
client = pymongo.MongoClient("mongodb://127.0.0.1:27017/")
db = client["traceroute"] # 注意这里是中括号不是圆括号
col = db["jp"]
# 连接neo4j数据库
graph = Graph("http://localhost:11004", username="neo4j", password='308')
# graph.delete_all()
for x in col.find():
i = 0
al = re.findall(ip, str(x))
if len(al):
for x in al:
if i == 0:
a = Node('center', ip = x)
matcher = NodeMatcher(graph)
nodelist_a = list(matcher.match('center', ip = x))
else:
matcher = NodeMatcher(graph)
nodelist_b = list(matcher.match('side', ip = x))
b = Node('side', ip = x)
if len(nodelist_a) and len(nodelist_b): # 若出发点重复和结束点均重复
graph.create(Relationship(nodelist_a[0], 'CONNECTS', nodelist_b[0]))
elif len(nodelist_a) and not len(nodelist_b): # 若只有出发点重复
graph.create(Relationship(nodelist_a[0], 'CONNECTS', b))
elif not len(nodelist_a) and len(nodelist_b): # 若只有结束点重复
graph.create(Relationship(a, 'CONNECTS', nodelist_b[0]))
else:
r = Relationship(a, 'CONNECTS', b) # 若未有重复现象
graph.create(r)
i += 1
注一:重点代码:
# 判断源点是否已存在
matcher = NodeMatcher(graph)
nodelist_a = list(matcher.match('center', ip = x))
pass
# 判断目标点是否已存在
matcher = NodeMatcher(graph)
nodelist_b = list(matcher.match('side', ip = x))
# 若出发点重复和结束点均重复
if len(nodelist_a) and len(nodelist_b):
graph.create(Relationship(nodelist_a[0], 'CONNECTS', nodelist_b[0]))
# 若只有出发点重复
elif len(nodelist_a) and not len(nodelist_b):
graph.create(Relationship(nodelist_a[0], 'CONNECTS', b))
# 若只有结束点重复
elif not len(nodelist_a) and len(nodelist_b):
graph.create(Relationship(a, 'CONNECTS', nodelist_b[0]))
# 若未有重复现象
else:
r = Relationship(a, 'CONNECTS', b)
graph.create(r)