上一次构建的知识图谱只涵盖了数学概念，这次扩展到了整个认知领域。由于覆盖的节点数量比较多，我对代码和数据格式都做了修改，以便对节点进行分类。
完整代码如下:
from py2neo import Graph, Node, Relationship, NodeMatcher
import pandas as pd
from pdb import set_trace
def load_data(path='./entity.xlsx'):
    """Read the entity spreadsheet and return its five columns as strings.

    Args:
        path: Location of the Excel workbook. Defaults to ``./entity.xlsx``,
            the path this function previously hard-coded, so existing
            zero-argument calls keep working.

    Returns:
        A new ``pandas.DataFrame`` holding the columns ``cognitive``,
        ``focus``, ``project``, ``activity`` and ``age`` (in that order),
        with every cell converted through ``str``. Any other column in the
        workbook is dropped.

    Raises:
        KeyError: If the workbook lacks one of the five expected columns.
    """
    data = pd.read_excel(path)
    columns = ('cognitive', 'focus', 'project', 'activity', 'age')
    # Cells are stringified one by one so numeric cells (such as ages) can be
    # used as node names and compared as text later on.
    # NOTE(review): an empty cell is read as NaN and therefore becomes the
    # literal string 'nan' -- confirm the sheet has no blanks.
    link_dict = {
        column: [str(value) for value in data[column].tolist()]
        for column in columns
    }
    return pd.DataFrame(link_dict)
class DataToNeo4j:
    """Write the entity table into Neo4j as labelled nodes and relationships.

    Each of the five columns (cognitive, focus, project, activity, age) is
    used as a node label, and every table row is turned into the chain

        cognitive -[重点]-> focus -[项目]-> project -[活动]-> activity -[年龄]-> age
    """

    def __init__(self, uri="http://localhost:7474",
                 auth=("neo4j", "a98710928"), clear=True):
        """Connect to Neo4j and, by default, wipe the existing graph.

        Args:
            uri: Address of the Neo4j server.
            auth: ``(user, password)`` pair passed to ``py2neo.Graph``.
            clear: When true (the default, matching the previous behaviour)
                every node and relationship already in the database is
                deleted before anything is imported.
        """
        # NOTE(review): the default credentials are the ones that used to be
        # hard-coded here; pass ``auth`` explicitly (e.g. from configuration)
        # rather than keeping a real password in the source.
        link = Graph(uri, auth=auth)
        self.graph = link
        # Node labels; they are identical to the column names of the table.
        self.cognitive = 'cognitive'
        self.focus = 'focus'
        self.project = 'project'
        self.activity = 'activity'
        self.age = 'age'
        if clear:
            self.graph.delete_all()  # destructive: removes the previous graph
        self.matcher = NodeMatcher(link)  # used to look nodes up by name

    def create_node(self, activity, age, project, focus, cognitive):
        """Create one node per name, labelled after the list it came from.

        Args:
            activity: Iterable of names for nodes labelled ``activity``.
            age: Iterable of names for nodes labelled ``age``.
            project: Iterable of names for nodes labelled ``project``.
            focus: Iterable of names for nodes labelled ``focus``.
            cognitive: Iterable of names for nodes labelled ``cognitive``.

        The caller is expected to pass de-duplicated names; no uniqueness
        check is made here.
        """
        labelled_names = (
            (self.activity, activity),
            (self.age, age),
            (self.project, project),
            (self.focus, focus),
            (self.cognitive, cognitive),
        )
        for label, names in labelled_names:
            for node_name in names:
                self.graph.create(Node(label, name=node_name))

    def _find_node(self, label, name):
        """Return the first node with this label and name, or None."""
        # Property matching hands the value to the driver as data, so names
        # containing quotes can neither break nor alter the query (the
        # string-built ``where`` clause used previously could).
        return self.matcher.match(label, name=name).first()

    def create_relation(self, df_data):
        """Link the nodes of every table row into a five-node chain.

        Args:
            df_data: Table with the columns ``cognitive``, ``focus``,
                ``project``, ``activity`` and ``age``; each cell is the
                ``name`` of a node created earlier with ``create_node``.

        A row is skipped, with a message printed, when any of its five nodes
        cannot be found, so a row never ends up partially linked.
        """
        columns = ('cognitive', 'focus', 'project', 'activity', 'age')
        labels = (self.cognitive, self.focus, self.project,
                  self.activity, self.age)
        relation_types = ('重点', '项目', '活动', '年龄')

        # Rows are walked positionally, so the table does not need to carry
        # a default 0..n-1 index.
        rows = zip(*(df_data[column].tolist() for column in columns))
        for m, names in enumerate(rows):
            # Each node is looked up once and shared by the two
            # relationships that touch it.
            nodes = [self._find_node(label, name)
                     for label, name in zip(labels, names)]
            missing = [
                (label, name)
                for label, name, node in zip(labels, names, nodes)
                if node is None
            ]
            if missing:
                print('row', m, 'skipped, node(s) not found:', missing)
                continue
            # NOTE(review): rows sharing the same pair of nodes presumably
            # produce parallel relationships, since every row calls create.
            for start, relation_type, end in zip(nodes, relation_types,
                                                 nodes[1:]):
                self.graph.create(Relationship(start, relation_type, end))
def data_extraction(df_data):
    """Collect the distinct node names of each entity column.

    Args:
        df_data: Table with the columns ``activity``, ``age``, ``project``,
            ``focus`` and ``cognitive``.

    Returns:
        A 5-tuple of lists ``(activity, age, project, focus, cognitive)``.
        Each list holds the column's values with duplicates removed, in
        order of first appearance.
    """
    column_order = ('activity', 'age', 'project', 'focus', 'cognitive')
    # dict.fromkeys de-duplicates while keeping insertion order, so the node
    # lists are identical from run to run (a set of strings is not ordered
    # reproducibly across interpreter runs).
    return tuple(
        list(dict.fromkeys(df_data[column].tolist()))
        for column in column_order
    )
if __name__ == '__main__':
    # Read the entity table and derive the distinct node names per label.
    entity_table = load_data()
    activities, ages, projects, focuses, cognitives = data_extraction(entity_table)

    # Open the graph connection, then import nodes first and relationships
    # second, because relationships are attached to already-stored nodes.
    importer = DataToNeo4j()
    importer.create_node(activities, ages, projects, focuses, cognitives)
    importer.create_relation(entity_table)