python+neo4j 学习资源知识图谱修改版

上一次的知识图谱只涵盖了数学概念,这次则覆盖了整个认知领域。由于节点数量较多,我对代码和数据格式都做了修改,以便对节点进行分类。

完整代码如下:

from py2neo import Graph, Node, Relationship, NodeMatcher
import pandas as pd
from pdb import set_trace


def load_data():
    """Read ``./entity.xlsx`` and return its five hierarchy columns as text.

    Returns:
        A DataFrame with the columns ``cognitive``, ``focus``, ``project``,
        ``activity`` and ``age`` (in that order), where every cell has been
        converted with ``str()``.
    """
    columns = ('cognitive', 'focus', 'project', 'activity', 'age')
    sheet = pd.read_excel('./entity.xlsx')

    # Stringify every cell so that all node names are plain text.
    text_columns = {}
    for column in columns:
        text_columns[column] = list(map(str, sheet[column].tolist()))

    return pd.DataFrame(text_columns)


class DataToNeo4j:
    """Write the cognitive -> focus -> project -> activity -> age chain to Neo4j.

    Creating an instance connects to the database and deletes everything
    already stored in it, so each run rebuilds the graph from scratch.
    """

    def __init__(self, uri="http://localhost:7474", auth=("neo4j", "a98710928")):
        """Connect to Neo4j and clear the existing graph.

        Args:
            uri: Address of the Neo4j server.
            auth: ``(user, password)`` pair used to authenticate.
        """
        link = Graph(uri, auth=auth)
        self.graph = link

        # Node labels, one per level of the hierarchy.
        self.cognitive = 'cognitive'
        self.focus = 'focus'
        self.project = 'project'
        self.activity = 'activity'
        self.age = 'age'

        self.graph.delete_all()  # destructive: removes the whole previous graph
        self.matcher = NodeMatcher(link)  # used to look nodes up by name

    def create_node(self, activity, age, project, focus, cognitive):
        """Create one node per name, labelled with the level it belongs to.

        Args:
            activity: Iterable of activity names.
            age: Iterable of age names.
            project: Iterable of project names.
            focus: Iterable of focus names.
            cognitive: Iterable of cognitive names.
        """
        labelled_names = (
            (self.activity, activity),
            (self.age, age),
            (self.project, project),
            (self.focus, focus),
            (self.cognitive, cognitive),
        )
        for label, names in labelled_names:
            for node_name in names:
                self.graph.create(Node(label, name=node_name))

    def _find_node(self, label, name):
        """Return the first node with this label and ``name``, or None.

        The name is passed as a property filter instead of being spliced
        into a query string, so names containing quotes are matched
        literally rather than corrupting the query.
        """
        return self.matcher.match(label, name=name).first()

    def create_relation(self, df_data):
        """Link the nodes of every row into a four-relationship chain.

        For each row the chain is
        cognitive -[重点]-> focus -[项目]-> project -[活动]-> activity -[年龄]-> age.
        Rows that raise ``AttributeError`` are reported with ``print`` and
        skipped, so one bad row does not stop the import.

        Args:
            df_data: DataFrame with the columns ``cognitive``, ``focus``,
                ``project``, ``activity`` and ``age``.
        """
        # (label, column) for each level, ordered from the top of the chain.
        levels = (
            (self.cognitive, 'cognitive'),
            (self.focus, 'focus'),
            (self.project, 'project'),
            (self.activity, 'activity'),
            (self.age, 'age'),
        )
        # Relationship type joining each level to the next one.
        relation_types = ('重点', '项目', '活动', '年龄')

        for m in range(len(df_data)):
            try:
                # Look every level up once per row; inner levels are shared
                # by two relationships.
                nodes = [
                    self._find_node(label, df_data[column][m])
                    for label, column in levels
                ]
                relations = [
                    Relationship(start, relation_type, end)
                    for start, relation_type, end
                    in zip(nodes, relation_types, nodes[1:])
                ]
                for relation in relations:
                    self.graph.create(relation)
            except AttributeError as e:
                print(e, m)


def data_extraction(df_data):
    """Return the distinct names found in each hierarchy column.

    Duplicates are dropped while keeping the order of first appearance, so
    the result (and therefore the node creation order) is the same on every
    run.

    Args:
        df_data: DataFrame with the columns ``activity``, ``age``,
            ``project``, ``focus`` and ``cognitive``.

    Returns:
        A tuple ``(node_activity, node_age, node_project, node_focus,
        node_cognitive)`` of lists of unique values.
    """
    columns = ('activity', 'age', 'project', 'focus', 'cognitive')

    # dict keys are unique and keep insertion order, giving a stable
    # de-duplication without sorting.
    node_activity, node_age, node_project, node_focus, node_cognitive = (
        list(dict.fromkeys(df_data[column].tolist())) for column in columns
    )

    return node_activity, node_age, node_project, node_focus, node_cognitive


if __name__ == '__main__':
    # Load the spreadsheet and split it into per-level lists of unique names.
    df_data = load_data()
    unique_names = data_extraction(df_data)

    # Build the graph; constructing DataToNeo4j clears the existing one.
    create_data = DataToNeo4j()

    # Nodes first, so the relationship step can look them up by name.
    create_data.create_node(*unique_names)
    create_data.create_relation(df_data)

entity.xlsx 的数据格式如下(每行包含 cognitive、focus、project、activity、age 五列):

[图片:python+neo4j 学习资源知识图谱修改版_第1张图片]

你可能感兴趣的:(推荐算法从入门到放弃,python,neo4j,知识图谱)