参考链接:https://github.com/jm199504/Financial-Knowledge-Graphs/tree/master
from pandas import DataFrame
from py2neo import Graph,Node,Relationship,NodeMatcher
import pandas as pd
import numpy as np
import os
from py2neo import Graph, Node, Relationship, walk, NodeMatcher, RelationshipMatcher
import pandas as pd
import json
from py2neo import Graph
# --- Connectivity smoke test -------------------------------------------
# Connect over HTTP; the user name and password are embedded in the URI.
uri = "http://neo4j:neo4j@localhost:7474"
graph = Graph(uri)

# Write two throw-away Person nodes and a KNOWS edge from Tom to Bob.
tom = Node('Person', name='Tom')
bob = Node('Person', name='Bob')
graph.create(tom)
graph.create(bob)
knows = Relationship(tom, 'KNOWS', bob)
graph.create(knows)

# Read the nodes and the relationships back into DataFrames for inspection.
person_cursor = graph.run('MATCH (n:`Person`) RETURN n LIMIT 25')
node = DataFrame(person_cursor)
knows_cursor = graph.run('MATCH (n:`Person`)-[r]->(m:`Person`) return n,m,type(r)')
relation = DataFrame(knows_cursor)

# Clean up. The pattern has no label filter, so this deletes EVERY node and
# relationship in the database, not only the two Person nodes created above.
graph.run('MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r')
(No data)
# Load the seven source tables from the working directory.
# The first four files are decoded as GBK; the remaining three are read
# with pandas' default encoding.
stock, holder, concept_num, concept = (
    pd.read_csv(csv_path, encoding="gbk")
    for csv_path in (
        'stock_basic.csv',
        'stock_holders.csv',
        'concept.csv',
        'stock_concept.csv',
    )
)
sh, sz, corr = (
    pd.read_csv(csv_path)
    for csv_path in ('sh.csv', 'sz.csv', 'corr.csv')
)

# Preview the first rows of the stock table (displayed only in a notebook).
stock.head()
|
Unnamed: 0 |
TS代码 |
股票代码 |
股票名称 |
行业 |
0 |
0 |
000001.SZ |
1 |
平安银行 |
银行 |
1 |
1 |
000002.SZ |
2 |
万科A |
全国地产 |
2 |
2 |
000004.SZ |
4 |
国华网安 |
互联网 |
3 |
3 |
000005.SZ |
5 |
世纪星源 |
环境保护 |
4 |
4 |
000006.SZ |
6 |
深振业A |
区域地产 |
# Preview the first rows of the shareholder table. The value of this bare
# expression is discarded unless it runs as the last line of a notebook cell.
holder.head()
|
Unnamed: 0 |
ts_code |
ann_date |
end_date |
holder_name |
hold_amount |
hold_ratio |
0 |
0 |
000001.SZ |
20190307 |
20181231 |
新华人寿保险股份有限公司-分红-个人分红-018L-FH002深 |
4.960350e+07 |
0.29 |
1 |
1 |
000001.SZ |
20190307 |
20181231 |
中国平安保险(集团)股份有限公司-集团本级-自有资金 |
8.510493e+09 |
49.56 |
2 |
2 |
000001.SZ |
20190307 |
20181231 |
中国平安人寿保险股份有限公司-自有资金 |
1.049463e+09 |
6.11 |
3 |
3 |
000001.SZ |
20190307 |
20181231 |
香港中央结算有限公司(陆股通) |
4.307515e+08 |
2.51 |
4 |
4 |
000001.SZ |
20190307 |
20181231 |
中国证券金融股份有限公司 |
4.292327e+08 |
2.50 |
# Preview the first rows of the concept master table (one row per concept).
# Only displayed when run as a notebook cell.
concept_num.head()
|
Unnamed: 0 |
code |
name |
src |
0 |
0 |
TS0 |
密集调研 |
ts |
1 |
1 |
TS1 |
南北船合并 |
ts |
2 |
2 |
TS2 |
5G |
ts |
3 |
3 |
TS3 |
机场 |
ts |
4 |
4 |
TS4 |
高价股 |
ts |
# Preview the first rows of the stock-to-concept membership table.
# Only displayed when run as a notebook cell.
concept.head()
|
Unnamed: 0 |
id |
concept_name |
ts_code |
name |
0 |
0 |
TS0 |
密集调研 |
000301.SZ |
东方盛虹 |
1 |
1 |
TS0 |
密集调研 |
000401.SZ |
冀东水泥 |
2 |
2 |
TS0 |
密集调研 |
000932.SZ |
华菱钢铁 |
3 |
3 |
TS0 |
密集调研 |
002013.SZ |
中航机电 |
4 |
4 |
TS0 |
密集调研 |
002106.SZ |
莱宝高科 |
# Preview the first rows of the table read from sh.csv.
# NOTE(review): the name `sh` is rebound to a py2neo Node further down the
# script, so this DataFrame is no longer reachable after that point.
sh.head()
|
ts_code |
hs_type |
in_date |
out_date |
is_new |
0 |
601628.SH |
SH |
20141117 |
NaN |
1 |
1 |
601099.SH |
SH |
20141117 |
NaN |
1 |
2 |
601808.SH |
SH |
20141117 |
NaN |
1 |
3 |
601107.SH |
SH |
20141117 |
NaN |
1 |
4 |
601880.SH |
SH |
20141117 |
NaN |
1 |
# Preview the first rows of the table read from sz.csv.
# NOTE(review): the name `sz` is rebound to a py2neo Node further down the
# script, so this DataFrame is no longer reachable after that point.
sz.head()
|
ts_code |
hs_type |
in_date |
out_date |
is_new |
0 |
002910.SZ |
SZ |
20171114 |
NaN |
1 |
1 |
000016.SZ |
SZ |
20180102 |
NaN |
1 |
2 |
001872.SZ |
SZ |
20180102 |
NaN |
1 |
3 |
000040.SZ |
SZ |
20180102 |
NaN |
1 |
4 |
000401.SZ |
SZ |
20180102 |
NaN |
1 |
# Preview the first rows of the pairwise correlation table (s1, s2, corr).
# NOTE(review): the previewed s1/s2 codes end with an extra '.' (for example
# '000001.SZ.'), unlike the TS codes in the other tables; they would need
# normalising before being matched against stock codes. Confirm.
corr.head()
|
Unnamed: 0 |
s1 |
s2 |
corr |
0 |
0 |
000001.SZ. |
000001.SZ. |
1.000000 |
1 |
1 |
000001.SZ. |
000002.SZ. |
0.648945 |
2 |
2 |
000001.SZ. |
000005.SZ. |
0.342920 |
3 |
3 |
000001.SZ. |
000009.SZ. |
0.297213 |
4 |
4 |
000001.SZ. |
000010.SZ. |
0.186165 |
# ---- Clean the raw tables ------------------------------------------------
# Stocks with no industry get the placeholder '未知', so every 股票 node
# created below carries a non-null 行业 property.
stock['行业'] = stock['行业'].fillna('未知')

# The holder CSV carries a saved row-index column ('Unnamed: 0') which, per
# the preview of the table, mirrors the row number. Comparing on all columns
# would therefore never detect a duplicate, so that column is excluded from
# the comparison. Falls back to all columns if nothing else is left.
_holder_key_cols = [col for col in holder.columns if col != 'Unnamed: 0']
holder = holder.drop_duplicates(
    subset=_holder_key_cols or None, keep='first', inplace=False
)

# ---- Stock Connect nodes -------------------------------------------------
# NOTE(review): these assignments rebind `sz` and `sh`, which until here held
# the DataFrames read from sz.csv / sh.csv. The names are kept because later
# code may rely on them being Nodes; rename one side if both are needed.
sz = Node('深股通', 名字='深股通')
graph.create(sz)
sh = Node('沪股通', 名字='沪股通')
graph.create(sh)

# ---- Concept nodes: one per row of the concept master table --------------
# Columns are addressed by name, not by position, so a change in column
# order (or a missing index column) cannot silently shift the properties.
for concept_code, concept_name in zip(concept_num['code'], concept_num['name']):
    graph.create(Node('概念', 概念代码=concept_code, 概念名称=concept_name))

# ---- Stock nodes: one per row of the stock table -------------------------
for ts_code, stock_name, industry in zip(
    stock['TS代码'], stock['股票名称'], stock['行业']
):
    graph.create(Node('股票', TS代码=ts_code, 股票名称=stock_name, 行业=industry))
# ---- Shareholder nodes: one per row of the holder table ------------------
# The holder table's columns are, in order: 'Unnamed: 0', ts_code, ann_date,
# end_date, holder_name, hold_amount, hold_ratio. Positional access with
# i[0]..i[3] therefore stored the row index as TS代码 and dates as the
# holding figures. Columns are read by name so each property gets the
# intended value.
for ts_code, holder_name, hold_amount, hold_ratio in zip(
    holder['ts_code'],
    holder['holder_name'],
    holder['hold_amount'],
    holder['hold_ratio'],
):
    graph.create(
        Node(
            '股东',
            TS代码=ts_code,
            股东名称=holder_name,
            持股数量=hold_amount,
            持股比例=hold_ratio,
        )
    )

# ---- Shareholder -> stock relationships ----------------------------------
# `matcher` stays a module-level name because later statements reuse it.
matcher = NodeMatcher(graph)

# Process each stock code once. Every 股东 node carrying that code is linked
# to the stock in a single pass, instead of repeating the same match for
# every holder row of that stock.
for ts_code in holder['ts_code'].unique():
    a = matcher.match("股票", TS代码=ts_code).first()
    if a is None:
        # The holder table references a stock with no 股票 node; skip it
        # rather than building a relationship with a missing end node.
        continue
    for j in matcher.match("股东", TS代码=ts_code):
        graph.create(Relationship(j, '参股', a))
    print('TS', str(ts_code))
# ---- Stock -> concept relationships --------------------------------------
# One membership row links a stock (ts_code) to a concept (id). Columns are
# read by name so the lookup does not depend on column order. Uses the
# NodeMatcher bound to `matcher` earlier in the script.
for ts_code, concept_code in zip(concept['ts_code'], concept['id']):
    stock_node = matcher.match("股票", TS代码=ts_code).first()
    concept_node = matcher.match("概念", 概念代码=concept_code).first()
    # `.first()` returns None when nothing matches. Test identity with
    # `is None` so the check does not go through Node's `==` overload.
    if stock_node is None or concept_node is None:
        continue
    graph.create(Relationship(stock_node, '概念属于', concept_node))