代码库:DGL_异构图神经网络的基本框架

文章目录

  • 创建HGNN
  • 查看HGNN属性与编号
  • 节点(边)特征
  • metapath
    • 根据metapath取子图
  • HGNN 2 GNN
  • 消息传递

import torch as th
import dgl
Using backend: pytorch

创建HGNN

graph_data = {
    ('a', 'B', 'b'): (th.tensor([0, 0]), th.tensor([0, 1])),
    ('b', 'C', 'b'): (th.tensor([0]), th.tensor([1]))
}
heter_g = dgl.heterograph(graph_data)

'''
(src_type, edge_type, dest_type): (src_idx_list, dst_idx_list)

It should be: len(src_idx_list) == len(dst_idx_list)


heter_g:
    a(0) -> B -> b(0)
    a(0) -> B -> b(1)
    b(0) -> C -> b(1)
'''
'\n(src_type, edge_type, dest_type): (src_idx_list, dst_idx_list)\n\nIt should be: len(src_idx_list) == len(dst_idx_list)\n\n\nheter_g:\n    a(0) -> B -> b(0)\n    a(0) -> B -> b(1)\n    b(0) -> C -> b(1)\n'
heter_g
Graph(num_nodes={'a': 1, 'b': 2},
      num_edges={('a', 'B', 'b'): 2, ('b', 'C', 'b'): 1},
      metagraph=[('a', 'b', 'B'), ('b', 'b', 'C')])

查看HGNN属性与编号

heter_g.srctypes
['a', 'b']
heter_g.dsttypes
['a', 'b']
heter_g.ntypes
['a', 'b']
heter_g.etypes
['B', 'C']
heter_g.num_nodes()
3
heter_g.num_nodes('a')
1
heter_g.num_edges()
3
heter_g.num_edges('B')
2
heter_g.nodes('b')
tensor([0, 1])
heter_g.edges(etype='B')

'''
form:
    'uv': return src nodes id and dst nodes id
    'eid': return edge id
    'all': return 'uv' and 'eid'
etype: edge type
'''
(tensor([0, 0]), tensor([0, 1]))
heter_g.edges(form='uv',  etype='B')
(tensor([0, 0]), tensor([0, 1]))

节点(边)特征

# 节点类别
heter_g.nodes['a']
NodeSpace(data={})
heter_g.nodes['a'].data['feat'] = th.randn((heter_g.num_nodes('a'), 3))
heter_g.nodes['a'].data['feat']
tensor([[ 1.1605,  0.6090, -0.2546]])
heter_g.ndata['feat']
{'a': tensor([[ 1.1605,  0.6090, -0.2546]])}
# 边类别
heter_g.edges['B']
EdgeSpace(data={})

metapath

heter_g.canonical_etypes
[('a', 'B', 'b'), ('b', 'C', 'b')]
heter_g.metagraph().edges()
OutMultiEdgeDataView([('a', 'b'), ('b', 'b')])
heter_g.metagraph().nodes()
NodeView(('a', 'b'))

根据metapath取子图

sub_g = dgl.edge_type_subgraph(heter_g, [('a', 'B', 'b')])
sub_g
Graph(num_nodes={'a': 1, 'b': 2},
      num_edges={('a', 'B', 'b'): 2},
      metagraph=[('a', 'b', 'B')])
sub_g.nodes['a'].data['feat']
tensor([[ 1.1605,  0.6090, -0.2546]])

HGNN 2 GNN

heter_g
Graph(num_nodes={'a': 1, 'b': 2},
      num_edges={('a', 'B', 'b'): 2, ('b', 'C', 'b'): 1},
      metagraph=[('a', 'b', 'B'), ('b', 'b', 'C')])
g = dgl.to_homogeneous(heter_g)
g
Graph(num_nodes=3, num_edges=3,
      ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)})
# 以 '_' 开头的字段(_ID、_TYPE)记录该节点/边在原异构图中的类型内编号与类型编号
# 默认情况下不会保留节点和边上的特征
g.ndata
{'_ID': tensor([0, 0, 1]), '_TYPE': tensor([0, 1, 1])}
heter_g.ntypes
['a', 'b']
# 复制粘贴特征矩阵
# 转换成同构图时,要保留的特征必须在每一类节点上都存在且维度相同;若某类节点没有该特征,则无法合并保留
heter_g.nodes['b'].data['feat'] = th.randn(heter_g.num_nodes('b'), 3)
heter_g.ndata['feat']
{'a': tensor([[ 1.1605,  0.6090, -0.2546]]),
 'b': tensor([[ 0.6973,  0.0062, -1.3152],
         [-2.1732, -1.2436,  0.0826]])}
g = dgl.to_homogeneous(heter_g, ndata=['feat'], edata=None)
g
Graph(num_nodes=3, num_edges=3,
      ndata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)})
g.ndata['feat']
tensor([[ 1.1605,  0.6090, -0.2546],
        [ 0.6973,  0.0062, -1.3152],
        [-2.1732, -1.2436,  0.0826]])

消息传递

  • multi_update_all(etype_dict, cross_reducer, apply_node_func=None):

    1. 对每一类metapath 中的所有 metapath instances 进行update_all,得到每一类metapath latent vector。这个过程相当于GNN中的广义消息传递的(1) and (2),即尚未考虑目标节点自身的特征
    2. 对所有metapath (latent vector) 进行 聚合
  • etype_dict: {(src_type, edge_type, dst_type): (message_func, reduce_func, apply_node_func=None)}

    • etype_dict[(src_type, edge_type, dst_type)] 表明对此类metapath instances进行update_all的参数
  • cross_reducer: 对所有metapath进行聚合,取值包括 'mean', 'min', 'max', 'sum', 'stack'

import dgl.function as fn
# metapath types
heter_g.canonical_etypes
[('a', 'B', 'b'), ('b', 'C', 'b')]
for metapath in heter_g.canonical_etypes:
    print(metapath)
('a', 'B', 'b')
('b', 'C', 'b')
metapath_update = {}
cross_reducer = 'mean'

for metapath in heter_g.canonical_etypes:
    metapath_update[metapath] = (fn.copy_u('feat', 'm'), fn.sum('m', 'feat_'))
    
heter_g.multi_update_all(metapath_update, cross_reducer)
# a类型节点没有消息传递后的新特征矩阵feat_ ,因为a类型节点没有入度(不是任何一类边的目标节点),接收不到消息,无法被更新
heter_g.nodes['a'].data.keys()
dict_keys(['feat'])
heter_g.nodes['b'].data.keys()
dict_keys(['feat', 'feat_'])
# nodes features
heter_g.ndata['feat']
{'a': tensor([[ 1.1605,  0.6090, -0.2546]]),
 'b': tensor([[ 0.6973,  0.0062, -1.3152],
         [-2.1732, -1.2436,  0.0826]])}
# b(1): ([ 1.1605,  0.6090, -0.2546] + [ 0.6973,  0.0062, -1.3152]) / 2 = [ 0.9289,  0.3076, -0.7849]
# b(0): ([ 1.1605,  0.6090, -0.2546] + 0) / 2 = [ 0.5802,  0.3045, -0.1273]
# mean 根据全图的metapath种类数进行平均

heter_g.nodes['b'].data['feat_']
tensor([[ 0.5802,  0.3045, -0.1273],
        [ 0.9289,  0.3076, -0.7849]])
metapath_update = {}
cross_reducer = 'stack'

for metapath in heter_g.canonical_etypes:
    metapath_update[metapath] = (fn.copy_u('feat', 'm'), fn.sum('m', 'feat1'))
    
heter_g.multi_update_all(metapath_update, cross_reducer)
heter_g.ndata['feat']
{'a': tensor([[ 1.1605,  0.6090, -0.2546]]),
 'b': tensor([[ 0.6973,  0.0062, -1.3152],
         [-2.1732, -1.2436,  0.0826]])}
# 由于b.0 节点 没有种类为C的边的入度 故stack的结果会有一个为全0 的向量
heter_g.ndata['feat1']
{'b': tensor([[[ 1.1605,  0.6090, -0.2546],
          [ 0.0000,  0.0000,  0.0000]],
 
         [[ 1.1605,  0.6090, -0.2546],
          [ 0.6973,  0.0062, -1.3152]]])}

heter_g.is_block?

你可能感兴趣的:(数据科学,神经网络,机器学习,深度学习,图神经网络,异构图神经网络)