Most PyG tutorials out there teach how to implement GCN; I could not find a PyG implementation of GAT, since the available ones are basically plain PyTorch. I needed GAT for a paper, and to keep it in the same framework as my GCN code I implemented it with PyG. This post records how the GAT network is built with PyG.
1. For the principles behind GAT, see 向往的GAT, which explains them in detail.
2. For the basic operations of PyG, see these posts:
GCN学习:Pytorch-Geometric教程(一)
GCN学习:Pytorch-Geometric教程(二)
GCN学习:用PyG实现自定义layers的GCN网络及训练(五)
import torch
import math
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops,degree
from torch_geometric.datasets import Planetoid
import ssl
import torch.nn.functional as F
class GAL(MessagePassing):
    def __init__(self,in_features,out_featrues):
        super(GAL,self).__init__(aggr='add')
        self.a = torch.nn.Parameter(torch.zeros(size=(2*out_featrues, 1)))
        torch.nn.init.xavier_uniform_(self.a.data, gain=1.414)  # Xavier initialization
        # LeakyReLU activation
        self.leakyrelu = torch.nn.LeakyReLU()
        self.linear=torch.nn.Linear(in_features,out_featrues)
    def forward(self,x,edge_index):
        x=self.linear(x)
        N=x.size()[0]
        row,col=edge_index
        a_input = torch.cat([x[row], x[col]], dim=1)
        print('a_input.size',a_input.size())
        # [E, 1] => [E]: unnormalized attention coefficient for each edge
        temp=torch.mm(a_input,self.a).squeeze()
        print('temp.size',temp.size())
        e = self.leakyrelu(temp)
        print('e',e)
        print('e.size', e.size())
        # e_all[i] is the sum of the scores of all edges pointing to node i, used for the softmax-style normalization
        e_all=torch.zeros(x.size()[0])
        count = 0
        for i in col:
            e_all[i]+=e[count]
            count=count+1
        print('e_all',e_all)
        for i in range(len(e)):
            e[i]=math.exp(e[i])/math.exp(e_all[col[i]])
        print('attention',e)
        print('attention.size',e.size())
        return self.propagate(edge_index,x=x,norm=e)
    def message(self, x_j, norm):
        print('x_j:', x_j)
        print('x_j.size', x_j.size())
        print('norm', norm)
        print('norm.size', norm.size())
        print('norm.view.size', norm.view(-1, 1).size())
        return norm.view(-1, 1) * x_j
ssl._create_default_https_context = ssl._create_unverified_context
dataset = Planetoid(root='Cora', name='Cora')
x=dataset[0].x
edge_index=dataset[0].edge_index
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.gal = GAL(dataset.num_node_features,16)
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.dropout(x, training=self.training)
        x = self.gal(x, edge_index)
        print('x_gal',x.size())
        return F.log_softmax(x, dim=1)
model=Net()
data=dataset[0]
out=Net()(data)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for epoch in range(1):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
model.eval()
_, pred = model(data).max(dim=1)
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct/int(data.test_mask.sum())
print('Accuracy:{:.4f}'.format(acc))
>>>Accuracy:0.3880
The way the GAL layer is written follows GCN学习:用PyG实现自定义layers的GCN网络及训练(五), which shows how to implement custom network layers. The core of the work is still writing the __init__, forward and message functions, as sketched below.
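In case the overall structure is not obvious, here is a rough skeleton of such a custom layer. The names MyLayer, in_features and out_features are only illustrative and are not part of the GAL code that follows:

import torch
from torch_geometric.nn import MessagePassing

class MyLayer(MessagePassing):
    def __init__(self, in_features, out_features):
        super(MyLayer, self).__init__(aggr='add')   # messages arriving at a node are summed
        self.linear = torch.nn.Linear(in_features, out_features)
    def forward(self, x, edge_index):
        x = self.linear(x)                          # transform the node features
        return self.propagate(edge_index, x=x)      # start message passing; this calls message()
    def message(self, x_j):
        return x_j                                  # x_j: features of the source node of each edge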
What the GAL layer has to do:
1. Apply the linear feature transformation
2. Compute all of the e_ij values
3. Compute all of the α_ij values (see the formulas after this list)
4. Aggregate the neighbours with a weighted sum
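For reference, the two quantities in steps 2 and 3 are defined in the GAT paper as follows (W is the weight of self.linear, a is the parameter self.a, and || denotes concatenation):

e_{ij} = \mathrm{LeakyReLU}\left(\mathbf{a}^{\top}\,[\mathbf{W}h_i \,\|\, \mathbf{W}h_j]\right), \qquad
\alpha_{ij} = \mathrm{softmax}_j(e_{ij}) = \frac{\exp(e_{ij})}{\sum_{k \in \mathcal{N}_i}\exp(e_{ik})}

In the code below, x after self.linear plays the role of Wh and a_input is the concatenation; the two Python loops handle the normalization, dividing by exp(e_all) (the exponential of the summed scores), which is a simplified stand-in for the exact softmax denominator above.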
class GAL(MessagePassing):
    def __init__(self,in_features,out_featrues):
        # 'add' aggregation: messages are summed (the weighted sum)
        super(GAL,self).__init__(aggr='add')
        # the attention parameter a
        self.a = torch.nn.Parameter(torch.zeros(size=(2*out_featrues, 1)))
        torch.nn.init.xavier_uniform_(self.a.data, gain=1.414)  # Xavier initialization
        # LeakyReLU activation
        self.leakyrelu = torch.nn.LeakyReLU()
        self.linear=torch.nn.Linear(in_features,out_featrues)
    def forward(self,x,edge_index):
        # feature transformation
        x=self.linear(x)
        N=x.size()[0]
        row,col=edge_index
        # concatenate the features of the two endpoints of every edge, then compute the e values
        a_input = torch.cat([x[row], x[col]], dim=1)
        print('a_input.size',a_input.size())
        # squeeze down to one dimension: one score per edge
        temp=torch.mm(a_input,self.a).squeeze()
        print('temp.size',temp.size())
        e = self.leakyrelu(temp)
        print('e',e)
        print('e.size', e.size())
        # e_all[i] is the sum of the scores of all edges pointing to node i, used for the softmax-style normalization
        e_all=torch.zeros(x.size()[0])
        count = 0
        for i in col:
            e_all[i]+=e[count]
            count=count+1
        print('e_all',e_all)
        # compute the alpha values (normalize each edge score by its target node)
        for i in range(len(e)):
            e[i]=math.exp(e[i])/math.exp(e_all[col[i]])
        print('attention',e)
        print('attention.size',e.size())
        # pass the messages
        return self.propagate(edge_index,x=x,norm=e)
    def message(self, x_j, norm):
        print('x_j:', x_j)
        print('x_j.size', x_j.size())
        print('norm', norm)
        print('norm.size', norm.size())
        print('norm.view.size', norm.view(-1, 1).size())
        # weight each message by its attention coefficient
        return norm.view(-1, 1) * x_j
>>>a_input.size torch.Size([10556, 32])
temp.size torch.Size([10556])
e tensor([-0.0023, -0.0004, -0.0010, ..., -0.0054, -0.0048, -0.0023],
grad_fn=<LeakyReluBackward0>)
e.size torch.Size([10556])
e_all tensor([-0.0037, 0.7354, 0.1100, ..., -0.0025, 0.0254, -0.0182],
grad_fn=<CopySlices>)
attention tensor([1.0014, 1.0033, 1.0027, ..., 1.0130, 1.0135, 1.0161],
grad_fn=<CopySlices>)
attention.size torch.Size([10556])
x_j: tensor([[-0.0411, 0.0475, -0.0020, ..., 0.1014, 0.1919, 0.0331],
[-0.0411, 0.0475, -0.0020, ..., 0.1014, 0.1919, 0.0331],
[-0.0411, 0.0475, -0.0020, ..., 0.1014, 0.1919, 0.0331],
...,
[-0.1486, -0.1743, -0.1428, ..., 0.1968, 0.0718, -0.0176],
[-0.1486, -0.1743, -0.1428, ..., 0.1968, 0.0718, -0.0176],
[-0.1486, -0.1743, -0.1428, ..., 0.1968, 0.0718, -0.0176]],
grad_fn=<IndexSelectBackward>)
x_j.size torch.Size([10556, 16])
norm tensor([1.0014, 1.0033, 1.0027, ..., 1.0130, 1.0135, 1.0161],
grad_fn=<CopySlices>)
norm.size torch.Size([10556])
norm.view.size torch.Size([10556, 1])
x_gal torch.Size([2708, 16])
a_input.size torch.Size([10556, 32])
temp.size torch.Size([10556])
e tensor([-0.0016, -0.0020, -0.0010, ..., 0.2144, 0.0202, -0.0003],
grad_fn=<LeakyReluBackward0>)
e.size torch.Size([10556])
e_all tensor([-0.0046, 0.1969, 0.4509, ..., 0.1620, -0.0042, 0.3253],
grad_fn=<CopySlices>)
attention tensor([1.0030, 1.0026, 1.0036, ..., 0.8951, 0.7370, 0.7221],
grad_fn=<CopySlices>)
attention.size torch.Size([10556])
x_j: tensor([[-0.1055, -0.0221, 0.0717, ..., 0.0453, 0.0534, 0.0031],
[-0.1055, -0.0221, 0.0717, ..., 0.0453, 0.0534, 0.0031],
[-0.1055, -0.0221, 0.0717, ..., 0.0453, 0.0534, 0.0031],
...,
[ 0.0421, 0.0349, -0.0459, ..., 0.1171, 0.0008, 0.0766],
[ 0.0421, 0.0349, -0.0459, ..., 0.1171, 0.0008, 0.0766],
[ 0.0421, 0.0349, -0.0459, ..., 0.1171, 0.0008, 0.0766]],
grad_fn=<IndexSelectBackward>)
x_j.size torch.Size([10556, 16])
norm tensor([1.0030, 1.0026, 1.0036, ..., 0.8951, 0.7370, 0.7221],
grad_fn=<CopySlices>)
norm.size torch.Size([10556])
norm.view.size torch.Size([10556, 1])
x_gal torch.Size([2708, 16])
a_input.size torch.Size([10556, 32])
temp.size torch.Size([10556])
e tensor([ 0.2280, 0.2321, -0.0004, ..., 0.1363, 0.3448, 0.0414],
grad_fn=<LeakyReluBackward0>)
e.size torch.Size([10556])
e_all tensor([ 0.4597, -0.0024, 0.2359, ..., 0.0669, 0.2952, 0.5938],
grad_fn=<CopySlices>)
attention tensor([0.7932, 0.7964, 0.6312, ..., 0.6329, 0.7796, 0.5756],
grad_fn=<CopySlices>)
attention.size torch.Size([10556])
x_j: tensor([[-0.0510, 0.0875, 0.1096, ..., -0.1464, -0.0774, -0.0326],
[-0.0510, 0.0875, 0.1096, ..., -0.1464, -0.0774, -0.0326],
[-0.0510, 0.0875, 0.1096, ..., -0.1464, -0.0774, -0.0326],
...,
[ 0.0554, 0.0655, -0.0448, ..., -0.0251, -0.0492, -0.1602],
[ 0.0554, 0.0655, -0.0448, ..., -0.0251, -0.0492, -0.1602],
[ 0.0554, 0.0655, -0.0448, ..., -0.0251, -0.0492, -0.1602]],
grad_fn=<IndexSelectBackward>)
x_j.size torch.Size([10556, 16])
norm tensor([0.7932, 0.7964, 0.6312, ..., 0.6329, 0.7796, 0.5756],
grad_fn=<CopySlices>)
norm.size torch.Size([10556])
norm.view.size torch.Size([10556, 1])
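One closing note: the per-node normalization that the two Python loops compute can also be done in a vectorized way with PyG's torch_geometric.utils.softmax helper, which takes a softmax over the edges that share the same target node. A minimal sketch (the tensors e and col here are made-up toy values, not the Cora ones):

import torch
from torch_geometric.utils import softmax

e = torch.tensor([0.1, 0.5, -0.2, 0.3])  # unnormalized scores, one per edge
col = torch.tensor([0, 0, 1, 1])         # target node of each edge
alpha = softmax(e, col)                  # softmax taken separately over the edges of node 0 and node 1
print(alpha)                             # the first two entries sum to 1, and so do the last two

PyG also ships a ready-made torch_geometric.nn.GATConv layer implementing the full multi-head attention from the paper, which is useful for sanity-checking a hand-written layer like GAL.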