项目地址:https://github.com/pyg-team/pytorch_geometric (PyTorch Geometric)
macOS 安装方式:
$ pip install --verbose --no-cache-dir torch-scatter
$ pip install --verbose --no-cache-dir torch-sparse
$ pip install --verbose --no-cache-dir torch-cluster
$ pip install --verbose --no-cache-dir torch-spline-conv  # 可选(optional)
$ pip install torch-geometric
图的表示:
图的节点可以根据其值进行向量表示,而节点与节点间的连接关系使用邻接矩阵来表示。
这里的邻接矩阵以边索引(edge_index,COO 格式)存储,主要由源节点(第一行)和目标节点(第二行)组成,两行按位置一一对应,每一列表示一条边。比如,在图中,节点0的目标节点有节点1和节点3,可以用[[0,0],[1,3]]
来表示。
所以,邻接矩阵的关键是源节点行和目标节点行之间的对应关系。
# Node feature matrix: one row per node (4 nodes), 2 features per node.
x = torch.tensor(
    [[2, 1], [5, 6], [3, 7], [12, 0]],
    dtype=torch.float,
)
# One label value per node.
y = torch.tensor([0, 1, 0, 1], dtype=torch.float)
# Edge list: the first row holds source nodes, the second row the matching
# target nodes, so each column is one edge (0->1, 1->0, 2->1, 0->3, 3->2).
edge_index = torch.tensor(
    [
        [0, 1, 2, 0, 3],
        [1, 0, 1, 3, 2],
    ],
    dtype=torch.long,
)
图神经网络定义:
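节点 i 的新值等于其相邻节点的值(经权重变换后)的加权和,再加上它自身上一轮的值经线性变换后的结果,即 $x_i' = \Theta_1 x_i + \sum_{j \in \mathcal{N}(i)} e_{j,i}\,\Theta_2 x_j$(没有边权时 $e_{j,i} = 1$)。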
Graph Conv 代码
class GraphConv(MessagePassing):
    """Graph convolution: aggregated neighbour messages plus a linear map of the node.

    The output for each node is ``aggr_out + lin(x)``, where ``aggr_out`` is
    the aggregation (``aggr``, ``'add'`` by default) of the messages built
    from ``x @ weight``, optionally scaled per edge by ``edge_weight``.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
        aggr: Aggregation scheme forwarded to ``MessagePassing``.
        bias: Whether the self-term linear layer has a bias.
        **kwargs: Extra keyword arguments forwarded to ``MessagePassing``.
    """

    def __init__(self, in_channels, out_channels, aggr='add', bias=True,
                 **kwargs):
        super().__init__(aggr=aggr, **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels

        # Weight applied to node features before they are sent as messages.
        # Allocated uninitialised here; filled in by reset_parameters().
        self.weight = Parameter(torch.Tensor(in_channels, out_channels))
        # Linear map applied to each node's own features in update().
        self.lin = torch.nn.Linear(in_channels, out_channels, bias=bias)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the message weight and the self-term linear layer."""
        # NOTE(review): `uniform` is defined outside this snippet; presumably
        # it fills `weight` from a uniform range scaled by in_channels.
        uniform(self.in_channels, self.weight)
        self.lin.reset_parameters()

    def forward(self, x, edge_index, edge_weight=None, size=None):
        """Project the node features and run message passing over the edges.

        Args:
            x: Node features, multiplied by ``self.weight`` to form messages.
            edge_index: Edge connectivity handed to ``propagate``.
            edge_weight: Optional per-edge scale factors; ``None`` disables
                scaling.
            size: Forwarded unchanged to ``propagate``.

        Returns:
            Whatever ``propagate`` returns for these inputs.
        """
        projected = torch.matmul(x, self.weight)
        return self.propagate(
            edge_index,
            size=size,
            x=x,
            h=projected,
            edge_weight=edge_weight,
        )

    def message(self, h_j, edge_weight):
        """Return the per-edge message, scaled by ``edge_weight`` when given."""
        # presumably h_j holds the rows of `h` for each edge's source node
        # (MessagePassing `_j` naming) — confirm against the base class.
        if edge_weight is None:
            return h_j
        # Reshape the weights to a column so they scale each message row.
        return edge_weight.view(-1, 1) * h_j

    def update(self, aggr_out, x):
        """Add the linear transform of the node's own features to ``aggr_out``."""
        return aggr_out + self.lin(x)

    def __repr__(self):
        return '{}({}, {})'.format(
            self.__class__.__name__,
            self.in_channels,
            self.out_channels,
        )
import os.path as osp
import torch
import torch.nn.functional as F
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
# The dataset directory is ../data/ENZYMES relative to this script's resolved
# location.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'ENZYMES')

# Load ENZYMES and shuffle it before slicing out the split below.
dataset = TUDataset(path, name='ENZYMES').shuffle()

# The first tenth of the shuffled graphs is held out for testing; the rest is
# used for training.
n = len(dataset) // 10
test_dataset, train_dataset = dataset[:n], dataset[n:]

# Both loaders yield mini-batches of 60 graphs.
test_loader = DataLoader(test_dataset, batch_size=60)
train_loader = DataLoader(train_dataset, batch_size=60)
class Net(torch.nn.Module):
    """Graph classifier: three GraphConv + TopKPooling stages and an MLP head.

    After every stage the pooled graph is summarised by concatenating the
    global max-pool and global mean-pool of its node features; the three
    summaries are summed and passed to the MLP. Feature and class counts are
    read from the module-level ``dataset``.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = GraphConv(dataset.num_features, 128)
        self.pool1 = TopKPooling(128, ratio=0.8)
        self.conv2 = GraphConv(128, 128)
        self.pool2 = TopKPooling(128, ratio=0.8)
        self.conv3 = GraphConv(128, 128)
        self.pool3 = TopKPooling(128, ratio=0.8)

        # 256 inputs: each readout concatenates two 128-wide pooled vectors.
        self.lin1 = torch.nn.Linear(256, 128)
        self.lin2 = torch.nn.Linear(128, 64)
        self.lin3 = torch.nn.Linear(64, dataset.num_classes)

    def forward(self, data):
        """Return per-graph log-probabilities for the batch ``data``.

        Args:
            data: Batch object providing ``x``, ``edge_index`` and ``batch``.

        Returns:
            ``log_softmax`` over the last dimension of the final linear
            layer's output.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch

        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        readouts = []
        for conv, pool in stages:
            x = F.relu(conv(x, edge_index))
            # Pooling returns updated features, edges and batch assignment;
            # the remaining outputs are not used here.
            x, edge_index, _, batch, _, _ = pool(x, edge_index, None, batch)
            # Graph-level summary of this stage: [max-pool | mean-pool].
            readouts.append(torch.cat([gmp(x, batch), gap(x, batch)], dim=1))

        # Sum the per-stage summaries in stage order.
        x = readouts[0] + readouts[1] + readouts[2]

        x = F.relu(self.lin1(x))
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.lin2(x))
        return F.log_softmax(self.lin3(x), dim=-1)
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the network and move its parameters to the chosen device.
model = Net()
model = model.to(device)

# Adam over all model parameters with a learning rate of 5e-4.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0005)
def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        epoch: Current epoch number; accepted for the caller's convenience
            but not used inside the function.

    Returns:
        The sum of each batch loss multiplied by that batch's graph count,
        divided by ``len(train_dataset)``.
    """
    model.train()

    running_loss = 0
    for batch in train_loader:
        batch = batch.to(device)

        optimizer.zero_grad()
        log_probs = model(batch)
        batch_loss = F.nll_loss(log_probs, batch.y)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the number of graphs it covers so the
        # final division by the dataset size is over graphs, not batches.
        running_loss += batch.num_graphs * batch_loss.item()

    return running_loss / len(train_dataset)
def test(loader):
    """Return the classification accuracy of ``model`` over ``loader``.

    Args:
        loader: Iterable of batches that also exposes the underlying
            ``dataset`` (its length is the accuracy denominator).

    Returns:
        Number of correctly predicted graphs divided by
        ``len(loader.dataset)``.
    """
    model.eval()

    correct = 0
    # Evaluation needs no gradients; disabling autograd avoids building and
    # holding a computation graph for every batch.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            # Index of the largest log-probability is the predicted class.
            pred = model(data).max(dim=1)[1]
            correct += pred.eq(data.y).sum().item()
    return correct / len(loader.dataset)
# Train for 200 epochs, reporting loss and train/test accuracy after each one.
for epoch in range(1, 201):
    loss = train(epoch)
    # Evaluate on the training split first, then on the held-out split.
    train_acc, test_acc = test(train_loader), test(test_loader)
    report = 'Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Test Acc: {:.5f}'.format(
        epoch, loss, train_acc, test_acc)
    print(report)
Q1: 数据如何构造,如何合理定义节点间的邻接矩阵?
参考: